From e8b0ba83ff7a6c934ee5a3616a5439326a7bce61 Mon Sep 17 00:00:00 2001
From: ScottPJones <scottjones@alum.mit.edu>
Date: Fri, 10 Jul 2015 16:32:30 -0400
Subject: [PATCH] Update comments

---
 base/unicode/checkstring.jl |  8 ++---
 base/unicode/utf16.jl       | 20 ++++++-------
 base/unicode/utf32.jl       | 20 ++++++-------
 base/unicode/utf8.jl        | 12 ++++----
 test/unicode/utf32.jl       | 59 +++++++++++++++++++------------------
 5 files changed, 61 insertions(+), 58 deletions(-)

diff --git a/base/unicode/checkstring.jl b/base/unicode/checkstring.jl
index 8483c05d9f2c5c..eeac2c3e21052d 100644
--- a/base/unicode/checkstring.jl
+++ b/base/unicode/checkstring.jl
@@ -23,7 +23,7 @@ const UTF_SURROGATE = 32        ##< surrogate pairs present
     (ch << 6) | (byt & 0x3f)
 end
 
-"
+"""
 Validates and calculates number of characters in a UTF-8,UTF-16 or UTF-32 encoded vector/string
 
 Warning: this function does not check the bounds of the start or end positions
@@ -46,7 +46,7 @@ Use `checkstring` to make sure the bounds are checked
 
 ### Throws:
 * `UnicodeError`
-"
+"""
 function unsafe_checkstring end
 
 function unsafe_checkstring(dat::Vector{UInt8},
@@ -191,7 +191,7 @@ function unsafe_checkstring{T <: Union{Vector{UInt16}, Vector{UInt32}, AbstractS
     return totalchar, flags, num4byte, num3byte, num2byte
 end
 
-"
+"""
 Validates and calculates number of characters in a UTF-8,UTF-16 or UTF-32 encoded vector/string
 
 This function checks the bounds of the start and end positions
@@ -214,7 +214,7 @@ Use `unsafe_checkstring` to avoid that overhead if the bounds have already been
 
 ### Throws:
 * `UnicodeError`
-"
+"""
 function checkstring end
 
 # No need to check bounds if using defaults
diff --git a/base/unicode/utf16.jl b/base/unicode/utf16.jl
index 7f97a46db4acf8..e835c77145ac8e 100644
--- a/base/unicode/utf16.jl
+++ b/base/unicode/utf16.jl
@@ -101,7 +101,7 @@ function isvalid(::Type{UTF16String}, data::AbstractArray{UInt16})
     return i > n || !is_surrogate_codeunit(data[i])
 end
 
-"
+"""
 Converts an `AbstractString` to a `UTF16String`
 
 ### Returns:
@@ -109,7 +109,7 @@ Converts an `AbstractString` to a `UTF16String`
 
 ### Throws:
 *   `UnicodeError`
-"
+"""
 function convert(::Type{UTF16String}, str::AbstractString)
     len, flags, num4byte = unsafe_checkstring(str)
     buf = Vector{UInt16}(len+num4byte+1)
@@ -128,7 +128,7 @@ function convert(::Type{UTF16String}, str::AbstractString)
     UTF16String(buf)
 end
 
-"
+"""
 Converts a `UTF8String` to a `UTF16String`
 
 ### Returns:
@@ -136,7 +136,7 @@ Converts a `UTF8String` to a `UTF16String`
 
 ### Throws:
 *   `UnicodeError`
-"
+"""
 function convert(::Type{UTF16String}, str::UTF8String)
     dat = str.data
     # handle zero length string quickly
@@ -174,7 +174,7 @@ function convert(::Type{UTF16String}, str::UTF8String)
     UTF16String(buf)
 end
 
-"
+"""
 Converts a `UTF16String` to a `UTF8String`
 
 ### Returns:
@@ -182,7 +182,7 @@ Converts a `UTF16String` to a `UTF8String`
 
 ### Throws:
 *   `UnicodeError`
-"
+"""
 function convert(::Type{UTF8String}, str::UTF16String)
     dat = str.data
     len = sizeof(dat) >>> 1
@@ -194,7 +194,7 @@ function convert(::Type{UTF8String}, str::UTF16String)
     return encode_to_utf8(UInt16, dat, len + num2byte + num3byte*2 + num4byte*3)
 end
 
-"
+"""
 Converts a vector of `Char` to a `UTF16String`
 
 ### Returns:
@@ -202,7 +202,7 @@ Converts a vector of `Char` to a `UTF16String`
 
 ### Throws:
 *   `UnicodeError`
-"
+"""
 function convert(::Type{UTF16String}, chrs::Vector{Char})
     len = sizeof(chrs)
     # handle zero length string quickly
@@ -216,7 +216,7 @@ function convert(::Type{UTF16String}, chrs::Vector{Char})
     return encode_to_utf16(dat, len)
 end
 
-"
+"""
 Converts an already validated UTF-32 encoded vector of `UInt32` to a `UTF16String`
 
 ### Input Arguments:
@@ -225,7 +225,7 @@ Converts an already validated UTF-32 encoded vector of `UInt32` to a `UTF16Strin
 
 ### Returns:
 *   `::UTF16String`
-"
+"""
 function encode_to_utf16(dat, len)
     buf = Vector{UInt16}(len)
     @inbounds buf[len] = 0 # NULL termination
diff --git a/base/unicode/utf32.jl b/base/unicode/utf32.jl
index c738fec577410a..1987cb56552509 100644
--- a/base/unicode/utf32.jl
+++ b/base/unicode/utf32.jl
@@ -15,7 +15,7 @@ utf32(x) = convert(UTF32String, x)
 convert(::Type{UTF32String}, c::Char) = UTF32String(Char[c, Char(0)])
 convert(::Type{UTF32String}, s::UTF32String) = s
 
-"
+"""
 Converts an `AbstractString` to a `UTF32String`
 
 ### Returns:
@@ -23,7 +23,7 @@ Converts an `AbstractString` to a `UTF32String`
 
 ### Throws:
 *   `UnicodeError`
-"
+"""
 function convert(::Type{UTF32String}, str::AbstractString)
     len, flags = unsafe_checkstring(str)
     buf = Vector{Char}(len+1)
@@ -33,7 +33,7 @@ function convert(::Type{UTF32String}, str::AbstractString)
     UTF32String(buf)
 end
 
-"
+"""
 Converts a `UTF32String` to a `UTF8String`
 
 ### Returns:
@@ -41,7 +41,7 @@ Converts a `UTF32String` to a `UTF8String`
 
 ### Throws:
 *   `UnicodeError`
-"
+"""
 function convert(::Type{UTF8String},  str::UTF32String)
     dat = reinterpret(UInt32, str.data)
     len = sizeof(dat) >>> 2
@@ -53,7 +53,7 @@ function convert(::Type{UTF8String},  str::UTF32String)
     return encode_to_utf8(UInt32, dat, len + num2byte + num3byte*2 + num4byte*3)
 end
 
-"
+"""
 Converts a `UTF8String` to a `UTF32String`
 
 ### Returns:
@@ -61,7 +61,7 @@ Converts a `UTF8String` to a `UTF32String`
 
 ### Throws:
 *   `UnicodeError`
-"
+"""
 function convert(::Type{UTF32String}, str::UTF8String)
     dat = str.data
     # handle zero length string quickly
@@ -107,7 +107,7 @@ function convert(::Type{UTF32String}, str::UTF8String)
     UTF32String(buf)
 end
 
-"
+"""
 Converts a `UTF16String` to `UTF32String`
 
 ### Returns:
@@ -115,7 +115,7 @@ Converts a `UTF16String` to `UTF32String`
 
 ### Throws:
 *   `UnicodeError`
-"
+"""
 function convert(::Type{UTF32String}, str::UTF16String)
     dat = str.data
     len = sizeof(dat)
@@ -138,7 +138,7 @@ function convert(::Type{UTF32String}, str::UTF16String)
     UTF32String(buf)
 end
 
-"
+"""
 Converts a `UTF32String` to `UTF16String`
 
 ### Returns:
@@ -146,7 +146,7 @@ Converts a `UTF32String` to `UTF16String`
 
 ### Throws:
 *   `UnicodeError`
-"
+"""
 function convert(::Type{UTF16String}, str::UTF32String)
     dat = reinterpret(UInt32, str.data)
     len = sizeof(dat)
diff --git a/base/unicode/utf8.jl b/base/unicode/utf8.jl
index db246148c517f7..d8ff8304a89838 100644
--- a/base/unicode/utf8.jl
+++ b/base/unicode/utf8.jl
@@ -217,7 +217,7 @@ utf8(x) = convert(UTF8String, x)
 convert(::Type{UTF8String}, s::UTF8String) = s
 convert(::Type{UTF8String}, s::ASCIIString) = UTF8String(s.data)
 
-"
+"""
 Converts a UTF-8 encoded vector of `UInt8` to a `UTF8String`
 
 ### Returns:
@@ -225,7 +225,7 @@ Converts a UTF-8 encoded vector of `UInt8` to a `UTF8String`
 
 ### Throws:
 *   `UnicodeError`
-"
+"""
 function convert(::Type{UTF8String}, dat::Vector{UInt8})
     # handle zero length string quickly
     isempty(dat) && return empty_utf8
@@ -302,7 +302,7 @@ function convert(::Type{UTF8String}, a::Vector{UInt8}, invalids_as::AbstractStri
 end
 convert(::Type{UTF8String}, s::AbstractString) = utf8(bytestring(s))
 
-"
+"""
 Converts a vector of `Char` to a `UTF8String`
 
 ### Returns:
@@ -310,7 +310,7 @@ Converts a vector of `Char` to a `UTF8String`
 
 ### Throws:
 *   `UnicodeError`
-"
+"""
 function convert(::Type{UTF8String}, chrs::Vector{Char})
     len = sizeof(chrs)
     # handle zero length string quickly
@@ -322,7 +322,7 @@ function convert(::Type{UTF8String}, chrs::Vector{Char})
     return encode_to_utf8(UInt32, dat, len + num2byte + num3byte*2 + num4byte*3)
 end
 
-"
+"""
 Converts an already validated vector of `UInt16` or `UInt32` to a `UTF8String`
 
 ### Input Arguments:
@@ -331,7 +331,7 @@ Converts an already validated vector of `UInt16` or `UInt32` to a `UTF8String`
 
 ### Returns:
 * `UTF8String`
-"
+"""
 function encode_to_utf8{T<:Union{UInt16, UInt32}}(::Type{T}, dat, len)
     buf = Vector{UInt8}(len)
     out = 0
diff --git a/test/unicode/utf32.jl b/test/unicode/utf32.jl
index f554558e146e97..784127b4235f14 100644
--- a/test/unicode/utf32.jl
+++ b/test/unicode/utf32.jl
@@ -32,8 +32,8 @@ str3_UTF8 = "abcd\uff\uff\u7fff\u7fff"
 str4_UTF8 = "abcd\uff\u7ff\u7fff\U7ffff"
 strS_UTF8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xed\xa0\x80\xed\xb0\x80")
 strC_UTF8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\U10000")
-strZ_UTF8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xc0\x80")
 strz_UTF8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\0")
+strZ      = b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xc0\x80"
 
 strA_UTF16 = utf16(strA_UTF8)
 strL_UTF16 = utf16(strL_UTF8)
@@ -68,97 +68,100 @@ tstcvt(str4_UTF8,str4_UTF16,str4_UTF32)
 @test utf16(strS_UTF32) == strC_UTF8
 
 # Test converting overlong \0
-# @test utf8(strZ_UTF8)  == strz_UTF8   # currently broken! (in utf8.jl)
-@test utf16(strZ_UTF8) == strz_UTF8
-@test utf32(strZ_UTF8) == strz_UTF8
+@test utf8(strZ)  == strz_UTF8
+@test utf16(UTF8String(strZ)) == strz_UTF8
+@test utf32(UTF8String(strZ)) == strz_UTF8
 
 # Test invalid sequences
 
+@inline strval(::Type{UTF8String}, dat) = dat  # pass the raw bytes so convert(UTF8String, ::Vector{UInt8}) is the call under test
+@inline strval(::Union{Type{UTF16String},Type{UTF32String}}, dat) = UTF8String(dat)  # wrap bytes so convert(T, ::UTF8String) is the call under test
+
 byt = 0x0
-for T in (UTF16String, UTF32String)
+for T in (UTF8String, UTF16String, UTF32String)
     try
     # Continuation byte not after lead
     for byt in 0x80:0xbf
-        @test_throws UnicodeError convert(T,  UTF8String(UInt8[byt]))
+        @test_throws UnicodeError convert(T,  strval(T, UInt8[byt]))
     end
 
     # Test lead bytes
     for byt in 0xc0:0xff
         # Single lead byte at end of string
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[byt]))
         # Lead followed by non-continuation character < 0x80
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[byt,0]))
         # Lead followed by non-continuation character > 0xbf
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0xc0]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[byt,0xc0]))
     end
 
     # Test overlong 2-byte
     for byt in 0x81:0xbf
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[0xc0,byt]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[0xc0,byt]))
     end
     for byt in 0x80:0xbf
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[0xc1,byt]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[0xc1,byt]))
     end
 
     # Test overlong 3-byte
     for byt in 0x80:0x9f
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[0xe0,byt,0x80]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[0xe0,byt,0x80]))
     end
 
     # Test overlong 4-byte
     for byt in 0x80:0x8f
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[0xef,byt,0x80,0x80]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[0xef,byt,0x80,0x80]))
     end
 
     # Test 4-byte > 0x10ffff
     for byt in 0x90:0xbf
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[0xf4,byt,0x80,0x80]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[0xf4,byt,0x80,0x80]))
     end
     for byt in 0xf5:0xf7
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[byt,0x80,0x80,0x80]))
     end
 
     # Test 5-byte
     for byt in 0xf8:0xfb
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80,0x80]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[byt,0x80,0x80,0x80,0x80]))
     end
 
     # Test 6-byte
     for byt in 0xfc:0xfd
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80,0x80,0x80]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[byt,0x80,0x80,0x80,0x80,0x80]))
     end
 
     # Test 7-byte
-    @test_throws UnicodeError convert(T, UTF8String(UInt8[0xfe,0x80,0x80,0x80,0x80,0x80,0x80]))
+    @test_throws UnicodeError convert(T, strval(T, UInt8[0xfe,0x80,0x80,0x80,0x80,0x80,0x80]))
 
     # Three and above byte sequences
     for byt in 0xe0:0xef
         # Lead followed by only 1 continuation byte
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[byt,0x80]))
         # Lead ended by non-continuation character < 0x80
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[byt,0x80,0]))
         # Lead ended by non-continuation character > 0xbf
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0xc0]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[byt,0x80,0xc0]))
     end
 
     # 3-byte encoded surrogate character(s)
     # Single surrogate
-    @test_throws UnicodeError convert(T, UTF8String(UInt8[0xed,0xa0,0x80]))
+    @test_throws UnicodeError convert(T, strval(T, UInt8[0xed,0xa0,0x80]))
     # Not followed by surrogate
-    @test_throws UnicodeError convert(T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0x80,0x80]))
+    @test_throws UnicodeError convert(T, strval(T, UInt8[0xed,0xa0,0x80,0xed,0x80,0x80]))
     # Trailing surrogate first
-    @test_throws UnicodeError convert(T, UTF8String(UInt8[0xed,0xb0,0x80,0xed,0xb0,0x80]))
+    @test_throws UnicodeError convert(T, strval(T, UInt8[0xed,0xb0,0x80,0xed,0xb0,0x80]))
     # Followed by lead surrogate
-    @test_throws UnicodeError convert(T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0xa0,0x80]))
+    @test_throws UnicodeError convert(T, strval(T, UInt8[0xed,0xa0,0x80,0xed,0xa0,0x80]))
 
     # Four byte sequences
     for byt in 0xf0:0xf4
         # Lead followed by only 2 continuation bytes
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0x80]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[byt,0x80,0x80]))
         # Lead followed by non-continuation character < 0x80
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0x80,0]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[byt,0x80,0x80,0]))
         # Lead followed by non-continuation character > 0xbf
-        @test_throws UnicodeError convert(T, UTF8String(UInt8[byt,0x80,0x80,0xc0]))
+        @test_throws UnicodeError convert(T, strval(T, UInt8[byt,0x80,0x80,0xc0]))
     end
     catch exp ;
         println("Error checking $T: $byt")