Skip to content

Commit

Permalink
Updated to use unsafe_checkstring, fix comments
Browse files Browse the repository at this point in the history
  • Loading branch information
ScottPJones committed Jun 14, 2015
1 parent eb556a9 commit 44a3343
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 58 deletions.
9 changes: 0 additions & 9 deletions base/utf16.jl
Original file line number Diff line number Diff line change
Expand Up @@ -259,15 +259,6 @@ function convert(::Type{UTF16String}, str::ASCIIString)
@inbounds return fast_utf_copy(UTF16String, UInt16, length(dat), dat, true)
end

# Expose a UTF16String as a raw pointer to 16-bit integers (signed or
# unsigned) by converting the pointer obtained from `pointer(s)`.
function unsafe_convert{T<:Union(Int16,UInt16)}(::Type{Ptr{T}}, s::UTF16String)
    return convert(Ptr{T}, pointer(s))
end

# Build a UTF16String from a vector of 16-bit code units.
# Throws `UnicodeError(UTF_ERR_INVALID_16,0,0)` when `isvalid` rejects `data`.
function convert(::Type{UTF16String}, data::AbstractVector{UInt16})
    if !isvalid(UTF16String, data)
        throw(UnicodeError(UTF_ERR_INVALID_16,0,0))
    end
    n = length(data)
    # Allocate one extra slot, which is set to zero after the copied data.
    buf = Vector{UInt16}(n+1)
    @inbounds begin
        copy!(buf, 1, data, 1, n)
        buf[n+1] = 0
        return UTF16String(buf)
    end
end

# Both methods return the string's `data` field itself; no copy is made here,
# so the caller receives the very array stored in the string.
convert(::Type{Vector{UInt16}}, str::UTF16String) = str.data
convert(::Type{Array{UInt16}}, str::UTF16String) = str.data

Expand Down
53 changes: 10 additions & 43 deletions base/utf32.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,6 @@ reverse(s::UTF32String) = UTF32String(reverse!(copy(s.data), 1, length(s)))

# Byte size of the string: the size of the backing `data` array minus the
# size of one `Char` (presumably the trailing zero terminator -- confirm
# against the UTF32String constructors).
function sizeof(s::UTF32String)
    return sizeof(s.data) - sizeof(Char)
end

# Build a UTF32String from a vector of `Char`s: the characters are copied
# into a fresh buffer that has one extra, zero-valued element at the end.
function convert(::Type{UTF32String}, data::AbstractVector{Char})
    n = length(data)
    buf = Vector{Char}(n+1)
    @inbounds begin
        copy!(buf, 1, data, 1, n)
        buf[n+1] = 0
        return UTF32String(buf)
    end
end

# Shared constant for the empty UTF-32 string; its data is a one-element
# array holding a single zero (presumably just the terminator -- confirm).
const empty_utf32 = UTF32String(UInt32[0])

# Convenience wrapper: forwards to whichever `convert(UTF32String, x)` method matches `x`.
utf32(x) = convert(UTF32String, x)
Expand All @@ -23,18 +18,14 @@ convert(::Type{UTF32String}, s::UTF32String) = s
"
Converts an `AbstractString` to a `UTF32String`
### Input Arguments:
* `::Type{UTF32String}`
* `str::AbstractString`
### Returns:
* `::UTF32String`
* `UTF32String`
### Throws:
* `UnicodeError`
"
function convert(::Type{UTF32String}, str::AbstractString)
len, flags = check_string(str)
len, flags = unsafe_checkstring(str)
buf = Vector{Char}(len+1)
out = 0
@inbounds for ch in str ; buf[out += 1] = ch ; end
Expand All @@ -45,12 +36,8 @@ end
"
Converts a UTF-32 encoded vector of `UInt32` to a `UTF8String`
### Input Arguments:
* `::Type{UTF8String}`
* `dat::Vector{UInt32}`
### Returns:
* `::UTF8String`
* `UTF8String`
### Throws:
* `UnicodeError`
Expand All @@ -60,20 +47,16 @@ function convert(::Type{UTF8String}, dat::Vector{UInt32})
# handle zero length string quickly
len == 0 && return empty_utf8
# get number of bytes to allocate
len, flags, num4byte, num3byte, num2byte = check_string(dat, len>>>2)
len, flags, num4byte, num3byte, num2byte = unsafe_checkstring(dat, 1, len>>>2)
flags == 0 && @inbounds return UTF8String(copy!(Vector{UInt8}(len), 1, dat, 1, len))
return encode_to_utf8(UInt32, dat, len + num2byte + num3byte*2 + num4byte*3)
end

"
Converts a `UTF32String` to a `UTF8String`
### Input Arguments:
* `::Type{UTF8String}`
* `str::UTF32String`
### Returns:
* `::UTF8String`
* `UTF8String`
### Throws:
* `UnicodeError`
Expand All @@ -84,18 +67,14 @@ function convert(::Type{UTF8String}, str::UTF32String)
# handle zero length string quickly
len <= 1 && return empty_utf8
# get number of bytes to allocate
len, flags, num4byte, num3byte, num2byte = check_string(dat, len-1)
len, flags, num4byte, num3byte, num2byte = unsafe_checkstring(dat, 1, len-1)
flags == 0 && @inbounds return UTF8String(copy!(Vector{UInt8}(len), 1, dat, 1, len))
return encode_to_utf8(UInt32, dat, len + num2byte + num3byte*2 + num4byte*3)
end

"
Converts a `UTF8String` to a `UTF32String`
### Input Arguments:
* `::Type{UTF32String}`
* `str::UTF8String`
### Returns:
* `::UTF32String`
Expand All @@ -107,7 +86,7 @@ function convert(::Type{UTF32String}, str::UTF8String)
# handle zero length string quickly
sizeof(dat) == 0 && return empty_utf32
# Validate UTF-8 encoding, and get number of words to create
len, flags = check_string(dat)
len, flags = unsafe_checkstring(dat)
# Optimize case where no characters > 0x7f
flags == 0 && @inbounds return fast_utf_copy(UTF32String, Char, len, dat, true)
# has multi-byte UTF-8 sequences
Expand Down Expand Up @@ -150,10 +129,6 @@ end
"
Converts a `UTF16String` to `UTF32String`
### Input Arguments:
* `::Type{UTF32String}`
* `str::UTF16String`
### Returns:
* `::UTF32String`
Expand All @@ -166,7 +141,7 @@ function convert(::Type{UTF32String}, str::UTF16String)
# handle zero length string quickly (account for trailing \0)
len <= 2 && return empty_utf32
# get number of words to create
len, flags, num4byte = check_string(dat, len>>>1)
len, flags, num4byte = unsafe_checkstring(dat, 1, len>>>1)
# No surrogate pairs, do optimized copy
(flags & UTF_UNICODE4) == 0 && @inbounds return UTF32String(copy!(Vector{Char}(len), dat))
local ch::UInt32
Expand All @@ -185,10 +160,6 @@ end
"
Converts a UTF-32 encoded vector of `UInt32` to a `UTF16String`
### Input Arguments:
* `::Type{UTF16String}`
* `dat::Vector{UInt32}`
### Returns:
* `::UTF16String`
Expand All @@ -200,7 +171,7 @@ function convert(::Type{UTF16String}, dat::Vector{UInt32})
# handle zero length string quickly
len <= 4 && return empty_utf16
# get number of words to allocate
len, flags, num4byte = check_string(dat, len>>>2)
len, flags, num4byte = unsafe_checkstring(dat, 1, len>>>2)
len += num4byte + 1
# optimized path, no surrogates
num4byte == 0 && @inbounds return fast_utf_copy(UTF16String, UInt16, len, dat)
Expand All @@ -210,10 +181,6 @@ end
"
Converts a `UTF32String` to `UTF16String`
### Input Arguments:
* `::Type{UTF16String}`
* `str::UTF32String`
### Returns:
* `::UTF16String`
Expand All @@ -226,7 +193,7 @@ function convert(::Type{UTF16String}, str::UTF32String)
# handle zero length string quickly
len <= 4 && return empty_utf16
# get number of words to allocate
len, flags, num4byte = check_string(dat, len>>>2)
len, flags, num4byte = unsafe_checkstring(dat, 1, len>>>2)
# optimized path, no surrogates
num4byte == 0 && @inbounds return UTF16String(copy!(Vector{UInt16}(len), dat))
return encode_to_utf16(dat, len + num4byte)
Expand Down
8 changes: 2 additions & 6 deletions base/utf8.jl
Original file line number Diff line number Diff line change
Expand Up @@ -220,12 +220,8 @@ convert(::Type{UTF8String}, s::ASCIIString) = UTF8String(s.data)
"
Converts a UTF-8 encoded vector of `UInt8` to a `UTF8String`
### Input Arguments:
* `::Type{UTF8String}`
* `dat::Vector{UInt8}`
### Returns:
* `::UTF8String`
* `UTF8String`
### Throws:
* `UnicodeError`
Expand All @@ -234,7 +230,7 @@ function convert(::Type{UTF8String}, dat::Vector{UInt8})
# handle zero length string quickly
isempty(dat) && return empty_utf8
# get number of bytes to allocate
len, flags, num4byte, num3byte, num2byte = check_string(dat)
len, flags, num4byte, num3byte, num2byte = unsafe_checkstring(dat)
if (flags & (UTF_LONG | UTF_SURROGATE)) == 0
len = sizeof(dat)
@inbounds return UTF8String(copy!(Vector{UInt8}(len), 1, dat, 1, len))
Expand Down

0 comments on commit 44a3343

Please sign in to comment.