Skip to content

Commit

Permalink
Merge pull request #11735 from ScottPJones/spj/utf16map
Browse files Browse the repository at this point in the history
Fix #11460, Fix #11464 uppercase/lowercase/map on a UTF16String should return a UTF16String
  • Loading branch information
tkelman committed Jun 24, 2015
2 parents 42a594d + 3e591e0 commit a282fcd
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 0 deletions.
25 changes: 25 additions & 0 deletions base/utf16.jl
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,28 @@ function utf16(p::Union{Ptr{UInt16}, Ptr{Int16}})
while unsafe_load(p, len+1) != 0; len += 1; end
utf16(p, len)
end

# Apply `fun` to each character of `str` and build a new UTF16String from the
# results.
#
# `fun` must return a `Char` for every character; any other return value raises
# UnicodeError(UTF_ERR_MAP_CHAR). A returned character for which
# `utf16_is_surrogate` is true, or whose code is greater than 0x10ffff, raises
# UnicodeError(UTF_ERR_INVALID_CHAR).
function map(fun, str::UTF16String)
    units = UInt16[]
    # Only a hint: the mapped string may need a different number of code units.
    sizehint!(units, length(str.data))
    for ch in str
        mapped = fun(ch)
        isa(mapped, Char) || throw(UnicodeError(UTF_ERR_MAP_CHAR, 0, 0))
        code = reinterpret(UInt32, mapped)
        if code > 0x10ffff
            throw(UnicodeError(UTF_ERR_INVALID_CHAR, 0, code))
        elseif code >= 0x10000
            # Emitted as two code units; 0xd7c0 == 0xd800 - (0x10000 >> 10).
            push!(units, UInt16(0xd7c0 + (code >> 10)))
            push!(units, UInt16(0xdc00 + (code & 0x3ff)))
        else
            unit = UInt16(code)
            if utf16_is_surrogate(unit)
                throw(UnicodeError(UTF_ERR_INVALID_CHAR, 0, code))
            end
            push!(units, unit)
        end
    end
    # A zero code unit is appended after the data before wrapping the buffer.
    push!(units, 0)
    UTF16String(units)
end
20 changes: 20 additions & 0 deletions test/strings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1893,3 +1893,23 @@ end
# Iterating the string yields one Char per character, while its indices are not
# contiguous: the six characters sit at indices [1, 4, 6, 8, 10, 12].
@test [c for c in "ḟøøƀäṙ"] == ['ḟ', 'ø', 'ø', 'ƀ', 'ä', 'ṙ']
@test [i for i in eachindex("ḟøøƀäṙ")] == [1, 4, 6, 8, 10, 12]
@test [x for x in enumerate("ḟøøƀäṙ")] == [(1, 'ḟ'), (2, 'ø'), (3, 'ø'), (4, 'ƀ'), (5, 'ä'), (6, 'ṙ')]

# issue #11464: uppercase/lowercase of a UTF16String became a UTF8String.
# The sample text mixes ASCII, a Latin-1 character (U+00FF), the last BMP code
# point (U+FFFF) and the last Unicode code point (U+10FFFF). The latter needs
# the capital `\U` escape: `\u` consumes at most four hex digits, so `\u10ffff`
# would be U+10FF followed by the text "ff" and no supplementary-plane
# character would ever reach the UTF-16 code path.
str = "abcdef\uff\uffff\U10ffffABCDEF"
# uppercase must preserve the string type of its argument.
@test typeof(uppercase("abcdef")) == ASCIIString
@test typeof(uppercase(utf8(str))) == UTF8String
@test typeof(uppercase(utf16(str))) == UTF16String
@test typeof(uppercase(utf32(str))) == UTF32String
# lowercase must preserve the string type of its argument.
@test typeof(lowercase("ABCDEF")) == ASCIIString
@test typeof(lowercase(utf8(str))) == UTF8String
@test typeof(lowercase(utf16(str))) == UTF16String
@test typeof(lowercase(utf32(str))) == UTF32String

# Mapping functions that map(fun, ::UTF16String) must reject.

# Returns a Bool rather than a Char.
foomap(ch) = ch > 65
@test_throws UnicodeError map(foomap, utf16(str))

# Returns a Char whose code (0xd800) is a surrogate code unit.
foobar(ch) = Char(0xd800)
@test_throws UnicodeError map(foobar, utf16(str))

# Returns a Char whose code (0x200000) is above 0x10ffff.
foobaz(ch) = Char(0x200000)
@test_throws UnicodeError map(foobaz, utf16(str))


0 comments on commit a282fcd

Please sign in to comment.