Skip to content

Commit

Permalink
rename CharString -> UTF32String (fix #4943)
Browse files (browse the repository at this point in the history)
  • Loading branch information
stevengj committed Nov 26, 2013
1 parent 811df26 commit ce40f89
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 19 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ Library improvements

* New string type, `UTF16String` ([#4930]).

* `CharString` is renamed to `UTF32String` ([#4943]); the old name `CharString` remains available as a deprecated alias.

Deprecated or removed
---------------------

Expand Down
2 changes: 1 addition & 1 deletion base/char.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,4 @@ sizeof(::Type{Char}) = 4
## printing & showing characters ##

print(io::IO, c::Char) = (write(io,c); nothing)
show(io::IO, c::Char) = (print(io,'\''); print_escaped(io,CharString(c),"'"); print(io,'\''))
show(io::IO, c::Char) = (print(io,'\''); print_escaped(io,UTF32String(c),"'"); print(io,'\''))
3 changes: 3 additions & 0 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -356,5 +356,8 @@ export mmread
export Stat
const Stat = StatStruct

export CharString
const CharString = UTF32String

# 0.3 discontinued functions

3 changes: 2 additions & 1 deletion base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ export
BitMatrix,
BitVector,
BunchKaufman,
CharString,
Cholesky,
CholeskyPivoted,
Cmd,
Expand Down Expand Up @@ -105,6 +104,7 @@ export
Triangular,
Tridiagonal,
UTF16String,
UTF32String,
VecOrMat,
Vector,
VersionNumber,
Expand Down Expand Up @@ -819,6 +819,7 @@ export
uppercase,
utf8,
utf16,
utf32,
warn,
xdump,

Expand Down
34 changes: 20 additions & 14 deletions base/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -565,25 +565,28 @@ next(s::GenericString, i::Int) = next(s.string, i)

## plain old character arrays ##

immutable CharString <: DirectIndexString
immutable UTF32String <: DirectIndexString
chars::Array{Char,1}

CharString(a::Array{Char,1}) = new(a)
CharString(c::Char...) = new([ c[i] for i=1:length(c) ])
UTF32String(a::Array{Char,1}) = new(a)
UTF32String(c::Char...) = new([ c[i] for i=1:length(c) ])
end
CharString(x...) = CharString(map(char,x)...)
UTF32String(x...) = UTF32String(map(char,x)...)

next(s::CharString, i::Int) = (s.chars[i], i+1)
endof(s::CharString) = length(s.chars)
length(s::CharString) = length(s.chars)
next(s::UTF32String, i::Int) = (s.chars[i], i+1)
endof(s::UTF32String) = length(s.chars)
length(s::UTF32String) = length(s.chars)

convert(::Type{CharString}, s::String) = CharString(Char[c for c in s])
convert{T<:String}(::Type{T}, v::Vector{Char}) = convert(T, CharString(v))
utf32(x) = convert(UTF32String, x)
convert(::Type{UTF32String}, s::String) = UTF32String(Char[c for c in s])
convert{T<:String}(::Type{T}, v::Vector{Char}) = convert(T, UTF32String(v))
convert(::Type{Array{Char,1}}, s::UTF32String) = s.chars
convert(::Type{Array{Char}}, s::UTF32String) = s.chars

reverse(s::CharString) = CharString(reverse(s.chars))
reverse(s::UTF32String) = UTF32String(reverse(s.chars))

sizeof(s::CharString) = sizeof(s.chars)
convert{T<:Union(Int32,Uint32)}(::Type{Ptr{T}}, s::CharString) =
sizeof(s::UTF32String) = sizeof(s.chars)
convert{T<:Union(Int32,Uint32,Char)}(::Type{Ptr{T}}, s::UTF32String) =
convert(Ptr{T}, s.chars)

## substrings reference original strings ##
Expand Down Expand Up @@ -813,8 +816,11 @@ end
## string promotion rules ##

promote_rule(::Type{UTF8String} , ::Type{ASCIIString}) = UTF8String
promote_rule(::Type{UTF8String} , ::Type{CharString} ) = UTF8String
promote_rule(::Type{ASCIIString}, ::Type{CharString} ) = UTF8String
promote_rule(::Type{UTF8String} , ::Type{UTF16String} ) = UTF8String
promote_rule(::Type{ASCIIString}, ::Type{UTF16String} ) = UTF8String
promote_rule(::Type{UTF32String} , ::Type{UTF16String} ) = UTF8String
promote_rule(::Type{UTF8String} , ::Type{UTF32String} ) = UTF8String
promote_rule(::Type{ASCIIString}, ::Type{UTF32String} ) = UTF8String
promote_rule{T<:String}(::Type{RepString}, ::Type{T}) = RepString

## printing literal quoted string data ##
Expand Down
2 changes: 2 additions & 0 deletions base/utf16.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ convert(::Type{UTF16String}, s::UTF16String) = s
convert(::Type{UTF16String}, s::String) = encode16(s)
convert(::Type{UTF8String}, s::UTF16String) =
sprint(length(s.data), io->for c in s; write(io,c::Char); end)
convert(::Type{Array{Uint16,1}}, s::UTF16String) = s.data
convert(::Type{Array{Uint16}}, s::UTF16String) = s.data

sizeof(s::UTF16String) = sizeof(s.data)
convert{T<:Union(Int16,Uint16)}(::Type{Ptr{T}}, s::UTF16String) =
Expand Down
2 changes: 1 addition & 1 deletion test/strings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -799,7 +799,7 @@ bin_val = hex2bytes("07bf")
@test sizeof(RopeString("abc","def")) == 6

# issue #3597
@test string(CharString(['T', 'e', 's', 't'])[1:1], "X") == "TX"
@test string(UTF32String(['T', 'e', 's', 't'])[1:1], "X") == "TX"

# issue #3710
@test prevind(SubString("{var}",2,4),4) == 3
Expand Down
4 changes: 2 additions & 2 deletions test/unicode.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ for encoding in ["UTF-32LE", "UTF-16BE", "UTF-16LE", "UTF-8"]
end

f=open(joinpath(unicodedir,"UTF-32LE.unicode"))
str1 = CharString(reinterpret(Char, read(f, Uint32, 1112065)[2:]))
str1 = UTF32String(reinterpret(Char, read(f, Uint32, 1112065)[2:]))
close(f)

f=open(joinpath(unicodedir,"UTF-8.unicode"))
Expand All @@ -29,7 +29,7 @@ close(f)
@test str1 == str2

str1 = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
str2 = CharString(
str2 = UTF32String(
8704, 32, 949, 32, 62, 32, 48, 44, 32, 8707, 32,
948, 32, 62, 32, 48, 58, 32, 124, 120, 45, 121, 124,
32, 60, 32, 948, 32, 8658, 32, 124, 102, 40, 120,
Expand Down

0 comments on commit ce40f89

Please sign in to comment.