Skip to content

Commit

Permalink
Print some useful info when hitting some cases of invalid UTF-8 (#24311)
Browse files Browse the repository at this point in the history
  • Loading branch information
andreasnoack authored Oct 26, 2017
1 parent 7ae9955 commit 29fcb37
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
2 changes: 1 addition & 1 deletion base/strings/errors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
## Error messages for Unicode / UTF support

const UTF_ERR_SHORT = "invalid UTF-8 sequence starting at index <<1>> (0x<<2>> missing one or more continuation bytes)"
-const UTF_ERR_INVALID_INDEX = "invalid character index"
+const UTF_ERR_INVALID_INDEX = "invalid character index <<1>> (0x<<2>> is a continuation byte)"

struct UnicodeError <: Exception
errmsg::AbstractString ##< A UTF_ERR_ message
Expand Down
15 changes: 15 additions & 0 deletions test/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -655,3 +655,18 @@ end
@test last(s, length(s)) == s
@test_throws BoundsError last(s, length(s)+1)
end

# Indexing a string at a continuation byte must throw a UnicodeError whose
# printed form reports both the index and the offending byte.
@testset "invalid code point" begin
    bytes = UInt8[0x61, 0xba, 0x41]
    s = String(bytes)
    @test !isvalid(s)
    @test_throws UnicodeError s[2]

    # Capture the thrown exception so its `show` output can be inspected.
    err = try
        s[2]
    catch ex
        ex
    end

    expected = "UnicodeError: invalid character index 2 (0xba is a continuation byte)"
    @test sprint(show, err) == expected
end

0 comments on commit 29fcb37

Please sign in to comment.