From 8142486d64efe9aa0d996609066e099f89d55724 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Tue, 3 Jan 2017 16:40:27 -0500 Subject: [PATCH 1/6] more verbose multi-line display(c) for Char --- base/char.jl | 8 ++++++++ base/strings/utf8proc.jl | 43 ++++++++++++++++++++++++++++++++++++---- test/char.jl | 2 ++ 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/base/char.jl b/base/char.jl index a5ca950e02a21..1a024d0916eed 100644 --- a/base/char.jl +++ b/base/char.jl @@ -77,3 +77,11 @@ function show(io::IO, c::Char) end return end + +using UTF8proc: category_abbrev, category_string +function show(io::IO, ::MIME"text/plain", c::Char) + show(io, c) + u = UInt32(c) + print(io, ": ", isascii(c) ? "ASCII/" : "", "Unicode U+", hex(u, u > 0xffff ? 8 : 4)) + print(io, " (category ", category_abbrev(c), ": ", category_string(c), ")") +end diff --git a/base/strings/utf8proc.jl b/base/strings/utf8proc.jl index d83095b508abb..03361145f7a26 100644 --- a/base/strings/utf8proc.jl +++ b/base/strings/utf8proc.jl @@ -5,7 +5,7 @@ module UTF8proc import Base: show, ==, hash, string, Symbol, isless, length, eltype, start, next, done, convert, isvalid, lowercase, uppercase, titlecase -export isgraphemebreak +export isgraphemebreak, category_abbrev, category_string # also exported by Base: export normalize_string, graphemes, is_assigned_char, charwidth, isvalid, @@ -51,6 +51,40 @@ const UTF8PROC_CATEGORY_CF = 27 const UTF8PROC_CATEGORY_CS = 28 const UTF8PROC_CATEGORY_CO = 29 +# strings corresponding to the category constants" +const category_strings = [ + "Other, not assigned", + "Letter, uppercase", + "Letter, lowercase", + "Letter, titlecase", + "Letter, modifier", + "Letter, other", + "Mark, nonspacing", + "Mark, spacing combining", + "Mark, enclosing", + "Number, decimal digit", + "Number, letter", + "Number, other", + "Punctuation, connector", + "Punctuation, dash", + "Punctuation, open", + "Punctuation, close", + "Punctuation, initial quote", + "Punctuation, final quote", + "Punctuation, other", + "Symbol, math", + "Symbol, currency", + "Symbol, modifier", + "Symbol, other", + "Separator, space", + "Separator, line", + "Separator, paragraph", + "Other, control", + "Other, format", + "Other, surrogate", + "Other, private use" +] + const UTF8PROC_STABLE = (1<<1) const UTF8PROC_COMPAT = (1<<2) const UTF8PROC_COMPOSE = (1<<3) @@ -164,9 +198,10 @@ titlecase(c::Char) = isascii(c) ? ('a' <= c <= 'z' ? c - 0x20 : c) : Char(ccall( ############################################################################ # returns UTF8PROC_CATEGORY code in 0:30 giving Unicode category -function category_code(c) - return ccall(:utf8proc_category, Cint, (UInt32,), c) -end +category_code(c) = ccall(:utf8proc_category, Cint, (UInt32,), c) + +category_abbrev(c) = unsafe_string(ccall(:utf8proc_category_string, Cstring, (UInt32,), c)) +category_string(c) = category_strings[category_code(c)+1] """ is_assigned_char(c) -> Bool diff --git a/test/char.jl b/test/char.jl index f2fc7bef5d5c0..9b99f07a8748a 100644 --- a/test/char.jl +++ b/test/char.jl @@ -195,3 +195,5 @@ let end @test !isequal('x', 120) + +@test sprint(show, "text/plain", '$') == "'\$': ASCII/Unicode U+0024 (category Sc: Symbol, currency)" From a0a8c2393abb68c63caed80acdccb6a6c0b020d9 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Tue, 3 Jan 2017 16:45:51 -0500 Subject: [PATCH 2/6] tweaks --- base/strings/utf8proc.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/base/strings/utf8proc.jl b/base/strings/utf8proc.jl index 03361145f7a26..00ae091051e3d 100644 --- a/base/strings/utf8proc.jl +++ b/base/strings/utf8proc.jl @@ -5,7 +5,7 @@ module UTF8proc import Base: show, ==, hash, string, Symbol, isless, length, eltype, start, next, done, convert, isvalid, lowercase, uppercase, titlecase -export isgraphemebreak, category_abbrev, category_string +export isgraphemebreak, category_code, category_abbrev, category_string # also exported by Base: export normalize_string, graphemes, is_assigned_char, charwidth, isvalid, @@ -51,7 +51,7 @@ const UTF8PROC_CATEGORY_CF = 27 const UTF8PROC_CATEGORY_CS = 28 const UTF8PROC_CATEGORY_CO = 29 -# strings corresponding to the category constants" +# strings corresponding to the category constants const category_strings = [ "Other, not assigned", "Letter, uppercase", @@ -200,6 +200,7 @@ titlecase(c::Char) = isascii(c) ? ('a' <= c <= 'z' ? c - 0x20 : c) : Char(ccall( # returns UTF8PROC_CATEGORY code in 0:30 giving Unicode category category_code(c) = ccall(:utf8proc_category, Cint, (UInt32,), c) +# more human-readable representations of the category code category_abbrev(c) = unsafe_string(ccall(:utf8proc_category_string, Cstring, (UInt32,), c)) category_string(c) = category_strings[category_code(c)+1] From fe1eee746140a6ac99da0afc922785a283c05559 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Tue, 3 Jan 2017 17:15:19 -0500 Subject: [PATCH 3/6] shorten padding for non-BMP char display --- base/char.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/char.jl b/base/char.jl index 1a024d0916eed..00b0939a98adc 100644 --- a/base/char.jl +++ b/base/char.jl @@ -82,6 +82,6 @@ using UTF8proc: category_abbrev, category_string function show(io::IO, ::MIME"text/plain", c::Char) show(io, c) u = UInt32(c) - print(io, ": ", isascii(c) ? "ASCII/" : "", "Unicode U+", hex(u, u > 0xffff ? 8 : 4)) + print(io, ": ", isascii(c) ? "ASCII/" : "", "Unicode U+", hex(u, u > 0xffff ? 6 : 4)) print(io, " (category ", category_abbrev(c), ": ", category_string(c), ")") end From 26a2e9d3de046f7d5e80ae4f73a7a0c036814fd4 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Tue, 3 Jan 2017 19:05:39 -0500 Subject: [PATCH 4/6] UTF8proc is not defined yet --- base/char.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/base/char.jl b/base/char.jl index 00b0939a98adc..f3c3b83abab56 100644 --- a/base/char.jl +++ b/base/char.jl @@ -78,10 +78,9 @@ function show(io::IO, c::Char) return end -using UTF8proc: category_abbrev, category_string function show(io::IO, ::MIME"text/plain", c::Char) show(io, c) u = UInt32(c) print(io, ": ", isascii(c) ? "ASCII/" : "", "Unicode U+", hex(u, u > 0xffff ? 6 : 4)) - print(io, " (category ", category_abbrev(c), ": ", category_string(c), ")") + print(io, " (category ", UTF8proc.category_abbrev(c), ": ", UTF8proc.category_string(c), ")") end From a456fd45e3e4f022f2543a85b0abd43f5d20488c Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Tue, 3 Jan 2017 19:19:11 -0500 Subject: [PATCH 5/6] more bootstrapping fixes --- base/multimedia.jl | 9 ++++----- base/sysimg.jl | 6 ++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/base/multimedia.jl b/base/multimedia.jl index d88ba0a4c6f4b..6756adad54846 100644 --- a/base/multimedia.jl +++ b/base/multimedia.jl @@ -11,17 +11,16 @@ export Display, display, pushdisplay, popdisplay, displayable, redisplay, # that Julia's dispatch and overloading mechanisms can be used to # dispatch show and to add conversions for new types. -immutable MIME{mime} end +# defined in sysimg.jl for bootstrapping: +# immutable MIME{mime} end +# macro MIME_str(s) +import Base: MIME, @MIME_str import Base: show, print, string, convert MIME(s) = MIME{Symbol(s)}() show{mime}(io::IO, ::MIME{mime}) = print(io, "MIME type ", string(mime)) print{mime}(io::IO, ::MIME{mime}) = print(io, mime) -macro MIME_str(s) - :(MIME{$(Expr(:quote, Symbol(s)))}) -end - ########################################################################### # For any type T one can define show(io, ::MIME"type", x::T) = ... # in order to provide a way to export T as a given mime type. diff --git a/base/sysimg.jl b/base/sysimg.jl index b43b7aa1562e4..a078ef0a3d2c0 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -158,6 +158,12 @@ include("io.jl") include("iostream.jl") include("iobuffer.jl") +# define MIME"foo/bar" early so that we can overload 3-arg show +immutable MIME{mime} end +macro MIME_str(s) + :(MIME{$(Expr(:quote, Symbol(s)))}) +end + # strings & printing include("char.jl") include("intfuncs.jl") From d7cfa80f3b13291826654067a2f15432c764809f Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 4 Jan 2017 12:39:53 -0500 Subject: [PATCH 6/6] test repr for char --- test/char.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/char.jl b/test/char.jl index 9b99f07a8748a..8a8a2f825cbd3 100644 --- a/test/char.jl +++ b/test/char.jl @@ -197,3 +197,4 @@ end @test !isequal('x', 120) @test sprint(show, "text/plain", '$') == "'\$': ASCII/Unicode U+0024 (category Sc: Symbol, currency)" +@test repr('$') == "'\$'"