From 49cbf5aa10ecf06501e6a3094489c7c198fa491e Mon Sep 17 00:00:00 2001 From: Klaus Crusius Date: Fri, 29 Sep 2017 17:40:04 +0200 Subject: [PATCH] allow non-ascii characters in printf format strings (#23899) fixes issue #23880 --- base/printf.jl | 15 ++++++++------- test/printf.jl | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/base/printf.jl b/base/printf.jl index 2f8347f6f8a95..c6e121405f02f 100644 --- a/base/printf.jl +++ b/base/printf.jl @@ -36,23 +36,24 @@ end ### printf format string parsing ### function parse(s::AbstractString) - # parse format string in to stings and format tuples + # parse format string into strings and format tuples list = [] i = j = start(s) + j1 = 0 # invariant: j1 == prevind(s, j) while !done(s,j) c, k = next(s,j) if c == '%' - isempty(s[i:j-1]) || push!(list, s[i:j-1]) + i > j1 || push!(list, s[i:j1]) flags, width, precision, conversion, k = parse1(s,k) '\'' in flags && error("printf format flag ' not yet supported") conversion == 'n' && error("printf feature %n not supported") push!(list, conversion == '%' ? "%" : (flags,width,precision,conversion)) - i = j = k - else - j = k + i = k end + j1 = j + j = k end - isempty(s[i:end]) || push!(list, s[i:end]) + i > endof(s) || push!(list, s[i:end]) # coalesce adjacent strings i = 1 while i < length(list) @@ -98,7 +99,7 @@ function parse1(s::AbstractString, k::Integer) while c in "#0- + '" c, k = next_or_die(s,k) end - flags = String(s[j:k-2]) + flags = String(s[j:prevind(s,k)-1]) # exploiting that all flags are one-byte. # parse width while '0' <= c <= '9' width = 10*width + c-'0' diff --git a/test/printf.jl b/test/printf.jl index 90c0e10769621..0674d172d66d1 100644 --- a/test/printf.jl +++ b/test/printf.jl @@ -272,3 +272,18 @@ end # Check bug with trailing nul printing BigFloat @test (@sprintf("%.330f", BigFloat(1)))[end] != '\0' + +# Check utf8 strings #23880 +@test (@sprintf("X%d", 2)) == "X2" +@test (@sprintf("\u00d0%d", 2)) == "\u00d02" +@test (@sprintf("\u0f00%d", 2)) == "\u0f002" +@test (@sprintf("\U0001ffff%d", 2)) == "\U0001ffff2" +@test (@sprintf("%dX%d", 1, 2)) == "1X2" +@test (@sprintf("%d\u00d0%d", 1, 2)) == "1\u00d02" +@test (@sprintf("%d\u0f00%d", 1, 2)) == "1\u0f002" +@test (@sprintf("%d\U0001ffff%d", 1, 2)) == "1\U0001ffff2" +@test (@sprintf("%d\u2203%d\u0203", 1, 2)) == "1\u22032\u0203" +@test_throws ArgumentError @macroexpand(@sprintf("%y%d", 1, 2)) +@test_throws ArgumentError @macroexpand(@sprintf("%\u00d0%d", 1, 2)) +@test_throws ArgumentError @macroexpand(@sprintf("%\u0f00%d", 1, 2)) +@test_throws ArgumentError @macroexpand(@sprintf("%\U0001ffff%d", 1, 2))