From 3d75d1ee0da24b4b006661744d42edd8d9d14618 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 6 Oct 2020 17:30:34 -0400 Subject: [PATCH 1/2] investigate swprintf call failure (#37735) --- test/ccall.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/ccall.jl b/test/ccall.jl index 7b72969357ff8..062727da260d5 100644 --- a/test/ccall.jl +++ b/test/ccall.jl @@ -1719,6 +1719,7 @@ end else len = @ccall swprintf(buffer::Ptr{Cwchar_t}, n::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint end + Libc.systemerror("swprintf", len < 0) str = GC.@preserve buffer unsafe_string(pointer(buffer), len) @test str == "α+β=15" str = GC.@preserve buffer unsafe_string(Cwstring(pointer(buffer))) From ee3ff3c9c97461b77a54a7a14b4d6b49bb4b6e31 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 7 Oct 2020 12:35:08 -0400 Subject: [PATCH 2/2] ensure libc is using a UTF8-compatible encoding Some basic functionality in libc (such as printf) might be broken if the string encoding does not match our Cstring expectations. 
--- src/support/libsupportinit.c | 43 ++++++++++++++++++++++----- stdlib/Dates/test/io.jl | 52 ++++++++++++++++----------------- stdlib/Dates/test/types.jl | 2 +- test/ccall.jl | 15 +++++----- test/misc.jl | 38 +++++++----------------- test/testhelpers/withlocales.jl | 28 ++++++++++++++++++ 6 files changed, 108 insertions(+), 70 deletions(-) create mode 100644 test/testhelpers/withlocales.jl diff --git a/src/support/libsupportinit.c b/src/support/libsupportinit.c index 1dbce675be34a..c0ccf7836017e 100644 --- a/src/support/libsupportinit.c +++ b/src/support/libsupportinit.c @@ -7,18 +7,47 @@ extern "C" { #endif -static int isInitialized = 0; +static const char *jl_strchrnul(const char *s, int c) +{ + char *p = strchr(s, c); + if (p) + return p; + return s + strlen(s); +} void libsupport_init(void) { + static int isInitialized = 0; if (!isInitialized) { - - setlocale(LC_ALL, ""); // set to user locale - setlocale(LC_NUMERIC, "C"); // use locale-independent numeric formats - ios_init_stdstreams(); - - isInitialized=1; + isInitialized = 1; + + // adopt the user's locale for most formatting + setlocale(LC_ALL, ""); + // but use locale-independent numeric formats (for parsing) + setlocale(LC_NUMERIC, "C"); + // and try to specify ASCII or UTF-8 (preferred) for our Libc and Cstring functions + char *ctype = setlocale(LC_CTYPE, NULL); + if (ctype) { + size_t codeset = jl_strchrnul(ctype, '.') - ctype; + if (strncmp(ctype + codeset, ".UTF-8", strlen(".UTF-8")) == 0 || + strncmp(ctype + codeset, ".utf-8", strlen(".utf-8")) == 0 || + strncmp(ctype + codeset, ".utf8", strlen(".utf8")) == 0) + return; // already UTF-8 + ctype = (char*)memcpy(malloc_s(codeset + sizeof(".UTF-8")), ctype, codeset); + strcpy(ctype + codeset, ".UTF-8"); + } + setlocale(LC_CTYPE, "C"); // ASCII +#ifndef _OS_WINDOWS_ + if (setlocale(LC_CTYPE, "C.UTF-8") == NULL && // Linux/FreeBSD name + setlocale(LC_CTYPE, "en_US.UTF-8") == NULL && // Common name + setlocale(LC_CTYPE, "UTF-8") == NULL && // Apple 
name + (ctype == NULL || setlocale(LC_CTYPE, ctype) == NULL)) { // attempt to form it manually + ios_puts("WARNING: failed to select UTF-8 encoding, using ASCII\n", ios_stderr); + } +#endif + if (ctype) + free(ctype); } } diff --git a/stdlib/Dates/test/io.jl b/stdlib/Dates/test/io.jl index 450b2b9c92eee..d82dac88276cb 100644 --- a/stdlib/Dates/test/io.jl +++ b/stdlib/Dates/test/io.jl @@ -5,6 +5,9 @@ module IOTests using Test using Dates +const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") +include(joinpath(BASE_TEST_PATH, "testhelpers", "withlocales.jl")) + @testset "string/show representation of Date" begin @test string(Dates.Date(1, 1, 1)) == "0001-01-01" # January 1st, 1 AD/CE @test sprint(show, Dates.Date(1, 1, 1)) == "Dates.Date(\"0001-01-01\")" @@ -515,36 +518,31 @@ end end @testset "AM/PM" begin - # get the current locale - LC_TIME = 2 - time_locale = ccall(:setlocale, Cstring, (Cint, Cstring), LC_TIME, C_NULL) - try - # set the locale - ccall(:setlocale, Cstring, (Cint, Cstring), LC_TIME, "C") - - for (t12,t24) in (("12:00am","00:00"), ("12:07am","00:07"), ("01:24AM","01:24"), - ("12:00pm","12:00"), ("12:15pm","12:15"), ("11:59PM","23:59")) - d = DateTime("2018-01-01T$t24:00") - t = Time("$t24:00") - for HH in ("HH","II") - @test DateTime("2018-01-01 $t12","yyyy-mm-dd $HH:MMp") == d - @test Time("$t12","$HH:MMp") == t - end - tmstruct = Libc.strptime("%I:%M%p", t12) - @test Time(tmstruct) == t - @test uppercase(t12) == Dates.format(t, "II:MMp") == - Dates.format(d, "II:MMp") == - Libc.strftime("%I:%M%p", tmstruct) + for (t12,t24) in (("12:00am","00:00"), ("12:07am","00:07"), ("01:24AM","01:24"), + ("12:00pm","12:00"), ("12:15pm","12:15"), ("11:59PM","23:59")) + d = DateTime("2018-01-01T$t24:00") + t = Time("$t24:00") + for HH in ("HH","II") + @test DateTime("2018-01-01 $t12","yyyy-mm-dd $HH:MMp") == d + @test Time("$t12","$HH:MMp") == t end - for bad in ("00:24am", "00:24pm", "13:24pm", "2pm", "12:24p.m.", "12:24 pm", "12:24pµ") - 
@eval @test_throws ArgumentError Time($bad, "II:MMp") + local tmstruct, strftime + withlocales(["C"]) do + # test am/pm comparison handling + tmstruct = Libc.strptime("%I:%M%p", t12) + strftime = Libc.strftime("%I:%M%p", tmstruct) + nothing end - # if am/pm is missing, defaults to 24-hour clock - @eval Time("13:24", "II:MMp") == Time("13:24", "HH:MM") - finally - # recover the locale - ccall(:setlocale, Cstring, (Cint, Cstring), LC_TIME, time_locale) + @test Time(tmstruct) == t + @test uppercase(t12) == Dates.format(t, "II:MMp") == + Dates.format(d, "II:MMp") == + strftime + end + for bad in ("00:24am", "00:24pm", "13:24pm", "2pm", "12:24p.m.", "12:24 pm", "12:24pµ") + @test_throws ArgumentError Time(bad, "II:MMp") end + # if am/pm is missing, defaults to 24-hour clock + @test Time("13:24", "II:MMp") == Time("13:24", "HH:MM") end end diff --git a/stdlib/Dates/test/types.jl b/stdlib/Dates/test/types.jl index 19575428305f7..8823e56e41a2f 100644 --- a/stdlib/Dates/test/types.jl +++ b/stdlib/Dates/test/types.jl @@ -257,7 +257,7 @@ end @testset "issue #31524" begin dt1 = Libc.strptime("%Y-%M-%dT%H:%M:%SZ", "2018-11-16T10:26:14Z") - dt2 = Base.Libc.TmStruct(14, 30, 5, 10, 1, 99, 3, 40, 0) + dt2 = Libc.TmStruct(14, 30, 5, 10, 1, 99, 3, 40, 0) time = Time(dt1) @test typeof(time) == Time diff --git a/test/ccall.jl b/test/ccall.jl index 062727da260d5..c8484579929e1 100644 --- a/test/ccall.jl +++ b/test/ccall.jl @@ -1710,15 +1710,14 @@ end @test str == "hi+1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-1.1-2.2-3.3-4.4-5.5-6.6-7.7-8.8-9.9\n" end + @testset "Cwstring" begin - n = 100 - buffer = Array{Cwchar_t}(undef, n) - if Sys.iswindows() - # sprintf throws an error on Windows, see https://github.com/JuliaLang/julia/pull/36040#issuecomment-634774055 - len = @ccall swprintf_s(buffer::Ptr{Cwchar_t}, n::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint - else - len = @ccall swprintf(buffer::Ptr{Cwchar_t}, n::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint 
- end + buffer = Array{Cwchar_t}(undef, 100) + len = @static if Sys.iswindows() + @ccall swprintf_s(buffer::Ptr{Cwchar_t}, length(buffer)::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint + else + @ccall swprintf(buffer::Ptr{Cwchar_t}, length(buffer)::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint + end Libc.systemerror("swprintf", len < 0) str = GC.@preserve buffer unsafe_string(pointer(buffer), len) @test str == "α+β=15" diff --git a/test/misc.jl b/test/misc.jl index 920a03abdb520..62caf29aa7571 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -1,6 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license isdefined(Main, :FakePTYs) || @eval Main include("testhelpers/FakePTYs.jl") +include("testhelpers/withlocales.jl") # Tests that do not really go anywhere else @@ -697,36 +698,19 @@ end # issue #27239 @testset "strftime tests issue #27239" begin - - # save current locales - locales = Dict() - for cat in 0:9999 - cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, C_NULL) - if cstr != C_NULL - locales[cat] = unsafe_string(cstr) - end - end - # change to non-Unicode Korean - for (cat, _) in locales - korloc = ["ko_KR.EUC-KR", "ko_KR.CP949", "ko_KR.949", "Korean_Korea.949"] - for lc in korloc - cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc) - end + korloc = ["ko_KR.EUC-KR", "ko_KR.CP949", "ko_KR.949", "Korean_Korea.949"] + timestrs = String[] + withlocales(korloc) do + # system dependent formats + push!(timestrs, Libc.strftime(0.0)) + push!(timestrs, Libc.strftime("%a %A %b %B %p %Z", 0)) end - - # system dependent formats - timestr_c = Libc.strftime(0.0) - timestr_aAbBpZ = Libc.strftime("%a %A %b %B %p %Z", 0) - - # recover locales - for (cat, lc) in locales - cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc) - end - # tests - @test isvalid(timestr_c) - @test isvalid(timestr_aAbBpZ) + isempty(timestrs) && @warn "skipping stftime tests: no locale found for testing" + for s in 
timestrs + @test isvalid(s) + end end diff --git a/test/testhelpers/withlocales.jl b/test/testhelpers/withlocales.jl new file mode 100644 index 0000000000000..a3be17cce4464 --- /dev/null +++ b/test/testhelpers/withlocales.jl @@ -0,0 +1,28 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +function withlocales(f, newlocales) + # save current locales + locales = Dict{Int,String}() + for cat in 0:9999 + cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, C_NULL) + if cstr != C_NULL + locales[cat] = unsafe_string(cstr) + end + end + timestrs = String[] + try + # change to each of given locales + for lc in newlocales + set = true + for (cat, _) in locales + set &= ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc) != C_NULL + end + set && f() + end + finally + # recover locales + for (cat, lc) in locales + cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc) + end + end +end