Skip to content

Commit

Permalink
ensure libc is using a UTF8-compatible encoding
Browse files Browse the repository at this point in the history
Some basic functionality in libc (such as printf) might be broken if the
string encoding does not match our Cstring expectations.
  • Loading branch information
vtjnash committed Oct 19, 2020
1 parent 3d75d1e commit ee3ff3c
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 70 deletions.
43 changes: 36 additions & 7 deletions src/support/libsupportinit.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,47 @@
extern "C" {
#endif

static int isInitialized = 0;
/*
 * Return a pointer to the first occurrence of `c` in the NUL-terminated
 * string `s`, or a pointer to the terminating NUL of `s` when `c` does not
 * occur. Unlike strchr, the result is never NULL, so callers can subtract
 * `s` from it to get the length of the prefix that precedes `c`.
 * (Same contract as the GNU extension strchrnul, which is not portable.)
 */
static const char *jl_strchrnul(const char *s, int c)
{
    /* keep the const qualifier: `s` is const, and so is the return type */
    const char *p = strchr(s, c);
    return p ? p : s + strlen(s);
}

/*
 * One-time setup for libsupport: initializes the ios standard streams and
 * configures the C locale (user locale overall, "C" for LC_NUMERIC, and an
 * attempt to end up with a UTF-8 -- or at least ASCII -- LC_CTYPE).
 * All work sits inside the `isInitialized` guard, so calls after the first
 * one do nothing.
 *
 * NOTE(review): this listing appears to be a rendered diff hunk, so what look
 * like pre-change statements (the first setlocale pair and the unspaced
 * `isInitialized=1;`) are shown next to their replacements -- confirm against
 * the real file before relying on the exact statement list.
 */
void libsupport_init(void)
{
static int isInitialized = 0;
if (!isInitialized) {

setlocale(LC_ALL, ""); // set to user locale
setlocale(LC_NUMERIC, "C"); // use locale-independent numeric formats

ios_init_stdstreams();

isInitialized=1;
isInitialized = 1;

// adopt the user's locale for most formatting
setlocale(LC_ALL, "");
// but use locale-independent numeric formats (for parsing)
setlocale(LC_NUMERIC, "C");
// and try to specify ASCII or UTF-8 (preferred) for our Libc and Cstring functions
// a NULL locale argument only queries the current LC_CTYPE name
char *ctype = setlocale(LC_CTYPE, NULL);
if (ctype) {
// length of the part before the first '.', or of the whole name if it has no '.'
size_t codeset = jl_strchrnul(ctype, '.') - ctype;
if (strncmp(ctype + codeset, ".UTF-8", strlen(".UTF-8")) == 0 ||
strncmp(ctype + codeset, ".utf-8", strlen(".utf-8")) == 0 ||
strncmp(ctype + codeset, ".utf8", strlen(".utf8")) == 0)
return; // already UTF-8
// from here on `ctype` is our own buffer holding "<prefix>.UTF-8";
// sizeof(".UTF-8") includes room for the terminating NUL
ctype = (char*)memcpy(malloc_s(codeset + sizeof(".UTF-8")), ctype, codeset);
strcpy(ctype + codeset, ".UTF-8");
}
setlocale(LC_CTYPE, "C"); // ASCII
#ifndef _OS_WINDOWS_
// try each candidate in order; && stops at the first setlocale that succeeds
if (setlocale(LC_CTYPE, "C.UTF-8") == NULL && // Linux/FreeBSD name
setlocale(LC_CTYPE, "en_US.UTF-8") == NULL && // Common name
setlocale(LC_CTYPE, "UTF-8") == NULL && // Apple name
(ctype == NULL || setlocale(LC_CTYPE, ctype) == NULL)) { // attempt to form it manually
ios_puts("WARNING: failed to select UTF-8 encoding, using ASCII\n", ios_stderr);
}
#endif
// `ctype` is either NULL or the malloc_s buffer built above
// (presumably malloc-compatible -- verify malloc_s pairs with free)
if (ctype)
free(ctype);
}
}

Expand Down
52 changes: 25 additions & 27 deletions stdlib/Dates/test/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ module IOTests
using Test
using Dates

const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
include(joinpath(BASE_TEST_PATH, "testhelpers", "withlocales.jl"))

@testset "string/show representation of Date" begin
@test string(Dates.Date(1, 1, 1)) == "0001-01-01" # January 1st, 1 AD/CE
@test sprint(show, Dates.Date(1, 1, 1)) == "Dates.Date(\"0001-01-01\")"
Expand Down Expand Up @@ -515,36 +518,31 @@ end
end

@testset "AM/PM" begin
# get the current locale
LC_TIME = 2
time_locale = ccall(:setlocale, Cstring, (Cint, Cstring), LC_TIME, C_NULL)
try
# set the locale
ccall(:setlocale, Cstring, (Cint, Cstring), LC_TIME, "C")

for (t12,t24) in (("12:00am","00:00"), ("12:07am","00:07"), ("01:24AM","01:24"),
("12:00pm","12:00"), ("12:15pm","12:15"), ("11:59PM","23:59"))
d = DateTime("2018-01-01T$t24:00")
t = Time("$t24:00")
for HH in ("HH","II")
@test DateTime("2018-01-01 $t12","yyyy-mm-dd $HH:MMp") == d
@test Time("$t12","$HH:MMp") == t
end
tmstruct = Libc.strptime("%I:%M%p", t12)
@test Time(tmstruct) == t
@test uppercase(t12) == Dates.format(t, "II:MMp") ==
Dates.format(d, "II:MMp") ==
Libc.strftime("%I:%M%p", tmstruct)
for (t12,t24) in (("12:00am","00:00"), ("12:07am","00:07"), ("01:24AM","01:24"),
("12:00pm","12:00"), ("12:15pm","12:15"), ("11:59PM","23:59"))
d = DateTime("2018-01-01T$t24:00")
t = Time("$t24:00")
for HH in ("HH","II")
@test DateTime("2018-01-01 $t12","yyyy-mm-dd $HH:MMp") == d
@test Time("$t12","$HH:MMp") == t
end
for bad in ("00:24am", "00:24pm", "13:24pm", "2pm", "12:24p.m.", "12:24 pm", "12:24pµ")
@eval @test_throws ArgumentError Time($bad, "II:MMp")
local tmstruct, strftime
withlocales(["C"]) do
# test am/pm comparison handling
tmstruct = Libc.strptime("%I:%M%p", t12)
strftime = Libc.strftime("%I:%M%p", tmstruct)
nothing
end
# if am/pm is missing, defaults to 24-hour clock
@eval Time("13:24", "II:MMp") == Time("13:24", "HH:MM")
finally
# recover the locale
ccall(:setlocale, Cstring, (Cint, Cstring), LC_TIME, time_locale)
@test Time(tmstruct) == t
@test uppercase(t12) == Dates.format(t, "II:MMp") ==
Dates.format(d, "II:MMp") ==
strftime
end
for bad in ("00:24am", "00:24pm", "13:24pm", "2pm", "12:24p.m.", "12:24 pm", "12:24pµ")
@test_throws ArgumentError Time(bad, "II:MMp")
end
# if am/pm is missing, defaults to 24-hour clock
@test Time("13:24", "II:MMp") == Time("13:24", "HH:MM")
end

end
2 changes: 1 addition & 1 deletion stdlib/Dates/test/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ end

@testset "issue #31524" begin
dt1 = Libc.strptime("%Y-%M-%dT%H:%M:%SZ", "2018-11-16T10:26:14Z")
dt2 = Base.Libc.TmStruct(14, 30, 5, 10, 1, 99, 3, 40, 0)
dt2 = Libc.TmStruct(14, 30, 5, 10, 1, 99, 3, 40, 0)

time = Time(dt1)
@test typeof(time) == Time
Expand Down
15 changes: 7 additions & 8 deletions test/ccall.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1710,15 +1710,14 @@ end
@test str == "hi+1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-1.1-2.2-3.3-4.4-5.5-6.6-7.7-8.8-9.9\n"
end


@testset "Cwstring" begin
n = 100
buffer = Array{Cwchar_t}(undef, n)
if Sys.iswindows()
# sprintf throws an error on Windows, see https://github.com/JuliaLang/julia/pull/36040#issuecomment-634774055
len = @ccall swprintf_s(buffer::Ptr{Cwchar_t}, n::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint
else
len = @ccall swprintf(buffer::Ptr{Cwchar_t}, n::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint
end
buffer = Array{Cwchar_t}(undef, 100)
len = @static if Sys.iswindows()
@ccall swprintf_s(buffer::Ptr{Cwchar_t}, length(buffer)::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint
else
@ccall swprintf(buffer::Ptr{Cwchar_t}, length(buffer)::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint
end
Libc.systemerror("swprintf", len < 0)
str = GC.@preserve buffer unsafe_string(pointer(buffer), len)
@test str == "α+β=15"
Expand Down
38 changes: 11 additions & 27 deletions test/misc.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

isdefined(Main, :FakePTYs) || @eval Main include("testhelpers/FakePTYs.jl")
include("testhelpers/withlocales.jl")

# Tests that do not really go anywhere else

Expand Down Expand Up @@ -697,36 +698,19 @@ end

# issue #27239
@testset "strftime tests issue #27239" begin

# save current locales
locales = Dict()
for cat in 0:9999
cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, C_NULL)
if cstr != C_NULL
locales[cat] = unsafe_string(cstr)
end
end

# change to non-Unicode Korean
for (cat, _) in locales
korloc = ["ko_KR.EUC-KR", "ko_KR.CP949", "ko_KR.949", "Korean_Korea.949"]
for lc in korloc
cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc)
end
korloc = ["ko_KR.EUC-KR", "ko_KR.CP949", "ko_KR.949", "Korean_Korea.949"]
timestrs = String[]
withlocales(korloc) do
# system dependent formats
push!(timestrs, Libc.strftime(0.0))
push!(timestrs, Libc.strftime("%a %A %b %B %p %Z", 0))
end

# system dependent formats
timestr_c = Libc.strftime(0.0)
timestr_aAbBpZ = Libc.strftime("%a %A %b %B %p %Z", 0)

# recover locales
for (cat, lc) in locales
cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc)
end

# tests
@test isvalid(timestr_c)
@test isvalid(timestr_aAbBpZ)
isempty(timestrs) && @warn "skipping stftime tests: no locale found for testing"
for s in timestrs
@test isvalid(s)
end
end


Expand Down
28 changes: 28 additions & 0 deletions test/testhelpers/withlocales.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

"""
    withlocales(f, newlocales)

Call `f()` once for every locale name in `newlocales` that the C library accepts for
all of its locale categories, with that locale active, and restore the locale settings
that were in effect on entry afterwards (also when `f` throws).

Locale names that cannot be set for every category are skipped without calling `f`.
Always returns `nothing`; collect results inside `f` if they are needed.

# Examples
```julia
withlocales(["C"]) do
    Libc.strftime("%I:%M%p", 0)
end
```
"""
function withlocales(f, newlocales)
    # save current locales: probe every category id in 0:9999 and remember the ones
    # that setlocale recognises (i.e. returns a name for)
    locales = Dict{Int,String}()
    for cat in 0:9999
        cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, C_NULL)
        if cstr != C_NULL
            locales[cat] = unsafe_string(cstr)
        end
    end
    try
        # change to each of given locales
        for lc in newlocales
            set = true
            # deliberately no short-circuit: every category is attempted
            for cat in keys(locales)
                set &= ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc) != C_NULL
            end
            # only run `f` when the locale took effect for all categories
            set && f()
        end
    finally
        # recover locales
        for (cat, lc) in locales
            ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc)
        end
    end
    return nothing
end

0 comments on commit ee3ff3c

Please sign in to comment.