From bf1c9999dd35a6ccf7412114b75b943c02cb870d Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 6 Oct 2020 08:19:03 +0200 Subject: [PATCH] Port the Float16 runtime functions to C. The Julia runtime wasn't safe wrt. GC operations. I left the commit in for archival purposes, in case we want to revisit this again later. --- base/Base.jl | 4 - base/runtime/runtime.jl | 161 --------------------------------- src/intrinsics.cpp | 195 ++++++++++++++++++++++++++++++++++++++++ src/julia.expmap | 6 ++ test/choosetests.jl | 2 +- test/runtime.jl | 143 ----------------------------- 6 files changed, 202 insertions(+), 309 deletions(-) delete mode 100644 base/runtime/runtime.jl delete mode 100644 test/runtime.jl diff --git a/base/Base.jl b/base/Base.jl index 509027a4b5abbf..207b571f30e16f 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -129,10 +129,6 @@ end include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "build_h.jl")) # include($BUILDROOT/base/build_h.jl) include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "version_git.jl")) # include($BUILDROOT/base/version_git.jl) -# run-time library -include("runtime/runtime.jl") -using .Runtime - # numeric operations include("hashing.jl") include("rounding.jl") diff --git a/base/runtime/runtime.jl b/base/runtime/runtime.jl deleted file mode 100644 index 616851ced0606a..00000000000000 --- a/base/runtime/runtime.jl +++ /dev/null @@ -1,161 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -module Runtime - -function _ccallable(rt::Type, sigt::Type) - ccall(:jl_extern_c, Cvoid, (Any, Any), rt, sigt) -end - -# early version of `@ccallable` -macro ccallable(rt, def) - sig = def.args[1] - f = :(typeof($(sig.args[1]))) - at = map(sig.args[2:end]) do a - a.args[end] - end - return quote - $(esc(def)) - _ccallable($(esc(rt)), $(Expr(:curly, :Tuple, esc(f), map(esc, at)...))) - end - return -end - -# early version if `isapple()` -# -# Xcode links compiler-rt, so we shouldn't emit our implementation to avoid duplicate -const KERNEL = ccall(:jl_get_UNAME, Any, ()) -isapple() = (KERNEL === :Apple || KERNEL === :Darwin) - - -## Float16 intrinsics - -# note that we can't actually use Float16 in these implementations, as LLVM will happily -# lower, e.g., `reinterpret(Float16, ::UInt16)` / `bitcast i16 to half` to `truncsfhf2` -# because it wants to store the `half` in a single-precision register. this causes recursion -# when compiling these intrinsics. LLVM's compiler-rt similarly returns i16 for Float16. - -# Float32 -> Float16 algorithm from: -# "Fast Half Float Conversion" by Jeroen van der Zijp -# ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf -# -# With adjustments for round-to-nearest, ties to even. 
-# -let _basetable = Vector{UInt16}(undef, 512), - _shifttable = Vector{UInt8}(undef, 512) - for i = 0:255 - e = i - 127 - if e < -25 # Very small numbers map to zero - _basetable[i|0x000+1] = 0x0000 - _basetable[i|0x100+1] = 0x8000 - _shifttable[i|0x000+1] = 25 - _shifttable[i|0x100+1] = 25 - elseif e < -14 # Small numbers map to denorms - _basetable[i|0x000+1] = 0x0000 - _basetable[i|0x100+1] = 0x8000 - _shifttable[i|0x000+1] = -e-1 - _shifttable[i|0x100+1] = -e-1 - elseif e <= 15 # Normal numbers just lose precision - _basetable[i|0x000+1] = ((e+15)<<10) - _basetable[i|0x100+1] = ((e+15)<<10) | 0x8000 - _shifttable[i|0x000+1] = 13 - _shifttable[i|0x100+1] = 13 - elseif e < 128 # Large numbers map to Infinity - _basetable[i|0x000+1] = 0x7C00 - _basetable[i|0x100+1] = 0xFC00 - _shifttable[i|0x000+1] = 24 - _shifttable[i|0x100+1] = 24 - else # Infinity and NaN's stay Infinity and NaN's - _basetable[i|0x000+1] = 0x7C00 - _basetable[i|0x100+1] = 0xFC00 - _shifttable[i|0x000+1] = 13 - _shifttable[i|0x100+1] = 13 - end - end - global const shifttable = (_shifttable...,) - global const basetable = (_basetable...,) -end - -# truncation -function truncsfhf2(val::Float32) - f = reinterpret(UInt32, val) - if f&0x7fffffff > 0x7f800000 # isnan without reinterpreting as Float32 - t = 0x8000 ⊻ (0x8000 & ((f >> 0x10) % UInt16)) - return t ⊻ ((f >> 0xd) % UInt16) - end - i = ((f & ~Base.significand_mask(Float32)) >> Base.significand_bits(Float32)) + 1 - @inbounds sh = shifttable[i] - f &= Base.significand_mask(Float32) - # If `val` is subnormal, the tables are set up to force the - # result to 0, so the significand has an implicit `1` in the - # cases we care about. 
- f |= Base.significand_mask(Float32) + 0x1 - @inbounds h = (basetable[i] + (f >> sh) & Base.significand_mask(Float16)) % UInt16 - # round - # NOTE: we maybe should ignore NaNs here, but the payload is - # getting truncated anyway so "rounding" it might not matter - nextbit = (f >> (sh-1)) & 1 - if nextbit != 0 && (h & 0x7C00) != 0x7C00 - # Round halfway to even or check lower bits - if h&1 == 1 || (f & ((1<<(sh-1))-1)) != 0 - h += UInt16(1) - end - end - h -end -truncdfhf2(x::Float64) = truncsfhf2(Float32(x)) -if !isapple() - @ccallable UInt16 __truncsfhf2(val::Float32) = truncsfhf2(val) - @ccallable UInt16 __gnu_f2h_ieee(val::Float32) = truncsfhf2(val) - @ccallable UInt16 __truncdfhf2(val::Float64) = truncdfhf2(val) -end - -# extension -function extendhfsf2(val::UInt16) - local ival::UInt32 = val - local sign::UInt32 = (ival & 0x8000) >> 15 - local exp::UInt32 = (ival & 0x7c00) >> 10 - local sig::UInt32 = (ival & 0x3ff) >> 0 - local ret::UInt32 - - if exp == 0 - if sig == 0 - sign = sign << 31 - ret = sign | exp | sig - else - n_bit = 1 - bit = 0x0200 - while (bit & sig) == 0 - n_bit = n_bit + 1 - bit = bit >> 1 - end - sign = sign << 31 - exp = ((-14 - n_bit + 127) << 23) % UInt32 - sig = ((sig & (~bit)) << n_bit) << (23 - 10) - ret = sign | exp | sig - end - elseif exp == 0x1f - if sig == 0 # Inf - if sign == 0 - ret = 0x7f800000 - else - ret = 0xff800000 - end - else # NaN - ret = 0x7fc00000 | (sign<<31) | (sig<<(23-10)) - end - else - sign = sign << 31 - exp = ((exp - 15 + 127) << 23) % UInt32 - sig = sig << (23 - 10) - ret = sign | exp | sig - end - reinterpret(Float32, ret) -end -extendhfdf2(x::UInt16) = Float64(extendhfsf2(x)) -if !isapple() - @ccallable Float32 __extendhfsf2(val::UInt16) = extendhfsf2(val) - @ccallable Float32 __gnu_h2f_ieee(val::UInt16) = extendhfsf2(val) -end -@ccallable Float32 __extendhfdf2(val::UInt16) = extendhfdf2(val) - -end diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index e33aad90d9f47e..0db0b7d2b44e3c 100644 --- 
a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -1299,3 +1299,198 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg } assert(0 && "unreachable"); } + + +// float16 intrinsics +// TODO: use LLVM's compiler-rt + +static inline float half_to_float(uint16_t ival) +{ + uint32_t sign = (ival & 0x8000) >> 15; + uint32_t exp = (ival & 0x7c00) >> 10; + uint32_t sig = (ival & 0x3ff) >> 0; + uint32_t ret; + + if (exp == 0) { + if (sig == 0) { + sign = sign << 31; + ret = sign | exp | sig; + } + else { + int n_bit = 1; + uint16_t bit = 0x0200; + while ((bit & sig) == 0) { + n_bit = n_bit + 1; + bit = bit >> 1; + } + sign = sign << 31; + exp = ((-14 - n_bit + 127) << 23); + sig = ((sig & (~bit)) << n_bit) << (23 - 10); + ret = sign | exp | sig; + } + } + else if (exp == 0x1f) { + if (sig == 0) { // Inf + if (sign == 0) + ret = 0x7f800000; + else + ret = 0xff800000; + } + else // NaN + ret = 0x7fc00000 | (sign << 31) | (sig << (23 - 10)); + } + else { + sign = sign << 31; + exp = ((exp - 15 + 127) << 23); + sig = sig << (23 - 10); + ret = sign | exp | sig; + } + + float fret = *((float *)(&ret)); + return fret; +} + +// float to half algorithm from: +// "Fast Half Float Conversion" by Jeroen van der Zijp +// ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf +// +// With adjustments for round-to-nearest, ties to even. 
+ +static uint16_t basetable[512] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, + 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, + 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 
0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, + 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, + 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, + 0xf000, 0xf400, 0xf800, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 
0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00}; + +static uint8_t shifttable[512] = { + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, + 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x0d, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, + 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d}; + +static inline uint16_t float_to_half(float param) +{ + uint32_t f = *((uint32_t *)(&param)); + if ((f & 0x7fffffff) > 0x7f800000) { // NaN: binary32 exponent all ones with a non-zero mantissa (sign bit masked off) + uint32_t t = 0x8000 ^ (0x8000 & ((uint16_t)(f >> 0x10))); + return t ^ ((uint16_t)(f >> 0xd)); + } + int i = ((f & ~0x007fffff) >> 23); + uint8_t sh = shifttable[i]; + f &= 0x007fffff; + // If `val` is subnormal, the tables are set up to force the + // result to 0, so the significand has an implicit `1` in the + // cases we care about. 
+ f |= 0x007fffff + 0x1; + uint16_t h = (uint16_t)(basetable[i] + ((f >> sh) & 0x03ff)); + // round + // NOTE: we maybe should ignore NaNs here, but the payload is + // getting truncated anyway so "rounding" it might not matter + int nextbit = (f >> (sh - 1)) & 1; + if (nextbit != 0 && (h & 0x7C00) != 0x7C00) { + // Round halfway to even or check lower bits + if ((h & 1) == 1 || (f & ((1 << (sh - 1)) - 1)) != 0) + h += UINT16_C(1); + } + return h; +} + +#if !defined(_OS_DARWIN_) // xcode already links compiler-rt + +extern "C" JL_DLLEXPORT float __gnu_h2f_ieee(uint16_t param) +{ + return half_to_float(param); +} + +extern "C" JL_DLLEXPORT float __extendhfsf2(uint16_t param) +{ + return half_to_float(param); +} + +extern "C" JL_DLLEXPORT uint16_t __gnu_f2h_ieee(float param) +{ + return float_to_half(param); +} + +extern "C" JL_DLLEXPORT uint16_t __truncdfhf2(double param) +{ + return float_to_half((float)param); +} + +#endif diff --git a/src/julia.expmap b/src/julia.expmap index aa77f4c8cff310..baf3220f147ed7 100644 --- a/src/julia.expmap +++ b/src/julia.expmap @@ -42,6 +42,12 @@ environ; __progname; + /* compiler run-time intrinsics */ + __gnu_h2f_ieee; + __extendhfsf2; + __gnu_f2h_ieee; + __truncdfhf2; + local: *; }; diff --git a/test/choosetests.jl b/test/choosetests.jl index 673e161ad82973..887814c6bab0ac 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -33,7 +33,7 @@ in the `choices` argument: """ function choosetests(choices = []) testnames = [ - "subarray", "core", "compiler", "worlds", "runtime", + "subarray", "core", "compiler", "worlds", "keywordargs", "numbers", "subtype", "char", "strings", "triplequote", "unicode", "intrinsics", "dict", "hashing", "iobuffer", "staged", "offsetarray", diff --git a/test/runtime.jl b/test/runtime.jl deleted file mode 100644 index 76c81c96571574..00000000000000 --- a/test/runtime.jl +++ /dev/null @@ -1,143 +0,0 @@ -# This file is a part of Julia. 
License is MIT: https://julialang.org/license - -using Base: Runtime - -@testset "truncdfhf2" begin - test_truncdfhf2(a, expected) = - @test Runtime.truncdfhf2(Float64(a)) === reinterpret(UInt16, expected) - # NaN - test_truncdfhf2(NaN, NaN16) - # inf - test_truncdfhf2(Inf, Inf16) - test_truncdfhf2(-Inf, -Inf16) - # zero - test_truncdfhf2(0.0, 0x0000) - test_truncdfhf2(-0.0, 0x8000) - test_truncdfhf2(3.1415926535, 0x4248) - test_truncdfhf2(-3.1415926535, 0xc248) - test_truncdfhf2(0x1.987124876876324p+1000, 0x7c00) - test_truncdfhf2(0x1.987124876876324p+12, 0x6e62) - test_truncdfhf2(0x1.0p+0, 0x3c00) - test_truncdfhf2(0x1.0p-14, 0x0400) - # denormal - test_truncdfhf2(0x1.0p-20, 0x0010) - test_truncdfhf2(0x1.0p-24, 0x0001) - test_truncdfhf2(-0x1.0p-24, 0x8001) - test_truncdfhf2(0x1.5p-25, 0x0001) - # and back to zero - test_truncdfhf2(0x1.0p-25, 0x0000) - test_truncdfhf2(-0x1.0p-25, 0x8000) - # max (precise) - test_truncdfhf2(65504.0, 0x7bff) - # max (rounded) - test_truncdfhf2(65519.0, 0x7bff) - # max (to +inf) - test_truncdfhf2(65520.0, 0x7c00) - test_truncdfhf2(-65520.0, 0xfc00) - test_truncdfhf2(65536.0, 0x7c00) -end - -@testset "truncsfhf2" begin - test_truncsfhf2(a, expected) = - @test Runtime.truncsfhf2(Float32(a)) === reinterpret(UInt16, expected) - # NaN - test_truncsfhf2(NaN32, NaN16) - # inf - test_truncsfhf2(Inf32, Inf16) - test_truncsfhf2(-Inf32, -Inf16) - # zero - test_truncsfhf2(0.0f0, 0x0000) - test_truncsfhf2(-0.0f0, 0x8000) - test_truncsfhf2(3.1415926535f0, 0x4248) - test_truncsfhf2(-3.1415926535f0, 0xc248) - test_truncsfhf2(0x1.987124876876324p+100, 0x7c00) - test_truncsfhf2(0x1.987124876876324p+12, 0x6e62) - test_truncsfhf2(0x1.0p+0, 0x3c00) - test_truncsfhf2(0x1.0p-14, 0x0400) - # denormal - test_truncsfhf2(0x1.0p-20, 0x0010) - test_truncsfhf2(0x1.0p-24, 0x0001) - test_truncsfhf2(-0x1.0p-24, 0x8001) - test_truncsfhf2(0x1.5p-25, 0x0001) - # and back to zero - test_truncsfhf2(0x1.0p-25, 0x0000) - test_truncsfhf2(-0x1.0p-25, 0x8000) - # max 
(precise) - test_truncsfhf2(65504.0f0, 0x7bff) - # max (rounded) - test_truncsfhf2(65519.0f0, 0x7bff) - # max (to +inf) - test_truncsfhf2(65520.0f0, 0x7c00) - test_truncsfhf2(65536.0f0, 0x7c00) - test_truncsfhf2(-65520.0f0, 0xfc00) -end - -@testset "extendhfsf2" begin - function test_extendhfsf2(a::UInt16, expected) - b = Runtime.extendhfsf2(a) - b16 = Float16(b) - expected16 = Float16(expected) - @test reinterpret(UInt16, b16) == reinterpret(UInt16, expected16) - end - # NaN - test_extendhfsf2(0x7e00, NaN32) - # inf - test_extendhfsf2(0x7c00, Inf32) - test_extendhfsf2(0xfc00, -Inf32) - # zero - test_extendhfsf2(0x0000, 0.0f0) - test_extendhfsf2(0x8000, -0.0f0) - test_extendhfsf2(0x4248, π) - test_extendhfsf2(0xc248, -π) - test_extendhfsf2(0x7c00, 0x1.987124876876324p+100) - test_extendhfsf2(0x6e62, 0x1.988p+12) - test_extendhfsf2(0x3c00, 0x1.0p+0) - test_extendhfsf2(0x0400, 0x1.0p-14) - # denormal - test_extendhfsf2(0x0010, 0x1.0p-20) - test_extendhfsf2(0x0001, 0x1.0p-24) - test_extendhfsf2(0x8001, -0x1.0p-24) - test_extendhfsf2(0x0001, 0x1.5p-25) - # and back to zero - test_extendhfsf2(0x0000, 0x1.0p-25) - test_extendhfsf2(0x8000, -0x1.0p-25) - # max (precise) - test_extendhfsf2(0x7bff, 65504.0f0) - # max (rounded) - test_extendhfsf2(0x7bff, 65504.0f0) -end - -@testset "extendhfdf2" begin - function test_extendhfdf2(a::UInt16, expected) - b = Runtime.extendhfdf2(a) - b16 = Float16(reinterpret(Float64, b)) - expected16 = Float16(expected) - @test reinterpret(UInt16, b16) == reinterpret(UInt16, expected16) - end - # NaN - test_extendhfdf2(0x7e00, NaN64) - # inf - test_extendhfdf2(0x7c00, Inf64) - test_extendhfdf2(0xfc00, -Inf64) - # zero - test_extendhfdf2(0x0000, 0.0) - test_extendhfdf2(0x8000, -0.0) - test_extendhfdf2(0x4248, π) - test_extendhfdf2(0xc248, -π) - test_extendhfdf2(0x7c00, 0x1.987124876876324p+100) - test_extendhfdf2(0x6e62, 0x1.988p+12) - test_extendhfdf2(0x3c00, 0x1.0p+0) - test_extendhfdf2(0x0400, 0x1.0p-14) - # denormal - test_extendhfdf2(0x0010, 
0x1.0p-20) - test_extendhfdf2(0x0001, 0x1.0p-24) - test_extendhfdf2(0x8001, -0x1.0p-24) - test_extendhfdf2(0x0001, 0x1.5p-25) - # and back to zero - test_extendhfdf2(0x0000, 0x1.0p-25) - test_extendhfdf2(0x8000, -0x1.0p-25) - # max (precise) - test_extendhfdf2(0x7bff, 65504.0) - # max (rounded) - test_extendhfdf2(0x7bff, 65504.0) -end