From 3cf2f31a619effc4a0651cd01262f1adb34ba108 Mon Sep 17 00:00:00 2001
From: Tim Besard
Date: Fri, 11 Sep 2020 13:56:47 +0200
Subject: [PATCH] Implement Float16 conversions using integer arithmetic.

The truncation and extension helpers now exchange half-precision values
as raw UInt16 bit patterns instead of Float16; the comment added to
runtime.jl explains why. The NaN check in truncsfhf2 is done on the
integer bits for the same reason.

truncdfhf2 and extendhfdf2 are exposed as plain functions so the tests
can call them directly. __extendhfdf2 is declared as returning Float64,
matching the value produced by extendhfdf2.
---
 base/runtime/runtime.jl |  35 ++++----
 test/runtime.jl         | 177 +++++++++++++++++++++++++---------------
 2 files changed, 131 insertions(+), 81 deletions(-)

diff --git a/base/runtime/runtime.jl b/base/runtime/runtime.jl
index e6af0a9dcf951..616851ced0606 100644
--- a/base/runtime/runtime.jl
+++ b/base/runtime/runtime.jl
@@ -29,6 +29,11 @@ isapple() = (KERNEL === :Apple || KERNEL === :Darwin)
 
 ## Float16 intrinsics
 
+# note that we can't actually use Float16 in these implementations, as LLVM will happily
+# lower, e.g., `reinterpret(Float16, ::UInt16)` / `bitcast i16 to half` to `truncsfhf2`
+# because it wants to store the `half` in a single-precision register. this causes recursion
+# when compiling these intrinsics. LLVM's compiler-rt similarly returns i16 for Float16.
+
 # Float32 -> Float16 algorithm from:
 #   "Fast Half Float Conversion" by Jeroen van der Zijp
 #   ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
@@ -73,9 +78,9 @@ end
 # truncation
 function truncsfhf2(val::Float32)
     f = reinterpret(UInt32, val)
-    if isnan(val)
+    if f&0x7fffffff > 0x7f800000 # isnan without reinterpreting as Float32
         t = 0x8000 ⊻ (0x8000 & ((f >> 0x10) % UInt16))
-        return reinterpret(Float16, t ⊻ ((f >> 0xd) % UInt16))
+        return t ⊻ ((f >> 0xd) % UInt16)
     end
     i = ((f & ~Base.significand_mask(Float32)) >> Base.significand_bits(Float32)) + 1
     @inbounds sh = shifttable[i]
@@ -95,17 +100,18 @@ function truncsfhf2(val::Float32)
             h += UInt16(1)
         end
     end
-    reinterpret(Float16, h)
+    h
 end
-if !Sys.isapple()
-    @ccallable Float16 __truncsfhf2(val::Float32) = truncsfhf2(val)
-    @ccallable Float16 __gnu_f2h_ieee(val::Float32) = truncsfhf2(val)
-    @ccallable Float16 __truncdfhf2(x::Float64) = truncsfhf2(Float32(x))
+truncdfhf2(x::Float64) = truncsfhf2(Float32(x))
+if !isapple()
+    @ccallable UInt16 __truncsfhf2(val::Float32) = truncsfhf2(val)
+    @ccallable UInt16 __gnu_f2h_ieee(val::Float32) = truncsfhf2(val)
+    @ccallable UInt16 __truncdfhf2(val::Float64) = truncdfhf2(val)
 end
 
 # extension
-function extendhfsf2(val::Float16)
-    local ival::UInt32 = reinterpret(UInt16, val)
+function extendhfsf2(val::UInt16)
+    local ival::UInt32 = val
     local sign::UInt32 = (ival & 0x8000) >> 15
     local exp::UInt32 = (ival & 0x7c00) >> 10
     local sig::UInt32 = (ival & 0x3ff) >> 0
@@ -143,12 +149,13 @@ function extendhfsf2(val::Float16)
         sig = sig << (23 - 10)
         ret = sign | exp | sig
     end
-    return reinterpret(Float32, ret)
+    reinterpret(Float32, ret)
 end
-if !Sys.isapple()
-    @ccallable Float32 __extendhfsf2(val::Float16) = extendhfsf2(val)
-    @ccallable Float32 __gnu_h2f_ieee(val::Float16) = extendhfsf2(val)
+extendhfdf2(x::UInt16) = Float64(extendhfsf2(x))
+if !isapple()
+    @ccallable Float32 __extendhfsf2(val::UInt16) = extendhfsf2(val)
+    @ccallable Float32 __gnu_h2f_ieee(val::UInt16) = extendhfsf2(val)
 end
-@ccallable Float64 __extendhfdf2(x::Float16) = Float64(extendhfdf2(x))
+@ccallable Float64 __extendhfdf2(val::UInt16) = extendhfdf2(val)
 
 end
diff --git a/test/runtime.jl b/test/runtime.jl
index 34551b9937bdc..76c81c9657157 100644
--- a/test/runtime.jl
+++ b/test/runtime.jl
@@ -3,98 +3,141 @@
 using Base: Runtime
 
 @testset "truncdfhf2" begin
-    @test Runtime.__truncdfhf2(NaN) === NaN16
-    @test Runtime.__truncdfhf2(Inf) === Inf16
-    @test Runtime.__truncdfhf2(-Inf) === -Inf16
-    @test Runtime.__truncdfhf2(0.0) === reinterpret(Float16, 0x0000)
-    @test Runtime.__truncdfhf2(-0.0) === reinterpret(Float16, 0x8000)
-    @test Runtime.__truncdfhf2(3.1415926535) === reinterpret(Float16, 0x4248)
-    @test Runtime.__truncdfhf2(-3.1415926535) === reinterpret(Float16, 0xc248)
-    @test Runtime.__truncdfhf2(0x1.987124876876324p+1000) === reinterpret(Float16, 0x7c00)
-    @test Runtime.__truncdfhf2(0x1.987124876876324p+12) === reinterpret(Float16, 0x6e62)
-    @test Runtime.__truncdfhf2(0x1.0p+0) === reinterpret(Float16, 0x3c00)
-    @test Runtime.__truncdfhf2(0x1.0p-14) === reinterpret(Float16, 0x0400)
+    test_truncdfhf2(a, expected) =
+        @test Runtime.truncdfhf2(Float64(a)) === reinterpret(UInt16, expected)
+    # NaN
+    test_truncdfhf2(NaN, NaN16)
+    # inf
+    test_truncdfhf2(Inf, Inf16)
+    test_truncdfhf2(-Inf, -Inf16)
+    # zero
+    test_truncdfhf2(0.0, 0x0000)
+    test_truncdfhf2(-0.0, 0x8000)
+    test_truncdfhf2(3.1415926535, 0x4248)
+    test_truncdfhf2(-3.1415926535, 0xc248)
+    test_truncdfhf2(0x1.987124876876324p+1000, 0x7c00)
+    test_truncdfhf2(0x1.987124876876324p+12, 0x6e62)
+    test_truncdfhf2(0x1.0p+0, 0x3c00)
+    test_truncdfhf2(0x1.0p-14, 0x0400)
     # denormal
-    @test Runtime.__truncdfhf2(0x1.0p-20) === reinterpret(Float16, 0x0010)
-    @test Runtime.__truncdfhf2(0x1.0p-24) === reinterpret(Float16, 0x0001)
-    @test Runtime.__truncdfhf2(-0x1.0p-24) === reinterpret(Float16, 0x8001)
-    @test Runtime.__truncdfhf2(0x1.5p-25) === reinterpret(Float16, 0x0001)
+    test_truncdfhf2(0x1.0p-20, 0x0010)
+    test_truncdfhf2(0x1.0p-24, 0x0001)
+    test_truncdfhf2(-0x1.0p-24, 0x8001)
+    test_truncdfhf2(0x1.5p-25, 0x0001)
     # and back to zero
-    @test Runtime.__truncdfhf2(0x1.0p-25) === reinterpret(Float16, 0x0000)
-    @test Runtime.__truncdfhf2(-0x1.0p-25) === reinterpret(Float16, 0x8000)
+    test_truncdfhf2(0x1.0p-25, 0x0000)
+    test_truncdfhf2(-0x1.0p-25, 0x8000)
     # max (precise)
-    @test Runtime.__truncdfhf2(65504.0) === reinterpret(Float16, 0x7bff)
+    test_truncdfhf2(65504.0, 0x7bff)
     # max (rounded)
-    @test Runtime.__truncdfhf2(65519.0) === reinterpret(Float16, 0x7bff)
+    test_truncdfhf2(65519.0, 0x7bff)
     # max (to +inf)
-    @test Runtime.__truncdfhf2(65520.0) === reinterpret(Float16, 0x7c00)
-    @test Runtime.__truncdfhf2(-65520.0) === reinterpret(Float16, 0xfc00)
-    @test Runtime.__truncdfhf2(65536.0) === reinterpret(Float16, 0x7c00)
+    test_truncdfhf2(65520.0, 0x7c00)
+    test_truncdfhf2(-65520.0, 0xfc00)
+    test_truncdfhf2(65536.0, 0x7c00)
 end
 
 @testset "truncsfhf2" begin
+    test_truncsfhf2(a, expected) =
+        @test Runtime.truncsfhf2(Float32(a)) === reinterpret(UInt16, expected)
     # NaN
-    @test Runtime.__truncsfhf2(NaN32) === reinterpret(Float16, 0x7e00)
+    test_truncsfhf2(NaN32, NaN16)
     # inf
-    @test Runtime.__truncsfhf2(Inf32) === reinterpret(Float16, 0x7c00)
-    @test Runtime.__truncsfhf2(-Inf32) === reinterpret(Float16, 0xfc00)
+    test_truncsfhf2(Inf32, Inf16)
+    test_truncsfhf2(-Inf32, -Inf16)
     # zero
-    @test Runtime.__truncsfhf2(0.0f0) === reinterpret(Float16, 0x0000)
-    @test Runtime.__truncsfhf2(-0.0f0) === reinterpret(Float16, 0x8000)
-    @test Runtime.__truncsfhf2(3.1415926535f0) === reinterpret(Float16, 0x4248)
-    @test Runtime.__truncsfhf2(-3.1415926535f0) === reinterpret(Float16, 0xc248)
-    @test Runtime.__truncsfhf2(Float32(0x1.987124876876324p+100)) === reinterpret(Float16, 0x7c00)
-    @test Runtime.__truncsfhf2(Float32(0x1.987124876876324p+12)) === reinterpret(Float16, 0x6e62)
-    @test Runtime.__truncsfhf2(Float32(0x1.0p+0)) === reinterpret(Float16, 0x3c00)
-    @test Runtime.__truncsfhf2(Float32(0x1.0p-14)) === reinterpret(Float16, 0x0400)
+    test_truncsfhf2(0.0f0, 0x0000)
+    test_truncsfhf2(-0.0f0, 0x8000)
+    test_truncsfhf2(3.1415926535f0, 0x4248)
+    test_truncsfhf2(-3.1415926535f0, 0xc248)
+    test_truncsfhf2(0x1.987124876876324p+100, 0x7c00)
+    test_truncsfhf2(0x1.987124876876324p+12, 0x6e62)
+    test_truncsfhf2(0x1.0p+0, 0x3c00)
+    test_truncsfhf2(0x1.0p-14, 0x0400)
     # denormal
-    @test Runtime.__truncsfhf2(Float32(0x1.0p-20)) === reinterpret(Float16, 0x0010)
-    @test Runtime.__truncsfhf2(Float32(0x1.0p-24)) === reinterpret(Float16, 0x0001)
-    @test Runtime.__truncsfhf2(Float32(-0x1.0p-24)) === reinterpret(Float16, 0x8001)
-    @test Runtime.__truncsfhf2(Float32(0x1.5p-25)) === reinterpret(Float16, 0x0001)
+    test_truncsfhf2(0x1.0p-20, 0x0010)
+    test_truncsfhf2(0x1.0p-24, 0x0001)
+    test_truncsfhf2(-0x1.0p-24, 0x8001)
+    test_truncsfhf2(0x1.5p-25, 0x0001)
     # and back to zero
-    @test Runtime.__truncsfhf2(Float32(0x1.0p-25)) === reinterpret(Float16, 0x0000)
-    @test Runtime.__truncsfhf2(Float32(-0x1.0p-25)) === reinterpret(Float16, 0x8000)
+    test_truncsfhf2(0x1.0p-25, 0x0000)
+    test_truncsfhf2(-0x1.0p-25, 0x8000)
     # max (precise)
-    @test Runtime.__truncsfhf2(65504.0f0) === reinterpret(Float16, 0x7bff)
+    test_truncsfhf2(65504.0f0, 0x7bff)
     # max (rounded)
-    @test Runtime.__truncsfhf2(65519.0f0) === reinterpret(Float16, 0x7bff)
+    test_truncsfhf2(65519.0f0, 0x7bff)
     # max (to +inf)
-    @test Runtime.__truncsfhf2(65520.0f0) === reinterpret(Float16, 0x7c00)
-    @test Runtime.__truncsfhf2(65536.0f0) === reinterpret(Float16, 0x7c00)
-    @test Runtime.__truncsfhf2(-65520.0f0) === reinterpret(Float16, 0xfc00)
+    test_truncsfhf2(65520.0f0, 0x7c00)
+    test_truncsfhf2(65536.0f0, 0x7c00)
+    test_truncsfhf2(-65520.0f0, 0xfc00)
 end
 
 @testset "extendhfsf2" begin
-    # These tests are taken fromt the compiler-rt testsuite. Were as of 3.9.0
-    # the test are done with compareResultH (so with after casting to UInt16)
-    # Tests that are marked broken fail as === Float32 comparisons.
+    function test_extendhfsf2(a::UInt16, expected)
+        b = Runtime.extendhfsf2(a)
+        b16 = Float16(b)
+        expected16 = Float16(expected)
+        @test reinterpret(UInt16, b16) == reinterpret(UInt16, expected16)
+    end
+    # NaN
+    test_extendhfsf2(0x7e00, NaN32)
+    # inf
+    test_extendhfsf2(0x7c00, Inf32)
+    test_extendhfsf2(0xfc00, -Inf32)
+    # zero
+    test_extendhfsf2(0x0000, 0.0f0)
+    test_extendhfsf2(0x8000, -0.0f0)
+    test_extendhfsf2(0x4248, π)
+    test_extendhfsf2(0xc248, -π)
+    test_extendhfsf2(0x7c00, 0x1.987124876876324p+100)
+    test_extendhfsf2(0x6e62, 0x1.988p+12)
+    test_extendhfsf2(0x3c00, 0x1.0p+0)
+    test_extendhfsf2(0x0400, 0x1.0p-14)
+    # denormal
+    test_extendhfsf2(0x0010, 0x1.0p-20)
+    test_extendhfsf2(0x0001, 0x1.0p-24)
+    test_extendhfsf2(0x8001, -0x1.0p-24)
+    test_extendhfsf2(0x0001, 0x1.5p-25)
+    # and back to zero
+    test_extendhfsf2(0x0000, 0x1.0p-25)
+    test_extendhfsf2(0x8000, -0x1.0p-25)
+    # max (precise)
+    test_extendhfsf2(0x7bff, 65504.0f0)
+    # max (rounded)
+    test_extendhfsf2(0x7bff, 65504.0f0)
+end
 
-    ##
+@testset "extendhfdf2" begin
+    function test_extendhfdf2(a::UInt16, expected)
+        b = Runtime.extendhfdf2(a)
+        b16 = Float16(b)
+        expected16 = Float16(expected)
+        @test reinterpret(UInt16, b16) == reinterpret(UInt16, expected16)
+    end
     # NaN
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x7e00)) === NaN32
+    test_extendhfdf2(0x7e00, NaN64)
     # inf
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x7c00)) === Inf32
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0xfc00)) === -Inf32
+    test_extendhfdf2(0x7c00, Inf64)
+    test_extendhfdf2(0xfc00, -Inf64)
     # zero
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x0000)) === 0.0f0
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x8000)) === -0.0f0
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x4248)) ≈ Float32(π)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0xc248)) ≈ Float32(-π)
-    # @test Runtime.__extendhfsf2(reinterpret(Float16, 0x7c00)) === Float32(0x1.987124876876324p+100)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x6e62)) === Float32(0x1.988p+12)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x3c00)) === Float32(0x1.0p+0)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x0400)) === Float32(0x1.0p-14)
+    test_extendhfdf2(0x0000, 0.0)
+    test_extendhfdf2(0x8000, -0.0)
+    test_extendhfdf2(0x4248, π)
+    test_extendhfdf2(0xc248, -π)
+    test_extendhfdf2(0x7c00, 0x1.987124876876324p+100)
+    test_extendhfdf2(0x6e62, 0x1.988p+12)
+    test_extendhfdf2(0x3c00, 0x1.0p+0)
+    test_extendhfdf2(0x0400, 0x1.0p-14)
     # denormal
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x0010)) === Float32(0x1.0p-20)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x0001)) === Float32(0x1.0p-24)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x8001)) === Float32(-0x1.0p-24)
-    @test_broken Runtime.__extendhfsf2(reinterpret(Float16, 0x0001)) === Float32(0x1.5p-25)
+    test_extendhfdf2(0x0010, 0x1.0p-20)
+    test_extendhfdf2(0x0001, 0x1.0p-24)
+    test_extendhfdf2(0x8001, -0x1.0p-24)
+    test_extendhfdf2(0x0001, 0x1.5p-25)
     # and back to zero
-    # @test Runtime.__extendhfsf2(reinterpret(Float16, 0x0000)) === Float32(0x1.0p-25)
-    # @test Runtime.__extendhfsf2(reinterpret(Float16, 0x8000)) === Float32(-0x1.0p-25)
+    test_extendhfdf2(0x0000, 0x1.0p-25)
+    test_extendhfdf2(0x8000, -0x1.0p-25)
     # max (precise)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x7bff)) === 65504.0f0
+    test_extendhfdf2(0x7bff, 65504.0)
     # max (rounded)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x7bff)) === 65504.0f0
+    test_extendhfdf2(0x7bff, 65504.0)
 end