Implement Float16 conversions using integer arithmetic.

JuliaLang · Sep 11, 2020 · 3cf2f31 · 3cf2f31
1 parent a96fa08
commit 3cf2f31
Show file tree

Hide file tree

Showing 2 changed files with 131 additions and 81 deletions.
diff --git a/base/runtime/runtime.jl b/base/runtime/runtime.jl
@@ -29,6 +29,11 @@ isapple() = (KERNEL === :Apple || KERNEL === :Darwin)
 
 ## Float16 intrinsics
 
+# note that we can't actually use Float16 in these implementations, as LLVM will happily
+# lower, e.g., `reinterpret(Float16, ::UInt16)` / `bitcast i16 to half` to `truncsfhf2`
+# because it wants to store the `half` in a single-precision register. this causes recursion
+# when compiling these intrinsics. LLVM's compiler-rt similarly returns i16 for Float16.
+
 # Float32 -> Float16 algorithm from:
 #   "Fast Half Float Conversion" by Jeroen van der Zijp
 #   ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
@@ -73,9 +78,9 @@ end
 # truncation
 function truncsfhf2(val::Float32)
     f = reinterpret(UInt32, val)
-    if isnan(val)
+    if f&0x7fffffff > 0x7f800000  # isnan without reinterpreting as Float32
         t = 0x8000 ⊻ (0x8000 & ((f >> 0x10) % UInt16))
-        return reinterpret(Float16, t ⊻ ((f >> 0xd) % UInt16))
+        return t ⊻ ((f >> 0xd) % UInt16)
     end
     i = ((f & ~Base.significand_mask(Float32)) >> Base.significand_bits(Float32)) + 1
     @inbounds sh = shifttable[i]
@@ -95,17 +100,18 @@ function truncsfhf2(val::Float32)
             h += UInt16(1)
         end
     end
-    reinterpret(Float16, h)
+    h
 end
-if !Sys.isapple()
-    @ccallable Float16 __truncsfhf2(val::Float32) = truncsfhf2(val)
-    @ccallable Float16 __gnu_f2h_ieee(val::Float32) = truncsfhf2(val)
-    @ccallable Float16 __truncdfhf2(x::Float64) = truncsfhf2(Float32(x))
+truncdfhf2(x::Float64) = truncsfhf2(Float32(x))  # NOTE(review): narrows via Float32 first, so rounding happens twice — confirm acceptable
+if !isapple()
+    @ccallable UInt16 __truncsfhf2(val::Float32) = truncsfhf2(val)
+    @ccallable UInt16 __gnu_f2h_ieee(val::Float32) = truncsfhf2(val)
+    @ccallable UInt16 __truncdfhf2(val::Float64) = truncdfhf2(val)
 end
 
 # extension
-function extendhfsf2(val::Float16)
-    local ival::UInt32 = reinterpret(UInt16, val)
+function extendhfsf2(val::UInt16)
+    local ival::UInt32 = val
     local sign::UInt32 = (ival & 0x8000) >> 15
     local exp::UInt32  = (ival & 0x7c00) >> 10
     local sig::UInt32  = (ival & 0x3ff) >> 0
@@ -143,12 +149,13 @@ function extendhfsf2(val::Float16)
         sig  = sig << (23 - 10)
         ret = sign | exp | sig
     end
-    return reinterpret(Float32, ret)
+    reinterpret(Float32, ret)
 end
-if !Sys.isapple()
-    @ccallable Float32 __extendhfsf2(val::Float16) = extendhfsf2(val)
-    @ccallable Float32 __gnu_h2f_ieee(val::Float16) = extendhfsf2(val)
+extendhfdf2(x::UInt16) = Float64(extendhfsf2(x))
+if !isapple()
+    @ccallable Float32 __extendhfsf2(val::UInt16) = extendhfsf2(val)
+    @ccallable Float32 __gnu_h2f_ieee(val::UInt16) = extendhfsf2(val)
 end
-@ccallable Float64 __extendhfdf2(x::Float16) = Float64(extendhfdf2(x))
+@ccallable Float64 __extendhfdf2(val::UInt16) = extendhfdf2(val)  # half -> double: returns Float64
 
 end
diff --git a/test/runtime.jl b/test/runtime.jl
@@ -3,98 +3,141 @@
 using Base: Runtime
 
 @testset "truncdfhf2" begin
-    @test Runtime.__truncdfhf2(NaN) === NaN16
-    @test Runtime.__truncdfhf2(Inf) === Inf16
-    @test Runtime.__truncdfhf2(-Inf) === -Inf16
-    @test Runtime.__truncdfhf2(0.0) === reinterpret(Float16, 0x0000)
-    @test Runtime.__truncdfhf2(-0.0) === reinterpret(Float16, 0x8000)
-    @test Runtime.__truncdfhf2(3.1415926535) === reinterpret(Float16, 0x4248)
-    @test Runtime.__truncdfhf2(-3.1415926535) === reinterpret(Float16, 0xc248)
-    @test Runtime.__truncdfhf2(0x1.987124876876324p+1000) === reinterpret(Float16, 0x7c00)
-    @test Runtime.__truncdfhf2(0x1.987124876876324p+12) === reinterpret(Float16, 0x6e62)
-    @test Runtime.__truncdfhf2(0x1.0p+0) === reinterpret(Float16, 0x3c00)
-    @test Runtime.__truncdfhf2(0x1.0p-14) === reinterpret(Float16, 0x0400)
+    test_truncdfhf2(a, expected) =
+        @test Runtime.truncdfhf2(Float64(a)) === reinterpret(UInt16, expected)
+    # NaN
+    test_truncdfhf2(NaN, NaN16)
+    # inf
+    test_truncdfhf2(Inf, Inf16)
+    test_truncdfhf2(-Inf, -Inf16)
+    # zero
+    test_truncdfhf2(0.0, 0x0000)
+    test_truncdfhf2(-0.0, 0x8000)
+    test_truncdfhf2(3.1415926535, 0x4248)
+    test_truncdfhf2(-3.1415926535, 0xc248)
+    test_truncdfhf2(0x1.987124876876324p+1000, 0x7c00)
+    test_truncdfhf2(0x1.987124876876324p+12, 0x6e62)
+    test_truncdfhf2(0x1.0p+0, 0x3c00)
+    test_truncdfhf2(0x1.0p-14, 0x0400)
     # denormal
-    @test Runtime.__truncdfhf2(0x1.0p-20) === reinterpret(Float16, 0x0010)
-    @test Runtime.__truncdfhf2(0x1.0p-24) === reinterpret(Float16, 0x0001)
-    @test Runtime.__truncdfhf2(-0x1.0p-24) === reinterpret(Float16, 0x8001)
-    @test Runtime.__truncdfhf2(0x1.5p-25) === reinterpret(Float16, 0x0001)
+    test_truncdfhf2(0x1.0p-20, 0x0010)
+    test_truncdfhf2(0x1.0p-24, 0x0001)
+    test_truncdfhf2(-0x1.0p-24, 0x8001)
+    test_truncdfhf2(0x1.5p-25, 0x0001)
     # and back to zero
-    @test Runtime.__truncdfhf2(0x1.0p-25) === reinterpret(Float16, 0x0000)
-    @test Runtime.__truncdfhf2(-0x1.0p-25) === reinterpret(Float16, 0x8000)
+    test_truncdfhf2(0x1.0p-25, 0x0000)
+    test_truncdfhf2(-0x1.0p-25, 0x8000)
     # max (precise)
-    @test Runtime.__truncdfhf2(65504.0) === reinterpret(Float16, 0x7bff)
+    test_truncdfhf2(65504.0, 0x7bff)
     # max (rounded)
-    @test Runtime.__truncdfhf2(65519.0) === reinterpret(Float16, 0x7bff)
+    test_truncdfhf2(65519.0, 0x7bff)
     # max (to +inf)
-    @test Runtime.__truncdfhf2(65520.0) === reinterpret(Float16, 0x7c00)
-    @test Runtime.__truncdfhf2(-65520.0) === reinterpret(Float16, 0xfc00)
-    @test Runtime.__truncdfhf2(65536.0) === reinterpret(Float16, 0x7c00)
+    test_truncdfhf2(65520.0, 0x7c00)
+    test_truncdfhf2(-65520.0, 0xfc00)
+    test_truncdfhf2(65536.0, 0x7c00)
 end
 
 @testset "truncsfhf2" begin
+    test_truncsfhf2(a, expected) =
+        @test Runtime.truncsfhf2(Float32(a)) === reinterpret(UInt16, expected)
     # NaN
-    @test Runtime.__truncsfhf2(NaN32) === reinterpret(Float16, 0x7e00)
+    test_truncsfhf2(NaN32, NaN16)
     # inf
-    @test Runtime.__truncsfhf2(Inf32) === reinterpret(Float16, 0x7c00)
-    @test Runtime.__truncsfhf2(-Inf32) === reinterpret(Float16, 0xfc00)
+    test_truncsfhf2(Inf32, Inf16)
+    test_truncsfhf2(-Inf32, -Inf16)
     # zero
-    @test Runtime.__truncsfhf2(0.0f0) === reinterpret(Float16, 0x0000)
-    @test Runtime.__truncsfhf2(-0.0f0) === reinterpret(Float16, 0x8000)
-    @test Runtime.__truncsfhf2(3.1415926535f0) === reinterpret(Float16, 0x4248)
-    @test Runtime.__truncsfhf2(-3.1415926535f0) === reinterpret(Float16, 0xc248)
-    @test Runtime.__truncsfhf2(Float32(0x1.987124876876324p+100)) === reinterpret(Float16, 0x7c00)
-    @test Runtime.__truncsfhf2(Float32(0x1.987124876876324p+12)) === reinterpret(Float16, 0x6e62)
-    @test Runtime.__truncsfhf2(Float32(0x1.0p+0)) === reinterpret(Float16, 0x3c00)
-    @test Runtime.__truncsfhf2(Float32(0x1.0p-14)) === reinterpret(Float16, 0x0400)
+    test_truncsfhf2(0.0f0, 0x0000)
+    test_truncsfhf2(-0.0f0, 0x8000)
+    test_truncsfhf2(3.1415926535f0, 0x4248)
+    test_truncsfhf2(-3.1415926535f0, 0xc248)
+    test_truncsfhf2(0x1.987124876876324p+100, 0x7c00)
+    test_truncsfhf2(0x1.987124876876324p+12, 0x6e62)
+    test_truncsfhf2(0x1.0p+0, 0x3c00)
+    test_truncsfhf2(0x1.0p-14, 0x0400)
     # denormal
-    @test Runtime.__truncsfhf2(Float32(0x1.0p-20)) === reinterpret(Float16, 0x0010)
-    @test Runtime.__truncsfhf2(Float32(0x1.0p-24)) === reinterpret(Float16, 0x0001)
-    @test Runtime.__truncsfhf2(Float32(-0x1.0p-24)) === reinterpret(Float16, 0x8001)
-    @test Runtime.__truncsfhf2(Float32(0x1.5p-25)) === reinterpret(Float16, 0x0001)
+    test_truncsfhf2(0x1.0p-20, 0x0010)
+    test_truncsfhf2(0x1.0p-24, 0x0001)
+    test_truncsfhf2(-0x1.0p-24, 0x8001)
+    test_truncsfhf2(0x1.5p-25, 0x0001)
     # and back to zero
-    @test Runtime.__truncsfhf2(Float32(0x1.0p-25)) === reinterpret(Float16, 0x0000)
-    @test Runtime.__truncsfhf2(Float32(-0x1.0p-25)) === reinterpret(Float16, 0x8000)
+    test_truncsfhf2(0x1.0p-25, 0x0000)
+    test_truncsfhf2(-0x1.0p-25, 0x8000)
     # max (precise)
-    @test Runtime.__truncsfhf2(65504.0f0) === reinterpret(Float16, 0x7bff)
+    test_truncsfhf2(65504.0f0, 0x7bff)
     # max (rounded)
-    @test Runtime.__truncsfhf2(65519.0f0) === reinterpret(Float16, 0x7bff)
+    test_truncsfhf2(65519.0f0, 0x7bff)
     # max (to +inf)
-    @test Runtime.__truncsfhf2(65520.0f0) === reinterpret(Float16, 0x7c00)
-    @test Runtime.__truncsfhf2(65536.0f0) === reinterpret(Float16, 0x7c00)
-    @test Runtime.__truncsfhf2(-65520.0f0) === reinterpret(Float16, 0xfc00)
+    test_truncsfhf2(65520.0f0, 0x7c00)
+    test_truncsfhf2(65536.0f0, 0x7c00)
+    test_truncsfhf2(-65520.0f0, 0xfc00)
 end
 
 @testset "extendhfsf2" begin
-    # These tests are taken fromt the compiler-rt testsuite. Were as of 3.9.0
-    # the test are done with compareResultH (so with after casting to UInt16)
-    # Tests that are marked broken fail as === Float32 comparisons.
+    function test_extendhfsf2(a::UInt16, expected)
+        # widen the half bit pattern `a`, then compare both sides as Float16 bit patterns
+        got = reinterpret(UInt16, Float16(Runtime.extendhfsf2(a)))
+        want = reinterpret(UInt16, Float16(expected))
+        @test got == want
+    end
+    # NaN
+    test_extendhfsf2(0x7e00, NaN32)
+    # inf
+    test_extendhfsf2(0x7c00, Inf32)
+    test_extendhfsf2(0xfc00, -Inf32)
+    # zero
+    test_extendhfsf2(0x0000, 0.0f0)
+    test_extendhfsf2(0x8000, -0.0f0)
+    test_extendhfsf2(0x4248, π)
+    test_extendhfsf2(0xc248, -π)
+    test_extendhfsf2(0x7c00, 0x1.987124876876324p+100)
+    test_extendhfsf2(0x6e62, 0x1.988p+12)
+    test_extendhfsf2(0x3c00, 0x1.0p+0)
+    test_extendhfsf2(0x0400, 0x1.0p-14)
+    # denormal
+    test_extendhfsf2(0x0010, 0x1.0p-20)
+    test_extendhfsf2(0x0001, 0x1.0p-24)
+    test_extendhfsf2(0x8001, -0x1.0p-24)
+    test_extendhfsf2(0x0001, 0x1.5p-25)
+    # and back to zero
+    test_extendhfsf2(0x0000, 0x1.0p-25)
+    test_extendhfsf2(0x8000, -0x1.0p-25)
+    # max (precise)
+    test_extendhfsf2(0x7bff, 65504.0f0)
+    # max (rounded)
+    test_extendhfsf2(0x7bff, 65504.0f0)
+end
 
-    ##
+@testset "extendhfdf2" begin
+    function test_extendhfdf2(a::UInt16, expected)
+        # extendhfdf2 already yields a Float64; narrow both sides to Float16 and compare bits
+        got = reinterpret(UInt16, Float16(Runtime.extendhfdf2(a)))
+        want = reinterpret(UInt16, Float16(expected))
+        @test got == want
+    end
     # NaN
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x7e00)) === NaN32
+    test_extendhfdf2(0x7e00, NaN64)
     # inf
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x7c00)) === Inf32
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0xfc00)) === -Inf32
+    test_extendhfdf2(0x7c00, Inf64)
+    test_extendhfdf2(0xfc00, -Inf64)
     # zero
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x0000)) === 0.0f0
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x8000)) === -0.0f0
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x4248)) ≈ Float32(π)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0xc248)) ≈ Float32(-π)
-    # @test Runtime.__extendhfsf2(reinterpret(Float16, 0x7c00)) === Float32(0x1.987124876876324p+100)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x6e62)) === Float32(0x1.988p+12)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x3c00)) === Float32(0x1.0p+0)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x0400)) === Float32(0x1.0p-14)
+    test_extendhfdf2(0x0000, 0.0)
+    test_extendhfdf2(0x8000, -0.0)
+    test_extendhfdf2(0x4248, π)
+    test_extendhfdf2(0xc248, -π)
+    test_extendhfdf2(0x7c00, 0x1.987124876876324p+100)
+    test_extendhfdf2(0x6e62, 0x1.988p+12)
+    test_extendhfdf2(0x3c00, 0x1.0p+0)
+    test_extendhfdf2(0x0400, 0x1.0p-14)
     # denormal
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x0010)) === Float32(0x1.0p-20)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x0001)) === Float32(0x1.0p-24)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x8001)) === Float32(-0x1.0p-24)
-    @test_broken Runtime.__extendhfsf2(reinterpret(Float16, 0x0001)) === Float32(0x1.5p-25)
+    test_extendhfdf2(0x0010, 0x1.0p-20)
+    test_extendhfdf2(0x0001, 0x1.0p-24)
+    test_extendhfdf2(0x8001, -0x1.0p-24)
+    test_extendhfdf2(0x0001, 0x1.5p-25)
     # and back to zero
-    # @test Runtime.__extendhfsf2(reinterpret(Float16, 0x0000)) === Float32(0x1.0p-25)
-    # @test Runtime.__extendhfsf2(reinterpret(Float16, 0x8000)) === Float32(-0x1.0p-25)
+    test_extendhfdf2(0x0000, 0x1.0p-25)
+    test_extendhfdf2(0x8000, -0x1.0p-25)
     # max (precise)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x7bff)) === 65504.0f0
+    test_extendhfdf2(0x7bff, 65504.0)
     # max (rounded)
-    @test Runtime.__extendhfsf2(reinterpret(Float16, 0x7bff)) === 65504.0f0
+    test_extendhfdf2(0x7bff, 65504.0)
 end