Only pass Float16 as Int16 in the runtime.

This fixes a corruption where the wrong register was used.
JuliaLang · Sep 11, 2020 · f001564 · f001564
1 parent 2984b75
commit f001564
Show file tree

Hide file tree

Showing 2 changed files with 32 additions and 34 deletions.
diff --git a/base/runtime/runtime.jl b/base/runtime/runtime.jl
@@ -32,7 +32,7 @@ isapple() = (KERNEL === :Apple || KERNEL === :Darwin)
 # note that we can't actually use Float16 in these implementations, as LLVM will happily
 # lower, e.g., `reinterpret(Float16, ::UInt16)` / `bitcast i16 to half` to `truncsfhf2`
 # because it wants to store the `half` in a single-precision register. this causes recursion
-# when compiling these intrinsics. LLVM's compiler-rt similarly returns plain integers.
+# when compiling these intrinsics. LLVM's compiler-rt similarly returns i16 for Float16.
 
 # Float32 -> Float16 algorithm from:
 #   "Fast Half Float Conversion" by Jeroen van der Zijp
@@ -76,7 +76,8 @@ let _basetable = Vector{UInt16}(undef, 512),
 end
 
 # truncation
-function truncsfhf2(f::UInt32)
+function truncsfhf2(val::Float32)
+    f = reinterpret(UInt32, val)
     if f&0x7fffffff > 0x7f800000  # isnan without reinterpreting as Float32
         t = 0x8000 ⊻ (0x8000 & ((f >> 0x10) % UInt16))
         return t ⊻ ((f >> 0xd) % UInt16)
@@ -101,11 +102,11 @@ function truncsfhf2(f::UInt32)
     end
     h
 end
-truncdfhf2(x::UInt64) = truncsfhf2(reinterpret(UInt32, Float32(reinterpret(Float64, x))))
+truncdfhf2(x::Float64) = truncsfhf2(Float32(x))
 if !isapple()
-    @ccallable UInt16 __truncsfhf2(val::UInt32) = truncsfhf2(val)
-    @ccallable UInt16 __gnu_f2h_ieee(val::UInt32) = truncsfhf2(val)
-    @ccallable UInt16 __truncdfhf2(val::UInt64) = truncdfhf2(val)
+    @ccallable UInt16 __truncsfhf2(val::Float32) = truncsfhf2(val)
+    @ccallable UInt16 __gnu_f2h_ieee(val::Float32) = truncsfhf2(val)
+    @ccallable UInt16 __truncdfhf2(val::Float64) = truncdfhf2(val)
 end
 
 # extension
@@ -148,13 +149,13 @@ function extendhfsf2(val::UInt16)
         sig  = sig << (23 - 10)
         ret = sign | exp | sig
     end
-    ret
+    reinterpret(Float32, ret)
 end
-extendhfdf2(x::UInt16) = reinterpret(UInt64, Float64(reinterpret(Float32, extendhfsf2(x))))
+extendhfdf2(x::UInt16) = Float64(extendhfsf2(x))
 if !isapple()
-    @ccallable UInt32 __extendhfsf2(val::UInt16) = extendhfsf2(val)
-    @ccallable UInt32 __gnu_h2f_ieee(val::UInt16) = extendhfsf2(val)
+    @ccallable Float32 __extendhfsf2(val::UInt16) = extendhfsf2(val)
+    @ccallable Float32 __gnu_h2f_ieee(val::UInt16) = extendhfsf2(val)
 end
-@ccallable UInt64 __extendhfdf2(val::UInt16) = extendhfdf2(val)
+@ccallable Float64 __extendhfdf2(val::UInt16) = extendhfdf2(val)  # half -> double: extendhfdf2 yields Float64
 
 end
diff --git a/test/runtime.jl b/test/runtime.jl
@@ -4,7 +4,7 @@ using Base: Runtime
 
 @testset "truncdfhf2" begin
     test_truncdfhf2(a, expected) =
-        @test Runtime.truncdfhf2(reinterpret(UInt64, Float64(a))) === reinterpret(UInt16, expected)
+        @test Runtime.truncdfhf2(Float64(a)) === reinterpret(UInt16, expected)
     # NaN
     test_truncdfhf2(NaN, NaN16)
     # inf
@@ -39,7 +39,7 @@ end
 
 @testset "truncsfhf2" begin
     test_truncsfhf2(a, expected) =
-        @test Runtime.truncsfhf2(reinterpret(UInt32, Float32(a))) === reinterpret(UInt16, expected)
+        @test Runtime.truncsfhf2(Float32(a)) === reinterpret(UInt16, expected)
     # NaN
     test_truncsfhf2(NaN32, NaN16)
     # inf
@@ -73,9 +73,9 @@ end
 end
 
 @testset "extendhfsf2" begin
-    function test_extendhfsf2(a::UInt16, expected::Float32)
+    function test_extendhfsf2(a::UInt16, expected)
         b = Runtime.extendhfsf2(a)
-        b16 = Float16(reinterpret(Float32, b))
+        b16 = Float16(b)
         expected16 = Float16(expected)
         @test reinterpret(UInt16, b16) == reinterpret(UInt16, expected16)
     end
@@ -87,34 +87,31 @@ end
     # zero
     test_extendhfsf2(0x0000, 0.0f0)
     test_extendhfsf2(0x8000, -0.0f0)
-    test_extendhfsf2(0x4248, Float32(π))
-    test_extendhfsf2(0xc248, Float32(-π))
-    test_extendhfsf2(0x7c00, Float32(0x1.987124876876324p+100))
-    test_extendhfsf2(0x6e62, Float32(0x1.988p+12))
-    test_extendhfsf2(0x3c00, Float32(0x1.0p+0))
-    test_extendhfsf2(0x0400, Float32(0x1.0p-14))
+    test_extendhfsf2(0x4248, π)
+    test_extendhfsf2(0xc248, -π)
+    test_extendhfsf2(0x7c00, 0x1.987124876876324p+100)
+    test_extendhfsf2(0x6e62, 0x1.988p+12)
+    test_extendhfsf2(0x3c00, 0x1.0p+0)
+    test_extendhfsf2(0x0400, 0x1.0p-14)
     # denormal
-    test_extendhfsf2(0x0010, Float32(0x1.0p-20))
-    test_extendhfsf2(0x0001, Float32(0x1.0p-24))
-    test_extendhfsf2(0x8001, Float32(-0x1.0p-24))
-    test_extendhfsf2(0x0001, Float32(0x1.5p-25))
+    test_extendhfsf2(0x0010, 0x1.0p-20)
+    test_extendhfsf2(0x0001, 0x1.0p-24)
+    test_extendhfsf2(0x8001, -0x1.0p-24)
+    test_extendhfsf2(0x0001, 0x1.5p-25)
     # and back to zero
-    test_extendhfsf2(0x0000, Float32(0x1.0p-25))
-    test_extendhfsf2(0x8000, Float32(-0x1.0p-25))
+    test_extendhfsf2(0x0000, 0x1.0p-25)
+    test_extendhfsf2(0x8000, -0x1.0p-25)
     # max (precise)
     test_extendhfsf2(0x7bff, 65504.0f0)
     # max (rounded)
     test_extendhfsf2(0x7bff, 65504.0f0)
-
-    # BROKEN: once this works, remove the calls to Float32 in these tests
-    @test Float16(Float32(0x1.0p-14)) == Float16(0x1.0p-14)
 end
 
 @testset "extendhfdf2" begin
-    function test_extendhfdf2(a::UInt16, expected::Float64)
+    function test_extendhfdf2(a::UInt16, expected)
         b = Runtime.extendhfdf2(a)
         b16 = Float16(reinterpret(Float64, b))
-        expected16 = Float16(Float32(expected)) # see BROKEN above
+        expected16 = Float16(expected)
         @test reinterpret(UInt16, b16) == reinterpret(UInt16, expected16)
     end
     # NaN
@@ -125,8 +122,8 @@ end
     # zero
     test_extendhfdf2(0x0000, 0.0)
     test_extendhfdf2(0x8000, -0.0)
-    test_extendhfdf2(0x4248, Float64(π))
-    test_extendhfdf2(0xc248, Float64(-π))
+    test_extendhfdf2(0x4248, π)
+    test_extendhfdf2(0xc248, -π)
     test_extendhfdf2(0x7c00, 0x1.987124876876324p+100)
     test_extendhfdf2(0x6e62, 0x1.988p+12)
     test_extendhfdf2(0x3c00, 0x1.0p+0)