Skip to content

Commit

Permalink
Implement Float16 conversions using integer arithmetic.
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed Sep 11, 2020
1 parent a96fa08 commit 3cf2f31
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 81 deletions.
35 changes: 21 additions & 14 deletions base/runtime/runtime.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ isapple() = (KERNEL === :Apple || KERNEL === :Darwin)

## Float16 intrinsics

# note that we can't actually use Float16 in these implementations, as LLVM will happily
# lower, e.g., `reinterpret(Float16, ::UInt16)` / `bitcast i16 to half` to `truncsfhf2`
# because it wants to store the `half` in a single-precision register. this causes recursion
# when compiling these intrinsics. LLVM's compiler-rt similarly returns i16 for Float16.

# Float32 -> Float16 algorithm from:
# "Fast Half Float Conversion" by Jeroen van der Zijp
# ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
Expand Down Expand Up @@ -73,9 +78,9 @@ end
# truncation
function truncsfhf2(val::Float32)
f = reinterpret(UInt32, val)
if isnan(val)
if f&0x7fffffff > 0x7f800000 # isnan without reinterpreting as Float32
t = 0x8000 (0x8000 & ((f >> 0x10) % UInt16))
return reinterpret(Float16, t ((f >> 0xd) % UInt16))
return t ((f >> 0xd) % UInt16)
end
i = ((f & ~Base.significand_mask(Float32)) >> Base.significand_bits(Float32)) + 1
@inbounds sh = shifttable[i]
Expand All @@ -95,17 +100,18 @@ function truncsfhf2(val::Float32)
h += UInt16(1)
end
end
reinterpret(Float16, h)
h
end
if !Sys.isapple()
@ccallable Float16 __truncsfhf2(val::Float32) = truncsfhf2(val)
@ccallable Float16 __gnu_f2h_ieee(val::Float32) = truncsfhf2(val)
@ccallable Float16 __truncdfhf2(x::Float64) = truncsfhf2(Float32(x))
# Float64 -> Float16 truncation, returning the raw half-precision bits.
# The value is first narrowed to Float32 and then handed to the single-precision routine.
# NOTE(review): this rounds twice (Float64 -> Float32 -> Float16); confirm that is
# acceptable for inputs close to a Float16 rounding boundary.
function truncdfhf2(x::Float64)
    single = Float32(x)
    return truncsfhf2(single)
end
if !isapple()
@ccallable UInt16 __truncsfhf2(val::Float32) = truncsfhf2(val)
@ccallable UInt16 __gnu_f2h_ieee(val::Float32) = truncsfhf2(val)
@ccallable UInt16 __truncdfhf2(val::Float64) = truncdfhf2(val)
end

# extension
function extendhfsf2(val::Float16)
local ival::UInt32 = reinterpret(UInt16, val)
function extendhfsf2(val::UInt16)
local ival::UInt32 = val
local sign::UInt32 = (ival & 0x8000) >> 15
local exp::UInt32 = (ival & 0x7c00) >> 10
local sig::UInt32 = (ival & 0x3ff) >> 0
Expand Down Expand Up @@ -143,12 +149,13 @@ function extendhfsf2(val::Float16)
sig = sig << (23 - 10)
ret = sign | exp | sig
end
return reinterpret(Float32, ret)
reinterpret(Float32, ret)
end
if !Sys.isapple()
@ccallable Float32 __extendhfsf2(val::Float16) = extendhfsf2(val)
@ccallable Float32 __gnu_h2f_ieee(val::Float16) = extendhfsf2(val)
# Float16 (passed as raw UInt16 bits) -> Float64 extension.
# Extends to Float32 with `extendhfsf2` and then widens that result to Float64.
function extendhfdf2(x::UInt16)
    single = extendhfsf2(x)
    return Float64(single)
end
if !isapple()
@ccallable Float32 __extendhfsf2(val::UInt16) = extendhfsf2(val)
@ccallable Float32 __gnu_h2f_ieee(val::UInt16) = extendhfsf2(val)
end
@ccallable Float64 __extendhfdf2(x::Float16) = Float64(extendhfdf2(x))
# C-callable half -> double extension. The half value is passed as its raw UInt16 bits.
# The declared return type must be Float64: `extendhfdf2` produces a Float64, and declaring
# Float32 here would narrow the result back to single precision before returning it.
@ccallable Float64 __extendhfdf2(val::UInt16) = extendhfdf2(val)

end
177 changes: 110 additions & 67 deletions test/runtime.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,98 +3,141 @@
using Base: Runtime

@testset "truncdfhf2" begin
@test Runtime.__truncdfhf2(NaN) === NaN16
@test Runtime.__truncdfhf2(Inf) === Inf16
@test Runtime.__truncdfhf2(-Inf) === -Inf16
@test Runtime.__truncdfhf2(0.0) === reinterpret(Float16, 0x0000)
@test Runtime.__truncdfhf2(-0.0) === reinterpret(Float16, 0x8000)
@test Runtime.__truncdfhf2(3.1415926535) === reinterpret(Float16, 0x4248)
@test Runtime.__truncdfhf2(-3.1415926535) === reinterpret(Float16, 0xc248)
@test Runtime.__truncdfhf2(0x1.987124876876324p+1000) === reinterpret(Float16, 0x7c00)
@test Runtime.__truncdfhf2(0x1.987124876876324p+12) === reinterpret(Float16, 0x6e62)
@test Runtime.__truncdfhf2(0x1.0p+0) === reinterpret(Float16, 0x3c00)
@test Runtime.__truncdfhf2(0x1.0p-14) === reinterpret(Float16, 0x0400)
# Check that truncating `a` (converted to Float64) yields exactly the bit pattern of
# `expected`, which may be given either as a Float16 or directly as UInt16 bits.
function test_truncdfhf2(a, expected)
    @test Runtime.truncdfhf2(Float64(a)) === reinterpret(UInt16, expected)
end
# NaN
test_truncdfhf2(NaN, NaN16)
# inf
test_truncdfhf2(Inf, Inf16)
test_truncdfhf2(-Inf, -Inf16)
# zero
test_truncdfhf2(0.0, 0x0000)
test_truncdfhf2(-0.0, 0x8000)
test_truncdfhf2(3.1415926535, 0x4248)
test_truncdfhf2(-3.1415926535, 0xc248)
test_truncdfhf2(0x1.987124876876324p+1000, 0x7c00)
test_truncdfhf2(0x1.987124876876324p+12, 0x6e62)
test_truncdfhf2(0x1.0p+0, 0x3c00)
test_truncdfhf2(0x1.0p-14, 0x0400)
# denormal
@test Runtime.__truncdfhf2(0x1.0p-20) === reinterpret(Float16, 0x0010)
@test Runtime.__truncdfhf2(0x1.0p-24) === reinterpret(Float16, 0x0001)
@test Runtime.__truncdfhf2(-0x1.0p-24) === reinterpret(Float16, 0x8001)
@test Runtime.__truncdfhf2(0x1.5p-25) === reinterpret(Float16, 0x0001)
test_truncdfhf2(0x1.0p-20, 0x0010)
test_truncdfhf2(0x1.0p-24, 0x0001)
test_truncdfhf2(-0x1.0p-24, 0x8001)
test_truncdfhf2(0x1.5p-25, 0x0001)
# and back to zero
@test Runtime.__truncdfhf2(0x1.0p-25) === reinterpret(Float16, 0x0000)
@test Runtime.__truncdfhf2(-0x1.0p-25) === reinterpret(Float16, 0x8000)
test_truncdfhf2(0x1.0p-25, 0x0000)
test_truncdfhf2(-0x1.0p-25, 0x8000)
# max (precise)
@test Runtime.__truncdfhf2(65504.0) === reinterpret(Float16, 0x7bff)
test_truncdfhf2(65504.0, 0x7bff)
# max (rounded)
@test Runtime.__truncdfhf2(65519.0) === reinterpret(Float16, 0x7bff)
test_truncdfhf2(65519.0, 0x7bff)
# max (to +inf)
@test Runtime.__truncdfhf2(65520.0) === reinterpret(Float16, 0x7c00)
@test Runtime.__truncdfhf2(-65520.0) === reinterpret(Float16, 0xfc00)
@test Runtime.__truncdfhf2(65536.0) === reinterpret(Float16, 0x7c00)
test_truncdfhf2(65520.0, 0x7c00)
test_truncdfhf2(-65520.0, 0xfc00)
test_truncdfhf2(65536.0, 0x7c00)
end

@testset "truncsfhf2" begin
# Check that truncating `a` (converted to Float32) yields exactly the bit pattern of
# `expected`, which may be given either as a Float16 or directly as UInt16 bits.
function test_truncsfhf2(a, expected)
    @test Runtime.truncsfhf2(Float32(a)) === reinterpret(UInt16, expected)
end
# NaN
@test Runtime.__truncsfhf2(NaN32) === reinterpret(Float16, 0x7e00)
test_truncsfhf2(NaN32, NaN16)
# inf
@test Runtime.__truncsfhf2(Inf32) === reinterpret(Float16, 0x7c00)
@test Runtime.__truncsfhf2(-Inf32) === reinterpret(Float16, 0xfc00)
test_truncsfhf2(Inf32, Inf16)
test_truncsfhf2(-Inf32, -Inf16)
# zero
@test Runtime.__truncsfhf2(0.0f0) === reinterpret(Float16, 0x0000)
@test Runtime.__truncsfhf2(-0.0f0) === reinterpret(Float16, 0x8000)
@test Runtime.__truncsfhf2(3.1415926535f0) === reinterpret(Float16, 0x4248)
@test Runtime.__truncsfhf2(-3.1415926535f0) === reinterpret(Float16, 0xc248)
@test Runtime.__truncsfhf2(Float32(0x1.987124876876324p+100)) === reinterpret(Float16, 0x7c00)
@test Runtime.__truncsfhf2(Float32(0x1.987124876876324p+12)) === reinterpret(Float16, 0x6e62)
@test Runtime.__truncsfhf2(Float32(0x1.0p+0)) === reinterpret(Float16, 0x3c00)
@test Runtime.__truncsfhf2(Float32(0x1.0p-14)) === reinterpret(Float16, 0x0400)
test_truncsfhf2(0.0f0, 0x0000)
test_truncsfhf2(-0.0f0, 0x8000)
test_truncsfhf2(3.1415926535f0, 0x4248)
test_truncsfhf2(-3.1415926535f0, 0xc248)
test_truncsfhf2(0x1.987124876876324p+100, 0x7c00)
test_truncsfhf2(0x1.987124876876324p+12, 0x6e62)
test_truncsfhf2(0x1.0p+0, 0x3c00)
test_truncsfhf2(0x1.0p-14, 0x0400)
# denormal
@test Runtime.__truncsfhf2(Float32(0x1.0p-20)) === reinterpret(Float16, 0x0010)
@test Runtime.__truncsfhf2(Float32(0x1.0p-24)) === reinterpret(Float16, 0x0001)
@test Runtime.__truncsfhf2(Float32(-0x1.0p-24)) === reinterpret(Float16, 0x8001)
@test Runtime.__truncsfhf2(Float32(0x1.5p-25)) === reinterpret(Float16, 0x0001)
test_truncsfhf2(0x1.0p-20, 0x0010)
test_truncsfhf2(0x1.0p-24, 0x0001)
test_truncsfhf2(-0x1.0p-24, 0x8001)
test_truncsfhf2(0x1.5p-25, 0x0001)
# and back to zero
@test Runtime.__truncsfhf2(Float32(0x1.0p-25)) === reinterpret(Float16, 0x0000)
@test Runtime.__truncsfhf2(Float32(-0x1.0p-25)) === reinterpret(Float16, 0x8000)
test_truncsfhf2(0x1.0p-25, 0x0000)
test_truncsfhf2(-0x1.0p-25, 0x8000)
# max (precise)
@test Runtime.__truncsfhf2(65504.0f0) === reinterpret(Float16, 0x7bff)
test_truncsfhf2(65504.0f0, 0x7bff)
# max (rounded)
@test Runtime.__truncsfhf2(65519.0f0) === reinterpret(Float16, 0x7bff)
test_truncsfhf2(65519.0f0, 0x7bff)
# max (to +inf)
@test Runtime.__truncsfhf2(65520.0f0) === reinterpret(Float16, 0x7c00)
@test Runtime.__truncsfhf2(65536.0f0) === reinterpret(Float16, 0x7c00)
@test Runtime.__truncsfhf2(-65520.0f0) === reinterpret(Float16, 0xfc00)
test_truncsfhf2(65520.0f0, 0x7c00)
test_truncsfhf2(65536.0f0, 0x7c00)
test_truncsfhf2(-65520.0f0, 0xfc00)
end

@testset "extendhfsf2" begin
# These tests are taken from the compiler-rt test suite. As of 3.9.0, the tests
# there are done with compareResultH (i.e., after casting to UInt16).
# Tests that are marked broken fail as === Float32 comparisons.
# Extend the half-precision bit pattern `a` to Float32, then compare against `expected`
# at Float16 precision: both sides are rounded to Float16 and their raw bits compared.
function test_extendhfsf2(a::UInt16, expected)
    actual_bits = reinterpret(UInt16, Float16(Runtime.extendhfsf2(a)))
    expected_bits = reinterpret(UInt16, Float16(expected))
    @test actual_bits == expected_bits
end
# NaN
test_extendhfsf2(0x7e00, NaN32)
# inf
test_extendhfsf2(0x7c00, Inf32)
test_extendhfsf2(0xfc00, -Inf32)
# zero
test_extendhfsf2(0x0000, 0.0f0)
test_extendhfsf2(0x8000, -0.0f0)
test_extendhfsf2(0x4248, π)
test_extendhfsf2(0xc248, -π)
test_extendhfsf2(0x7c00, 0x1.987124876876324p+100)
test_extendhfsf2(0x6e62, 0x1.988p+12)
test_extendhfsf2(0x3c00, 0x1.0p+0)
test_extendhfsf2(0x0400, 0x1.0p-14)
# denormal
test_extendhfsf2(0x0010, 0x1.0p-20)
test_extendhfsf2(0x0001, 0x1.0p-24)
test_extendhfsf2(0x8001, -0x1.0p-24)
test_extendhfsf2(0x0001, 0x1.5p-25)
# and back to zero
test_extendhfsf2(0x0000, 0x1.0p-25)
test_extendhfsf2(0x8000, -0x1.0p-25)
# max (precise)
test_extendhfsf2(0x7bff, 65504.0f0)
# max (rounded)
test_extendhfsf2(0x7bff, 65504.0f0)
end

##
@testset "extendhfdf2" begin
# Extend the half-precision bit pattern `a` to Float64, then compare against `expected`
# at Float16 precision: both sides are rounded to Float16 and their raw bits compared.
function test_extendhfdf2(a::UInt16, expected)
    # `Runtime.extendhfdf2` already returns a Float64 value (not raw bits), so it can be
    # converted to Float16 directly; no reinterpret is needed.
    b = Runtime.extendhfdf2(a)
    b16 = Float16(b)
    expected16 = Float16(expected)
    @test reinterpret(UInt16, b16) == reinterpret(UInt16, expected16)
end
# NaN
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x7e00)) === NaN32
test_extendhfdf2(0x7e00, NaN64)
# inf
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x7c00)) === Inf32
@test Runtime.__extendhfsf2(reinterpret(Float16, 0xfc00)) === -Inf32
test_extendhfdf2(0x7c00, Inf64)
test_extendhfdf2(0xfc00, -Inf64)
# zero
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x0000)) === 0.0f0
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x8000)) === -0.0f0
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x4248)) Float32(π)
@test Runtime.__extendhfsf2(reinterpret(Float16, 0xc248)) Float32(-π)
# @test Runtime.__extendhfsf2(reinterpret(Float16, 0x7c00)) === Float32(0x1.987124876876324p+100)
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x6e62)) === Float32(0x1.988p+12)
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x3c00)) === Float32(0x1.0p+0)
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x0400)) === Float32(0x1.0p-14)
test_extendhfdf2(0x0000, 0.0)
test_extendhfdf2(0x8000, -0.0)
test_extendhfdf2(0x4248, π)
test_extendhfdf2(0xc248, -π)
test_extendhfdf2(0x7c00, 0x1.987124876876324p+100)
test_extendhfdf2(0x6e62, 0x1.988p+12)
test_extendhfdf2(0x3c00, 0x1.0p+0)
test_extendhfdf2(0x0400, 0x1.0p-14)
# denormal
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x0010)) === Float32(0x1.0p-20)
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x0001)) === Float32(0x1.0p-24)
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x8001)) === Float32(-0x1.0p-24)
@test_broken Runtime.__extendhfsf2(reinterpret(Float16, 0x0001)) === Float32(0x1.5p-25)
test_extendhfdf2(0x0010, 0x1.0p-20)
test_extendhfdf2(0x0001, 0x1.0p-24)
test_extendhfdf2(0x8001, -0x1.0p-24)
test_extendhfdf2(0x0001, 0x1.5p-25)
# and back to zero
# @test Runtime.__extendhfsf2(reinterpret(Float16, 0x0000)) === Float32(0x1.0p-25)
# @test Runtime.__extendhfsf2(reinterpret(Float16, 0x8000)) === Float32(-0x1.0p-25)
test_extendhfdf2(0x0000, 0x1.0p-25)
test_extendhfdf2(0x8000, -0x1.0p-25)
# max (precise)
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x7bff)) === 65504.0f0
test_extendhfdf2(0x7bff, 65504.0)
# max (rounded)
@test Runtime.__extendhfsf2(reinterpret(Float16, 0x7bff)) === 65504.0f0
test_extendhfdf2(0x7bff, 65504.0)
end

0 comments on commit 3cf2f31

Please sign in to comment.