From ef720bf71d422a28bf26d9bcbab6d61382ab47fc Mon Sep 17 00:00:00 2001
From: Oscar Smith
Date: Thu, 28 Jan 2021 14:06:42 -0600
Subject: [PATCH 1/4] float16 cbrt, 50% faster

---
 base/special/cbrt.jl | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/base/special/cbrt.jl b/base/special/cbrt.jl
index 23b518a87a9a7..6a3de31809d98 100644
--- a/base/special/cbrt.jl
+++ b/base/special/cbrt.jl
@@ -147,3 +147,20 @@ function cbrt(x::Union{Float32,Float64})
     t = _approx_cbrt(x)
     return _improve_cbrt(x, t)
 end
+
+function Base.cbrt(a::Float16)
+    if !isfinite(a) || iszero(a)
+        return a
+    end
+    x=Float32(a)
+    
+    # 5 bit approximation. Simpler than _approx_cbrt since subnormals can not appear
+    u = highword(x) & 0x7fff_ffff
+    v = div(u, UInt32(3)) + 0x2a5119f2
+    t = copysign(fromhighword(Float32, v), x)
+    
+    # 2 newton iterations
+    t = 0.33333334f0 * (2f0*t + x/(t*t))
+    t = 0.33333334f0 * (2f0*t + x/(t*t))
+    return Float16(t)
+end

From a8ad3121343cf99d8740f8781c894b379ea6de67 Mon Sep 17 00:00:00 2001
From: Oscar Smith
Date: Thu, 28 Jan 2021 17:47:44 -0600
Subject: [PATCH 2/4] fix

---
 base/special/cbrt.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/special/cbrt.jl b/base/special/cbrt.jl
index 6a3de31809d98..53a351fb5475a 100644
--- a/base/special/cbrt.jl
+++ b/base/special/cbrt.jl
@@ -148,7 +148,7 @@ function cbrt(x::Union{Float32,Float64})
     return _improve_cbrt(x, t)
 end
 
-function Base.cbrt(a::Float16)
+function cbrt(a::Float16)
     if !isfinite(a) || iszero(a)
         return a
     end

From a5b7e739ec1f16008bac0117d67631fb99705a7e Mon Sep 17 00:00:00 2001
From: Oscar Smith
Date: Sun, 31 Jan 2021 21:19:52 -0600
Subject: [PATCH 3/4] fix whitespace

---
 base/special/cbrt.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/base/special/cbrt.jl b/base/special/cbrt.jl
index 53a351fb5475a..a811e0a81cc65 100644
--- a/base/special/cbrt.jl
+++ b/base/special/cbrt.jl
@@ -153,12 +153,12 @@ function cbrt(a::Float16)
         return a
     end
     x=Float32(a)
-    
+
     # 5 bit approximation. Simpler than _approx_cbrt since subnormals can not appear
     u = highword(x) & 0x7fff_ffff
     v = div(u, UInt32(3)) + 0x2a5119f2
     t = copysign(fromhighword(Float32, v), x)
-    
+
     # 2 newton iterations
     t = 0.33333334f0 * (2f0*t + x/(t*t))
     t = 0.33333334f0 * (2f0*t + x/(t*t))

From 430b64dec64481262d52d8f7aba5bde8c8659561 Mon Sep 17 00:00:00 2001
From: Kristoffer Carlsson
Date: Wed, 17 Feb 2021 08:05:26 +0100
Subject: [PATCH 4/4] Update base/special/cbrt.jl

---
 base/special/cbrt.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/special/cbrt.jl b/base/special/cbrt.jl
index a811e0a81cc65..1de088ee66383 100644
--- a/base/special/cbrt.jl
+++ b/base/special/cbrt.jl
@@ -152,7 +152,7 @@ function cbrt(a::Float16)
     if !isfinite(a) || iszero(a)
         return a
     end
-    x=Float32(a)
+    x = Float32(a)
 
     # 5 bit approximation. Simpler than _approx_cbrt since subnormals can not appear
     u = highword(x) & 0x7fff_ffff