Skip to content

Commit

Permalink
faster hypot(::IEEEFloat...) (re-do) (#48645)
Browse files Browse the repository at this point in the history
Co-authored-by: mikmoore <mikmoore@users.noreply.github.com>
Co-authored-by: Dilum Aluthge <dilum@aluthge.com>
Co-authored-by: N5N3 <2642243996@qq.com>
  • Loading branch information
4 people authored Feb 21, 2023
1 parent aacfcf0 commit 1083815
Showing 1 changed file with 48 additions and 0 deletions.
48 changes: 48 additions & 0 deletions base/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,20 @@ function _hypot(x::NTuple{N,<:Number}) where {N}
end
end

function _hypot(x::NTuple{N,<:IEEEFloat}) where {N}
T = eltype(x)
infT = convert(T, Inf)
x = abs.(x) # doesn't change result but enables computational shortcuts
# note: any() was causing this to not inline for N=3 but mapreduce() was not
mapreduce(==(infT), |, x) && return infT # return Inf even if an argument is NaN
maxabs = reinterpret(T, maximum(z -> reinterpret(Signed, z), x)) # for abs(::IEEEFloat) values, a ::BitInteger cast does not change the result
maxabs > zero(T) || return maxabs # catch NaN before the @fastmath below, but also shortcut 0 since we can (remove if no more @fastmath below)
scale,invscale = scaleinv(maxabs)
# @fastmath(+) to allow reassociation (see #48129)
add_fast(x, y) = Core.Intrinsics.add_float_fast(x, y) # @fastmath is not available during bootstrap
return scale * sqrt(mapreduce(y -> abs2(y * invscale), add_fast, x))
end

atan(y::Real, x::Real) = atan(promote(float(y),float(x))...)
atan(y::T, x::T) where {T<:AbstractFloat} = Base.no_op_err("atan", T)

Expand Down Expand Up @@ -1070,6 +1084,40 @@ function frexp(x::T) where T<:IEEEFloat
return reinterpret(T, xu), k
end

"""
$(@__MODULE__).scaleinv(x)
Compute `(scale, invscale)` where `scale` and `invscale` are non-subnormal
(https://en.wikipedia.org/wiki/Subnormal_number) finite powers of two such that
`scale * invscale == 1` and `scale` is roughly on the order of `abs(x)`.
Inf, NaN, and zero inputs also result in finite nonzero outputs.
These values are useful to scale computations to prevent overflow and underflow
without round-off errors or division.
UNSTABLE DETAIL: For `x isa IEEEFLoat`, `scale` is chosen to be the
`prevpow(2,abs(x))` when possible, but is never less than floatmin(x) or greater
than inv(floatmin(x)). `Inf` and `NaN` resolve to `inv(floatmin(x))`. This
behavior is subject to change.
# Examples
```jldoctest
julia> $(@__MODULE__).scaleinv(7.5)
(4.0, 0.25)
```
"""
function scaleinv(x::T) where T<:IEEEFloat
# by removing the sign and significand and restricting values to a limited range,
# we can invert a number using bit-twiddling instead of division
U = uinttype(T)
umin = reinterpret(U, floatmin(T))
umax = reinterpret(U, inv(floatmin(T)))
emask = exponent_mask(T) # used to strip sign and significand
u = clamp(reinterpret(U, x) & emask, umin, umax)
scale = reinterpret(T, u)
invscale = reinterpret(T, umin + umax - u) # inv(scale)
return scale, invscale
end

# NOTE: This `rem` method is adapted from the msun `remainder` and `remainderf`
# functions, which are under the following license:
#
Expand Down

0 comments on commit 1083815

Please sign in to comment.