diff --git a/base/atomics.jl b/base/atomics.jl index 1a980eb6561ec..97405d88fd408 100644 --- a/base/atomics.jl +++ b/base/atomics.jl @@ -335,7 +335,7 @@ const llvmtypes = IdDict{Any,String}( Int32 => "i32", UInt32 => "i32", Int64 => "i64", UInt64 => "i64", Int128 => "i128", UInt128 => "i128", - Float16 => "i16", # half + Float16 => "half", Float32 => "float", Float64 => "double", ) diff --git a/base/boot.jl b/base/boot.jl index 7c8d05cc132c4..54d852ca96416 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -779,11 +779,11 @@ Unsigned(x::Int64) = UInt64(x) Signed(x::UInt128) = Int128(x) Unsigned(x::Int128) = UInt128(x) -Signed(x::Union{Float32, Float64, Bool}) = Int(x) -Unsigned(x::Union{Float32, Float64, Bool}) = UInt(x) +Signed(x::Union{Float16, Float32, Float64, Bool}) = Int(x) +Unsigned(x::Union{Float16, Float32, Float64, Bool}) = UInt(x) Integer(x::Integer) = x -Integer(x::Union{Float32, Float64}) = Int(x) +Integer(x::Union{Float16, Float32, Float64}) = Int(x) # Binding for the julia parser, called as # diff --git a/base/essentials.jl b/base/essentials.jl index fb360ea6482db..1158fa7483d73 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -420,8 +420,6 @@ julia> reinterpret(Float32, UInt32[1 2 3 4 5]) ``` """ reinterpret(::Type{T}, x) where {T} = bitcast(T, x) -reinterpret(::Type{Unsigned}, x::Float16) = reinterpret(UInt16,x) -reinterpret(::Type{Signed}, x::Float16) = reinterpret(Int16,x) """ sizeof(T::DataType) diff --git a/base/float.jl b/base/float.jl index 242fddfca8946..ec28e4f741f14 100644 --- a/base/float.jl +++ b/base/float.jl @@ -47,14 +47,48 @@ A not-a-number value of type [`Float64`](@ref). """ NaN, NaN64 +# bit patterns +reinterpret(::Type{Unsigned}, x::Float64) = reinterpret(UInt64, x) +reinterpret(::Type{Unsigned}, x::Float32) = reinterpret(UInt32, x) +reinterpret(::Type{Unsigned}, x::Float16) = reinterpret(UInt16, x) +reinterpret(::Type{Signed}, x::Float64) = reinterpret(Int64, x) +reinterpret(::Type{Signed}, x::Float32) = reinterpret(Int32, x) +reinterpret(::Type{Signed}, x::Float16) = reinterpret(Int16, x) + +sign_mask(::Type{Float64}) = 0x8000_0000_0000_0000 +exponent_mask(::Type{Float64}) = 0x7ff0_0000_0000_0000 +exponent_one(::Type{Float64}) = 0x3ff0_0000_0000_0000 +exponent_half(::Type{Float64}) = 0x3fe0_0000_0000_0000 +significand_mask(::Type{Float64}) = 0x000f_ffff_ffff_ffff + +sign_mask(::Type{Float32}) = 0x8000_0000 +exponent_mask(::Type{Float32}) = 0x7f80_0000 +exponent_one(::Type{Float32}) = 0x3f80_0000 +exponent_half(::Type{Float32}) = 0x3f00_0000 +significand_mask(::Type{Float32}) = 0x007f_ffff + +sign_mask(::Type{Float16}) = 0x8000 +exponent_mask(::Type{Float16}) = 0x7c00 +exponent_one(::Type{Float16}) = 0x3c00 +exponent_half(::Type{Float16}) = 0x3800 +significand_mask(::Type{Float16}) = 0x03ff + +for T in (Float16, Float32, Float64) + @eval significand_bits(::Type{$T}) = $(trailing_ones(significand_mask(T))) + @eval exponent_bits(::Type{$T}) = $(sizeof(T)*8 - significand_bits(T) - 1) + @eval exponent_bias(::Type{$T}) = $(Int(exponent_one(T) >> significand_bits(T))) + # maximum float exponent + @eval exponent_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T)) - exponent_bias(T)) + # maximum float exponent without bias + @eval exponent_raw_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T))) +end + ## conversions to floating-point ## + +# TODO: deprecate in 2.0 Float16(x::Integer) = convert(Float16, convert(Float32, x)::Float32) -for t in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128) - @eval 
promote_rule(::Type{Float16}, ::Type{$t}) = Float16 -end -promote_rule(::Type{Float16}, ::Type{Bool}) = Float16 -for t1 in (Float32, Float64) +for t1 in (Float16, Float32, Float64) for st in (Int8, Int16, Int32, Int64) @eval begin (::Type{$t1})(x::($st)) = sitofp($t1, x) @@ -68,7 +102,6 @@ for t1 in (Float32, Float64) end end end -(::Type{T})(x::Float16) where {T<:Integer} = T(Float32(x)) Bool(x::Real) = x==0 ? false : x==1 ? true : throw(InexactError(:Bool, Bool, x)) @@ -76,6 +109,8 @@ promote_rule(::Type{Float64}, ::Type{UInt128}) = Float64 promote_rule(::Type{Float64}, ::Type{Int128}) = Float64 promote_rule(::Type{Float32}, ::Type{UInt128}) = Float32 promote_rule(::Type{Float32}, ::Type{Int128}) = Float32 +promote_rule(::Type{Float16}, ::Type{UInt128}) = Float16 +promote_rule(::Type{Float16}, ::Type{Int128}) = Float16 function Float64(x::UInt128) x == 0 && return 0.0 @@ -137,123 +172,17 @@ function Float32(x::Int128) reinterpret(Float32, s | d + y) end -# Float32 -> Float16 algorithm from: -# "Fast Half Float Conversion" by Jeroen van der Zijp -# ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf -# -# With adjustments for round-to-nearest, ties to even. -# -let _basetable = Vector{UInt16}(undef, 512), - _shifttable = Vector{UInt8}(undef, 512) - for i = 0:255 - e = i - 127 - if e < -25 # Very small numbers map to zero - _basetable[i|0x000+1] = 0x0000 - _basetable[i|0x100+1] = 0x8000 - _shifttable[i|0x000+1] = 25 - _shifttable[i|0x100+1] = 25 - elseif e < -14 # Small numbers map to denorms - _basetable[i|0x000+1] = 0x0000 - _basetable[i|0x100+1] = 0x8000 - _shifttable[i|0x000+1] = -e-1 - _shifttable[i|0x100+1] = -e-1 - elseif e <= 15 # Normal numbers just lose precision - _basetable[i|0x000+1] = ((e+15)<<10) - _basetable[i|0x100+1] = ((e+15)<<10) | 0x8000 - _shifttable[i|0x000+1] = 13 - _shifttable[i|0x100+1] = 13 - elseif e < 128 # Large numbers map to Infinity - _basetable[i|0x000+1] = 0x7C00 - _basetable[i|0x100+1] = 0xFC00 - _shifttable[i|0x000+1] = 24 - _shifttable[i|0x100+1] = 24 - else # Infinity and NaN's stay Infinity and NaN's - _basetable[i|0x000+1] = 0x7C00 - _basetable[i|0x100+1] = 0xFC00 - _shifttable[i|0x000+1] = 13 - _shifttable[i|0x100+1] = 13 - end - end - global const shifttable = (_shifttable...,) - global const basetable = (_basetable...,) -end - -function Float16(val::Float32) - f = reinterpret(UInt32, val) - if isnan(val) - t = 0x8000 ⊻ (0x8000 & ((f >> 0x10) % UInt16)) - return reinterpret(Float16, t ⊻ ((f >> 0xd) % UInt16)) - end - i = ((f & ~significand_mask(Float32)) >> significand_bits(Float32)) + 1 - @inbounds sh = shifttable[i] - f &= significand_mask(Float32) - # If `val` is subnormal, the tables are set up to force the - # result to 0, so the significand has an implicit `1` in the - # cases we care about. 
- f |= significand_mask(Float32) + 0x1 - @inbounds h = (basetable[i] + (f >> sh) & significand_mask(Float16)) % UInt16 - # round - # NOTE: we maybe should ignore NaNs here, but the payload is - # getting truncated anyway so "rounding" it might not matter - nextbit = (f >> (sh-1)) & 1 - if nextbit != 0 && (h & 0x7C00) != 0x7C00 - # Round halfway to even or check lower bits - if h&1 == 1 || (f & ((1<<(sh-1))-1)) != 0 - h += UInt16(1) - end - end - reinterpret(Float16, h) -end - -function Float32(val::Float16) - local ival::UInt32 = reinterpret(UInt16, val) - local sign::UInt32 = (ival & 0x8000) >> 15 - local exp::UInt32 = (ival & 0x7c00) >> 10 - local sig::UInt32 = (ival & 0x3ff) >> 0 - local ret::UInt32 - - if exp == 0 - if sig == 0 - sign = sign << 31 - ret = sign | exp | sig - else - n_bit = 1 - bit = 0x0200 - while (bit & sig) == 0 - n_bit = n_bit + 1 - bit = bit >> 1 - end - sign = sign << 31 - exp = ((-14 - n_bit + 127) << 23) % UInt32 - sig = ((sig & (~bit)) << n_bit) << (23 - 10) - ret = sign | exp | sig - end - elseif exp == 0x1f - if sig == 0 # Inf - if sign == 0 - ret = 0x7f800000 - else - ret = 0xff800000 - end - else # NaN - ret = 0x7fc00000 | (sign<<31) | (sig<<(23-10)) - end - else - sign = sign << 31 - exp = ((exp - 15 + 127) << 23) % UInt32 - sig = sig << (23 - 10) - ret = sign | exp | sig - end - return reinterpret(Float32, ret) -end +# TODO: optimize +Float16(x::UInt128) = convert(Float16, Float32(x)) +Float16(x::Int128) = convert(Float16, Float32(x)) -#convert(::Type{Float16}, x::Float32) = fptrunc(Float16, x) +Float16(x::Float32) = fptrunc(Float16, x) +Float16(x::Float64) = fptrunc(Float16, x) Float32(x::Float64) = fptrunc(Float32, x) -Float16(x::Float64) = Float16(Float32(x)) -#convert(::Type{Float32}, x::Float16) = fpext(Float32, x) +Float32(x::Float16) = fpext(Float32, x) Float64(x::Float32) = fpext(Float64, x) -Float64(x::Float16) = Float64(Float32(x)) +Float64(x::Float16) = fpext(Float64, x) AbstractFloat(x::Bool) = Float64(x) AbstractFloat(x::Int8) = Float64(x) @@ -305,14 +234,14 @@ function unsafe_trunc end for Ti in (Int8, Int16, Int32, Int64) @eval begin - unsafe_trunc(::Type{$Ti}, x::Float16) = unsafe_trunc($Ti, Float32(x)) + unsafe_trunc(::Type{$Ti}, x::Float16) = fptosi($Ti, x) unsafe_trunc(::Type{$Ti}, x::Float32) = fptosi($Ti, x) unsafe_trunc(::Type{$Ti}, x::Float64) = fptosi($Ti, x) end end for Ti in (UInt8, UInt16, UInt32, UInt64) @eval begin - unsafe_trunc(::Type{$Ti}, x::Float16) = unsafe_trunc($Ti, Float32(x)) + unsafe_trunc(::Type{$Ti}, x::Float16) = fptoui($Ti, x) unsafe_trunc(::Type{$Ti}, x::Float32) = fptoui($Ti, x) unsafe_trunc(::Type{$Ti}, x::Float64) = fptoui($Ti, x) end @@ -351,35 +280,33 @@ unsafe_trunc(::Type{Int128}, x::Float16) = unsafe_trunc(Int128, Float32(x)) # matches convert methods # also determines floor, ceil, round +trunc(::Type{Signed}, x::Float16) = trunc(Int,x) trunc(::Type{Signed}, x::Float32) = trunc(Int,x) trunc(::Type{Signed}, x::Float64) = trunc(Int,x) +trunc(::Type{Unsigned}, x::Float16) = trunc(UInt,x) trunc(::Type{Unsigned}, x::Float32) = trunc(UInt,x) trunc(::Type{Unsigned}, x::Float64) = trunc(UInt,x) +trunc(::Type{Integer}, x::Float16) = trunc(Int,x) trunc(::Type{Integer}, x::Float32) = trunc(Int,x) trunc(::Type{Integer}, x::Float64) = trunc(Int,x) -trunc(::Type{T}, x::Float16) where {T<:Integer} = trunc(T, Float32(x)) # fallbacks floor(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundDown)) -floor(::Type{T}, x::Float16) where {T<:Integer} = floor(T, Float32(x)) ceil(::Type{T}, x::AbstractFloat) 
where {T<:Integer} = trunc(T,round(x, RoundUp)) -ceil(::Type{T}, x::Float16) where {T<:Integer} = ceil(T, Float32(x)) round(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundNearest)) -round(::Type{T}, x::Float16) where {T<:Integer} = round(T, Float32(x)) round(x::Float64, r::RoundingMode{:ToZero}) = trunc_llvm(x) round(x::Float32, r::RoundingMode{:ToZero}) = trunc_llvm(x) +round(x::Float16, r::RoundingMode{:ToZero}) = trunc_llvm(x) round(x::Float64, r::RoundingMode{:Down}) = floor_llvm(x) round(x::Float32, r::RoundingMode{:Down}) = floor_llvm(x) +round(x::Float16, r::RoundingMode{:Down}) = floor_llvm(x) round(x::Float64, r::RoundingMode{:Up}) = ceil_llvm(x) round(x::Float32, r::RoundingMode{:Up}) = ceil_llvm(x) +round(x::Float16, r::RoundingMode{:Up}) = ceil_llvm(x) round(x::Float64, r::RoundingMode{:Nearest}) = rint_llvm(x) round(x::Float32, r::RoundingMode{:Nearest}) = rint_llvm(x) - -round(x::Float16, r::RoundingMode{:ToZero}) = Float16(round(Float32(x), r)) -round(x::Float16, r::RoundingMode{:Down}) = Float16(round(Float32(x), r)) -round(x::Float16, r::RoundingMode{:Up}) = Float16(round(Float32(x), r)) -round(x::Float16, r::RoundingMode{:Nearest}) = Float16(round(Float32(x), r)) +round(x::Float16, r::RoundingMode{:Nearest}) = rint_llvm(x) ## floating point promotions ## promote_rule(::Type{Float32}, ::Type{Float16}) = Float32 @@ -392,36 +319,30 @@ widen(::Type{Float32}) = Float64 ## floating point arithmetic ## -(x::Float64) = neg_float(x) -(x::Float32) = neg_float(x) --(x::Float16) = reinterpret(Float16, reinterpret(UInt16, x) ⊻ 0x8000) +-(x::Float16) = neg_float(x) -for op in (:+, :-, :*, :/, :\, :^) - @eval ($op)(a::Float16, b::Float16) = Float16(($op)(Float32(a), Float32(b))) -end ++(x::Float16, y::Float16) = add_float(x, y) +(x::Float32, y::Float32) = add_float(x, y) +(x::Float64, y::Float64) = add_float(x, y) +-(x::Float16, y::Float16) = sub_float(x, y) -(x::Float32, y::Float32) = sub_float(x, y) -(x::Float64, y::Float64) = sub_float(x, y) +*(x::Float16, y::Float16) = mul_float(x, y) *(x::Float32, y::Float32) = mul_float(x, y) *(x::Float64, y::Float64) = mul_float(x, y) +/(x::Float16, y::Float16) = div_float(x, y) /(x::Float32, y::Float32) = div_float(x, y) /(x::Float64, y::Float64) = div_float(x, y) +muladd(x::Float16, y::Float16, z::Float16) = muladd_float(x, y, z) muladd(x::Float32, y::Float32, z::Float32) = muladd_float(x, y, z) muladd(x::Float64, y::Float64, z::Float64) = muladd_float(x, y, z) -function muladd(a::Float16, b::Float16, c::Float16) - Float16(muladd(Float32(a), Float32(b), Float32(c))) -end # TODO: faster floating point div? # TODO: faster floating point fld? # TODO: faster floating point mod? 
-for func in (:div,:fld,:cld,:rem,:mod) - @eval begin - $func(a::Float16,b::Float16) = Float16($func(Float32(a),Float32(b))) - end -end - +rem(x::Float16, y::Float16) = rem_float(x, y) rem(x::Float32, y::Float32) = rem_float(x, y) rem(x::Float64, y::Float64) = rem_float(x, y) @@ -439,33 +360,25 @@ function mod(x::T, y::T) where T<:AbstractFloat end ## floating point comparisons ## -function ==(x::Float16, y::Float16) - ix = reinterpret(UInt16,x) - iy = reinterpret(UInt16,y) - if (ix|iy)&0x7fff > 0x7c00 #isnan(x) || isnan(y) - return false - end - if (ix|iy)&0x7fff == 0x0000 - return true - end - return ix == iy -end +==(x::Float16, y::Float16) = eq_float(x, y) ==(x::Float32, y::Float32) = eq_float(x, y) ==(x::Float64, y::Float64) = eq_float(x, y) +!=(x::Float16, y::Float16) = ne_float(x, y) !=(x::Float32, y::Float32) = ne_float(x, y) !=(x::Float64, y::Float64) = ne_float(x, y) +<( x::Float16, y::Float16) = lt_float(x, y) <( x::Float32, y::Float32) = lt_float(x, y) <( x::Float64, y::Float64) = lt_float(x, y) +<=(x::Float16, y::Float16) = le_float(x, y) <=(x::Float32, y::Float32) = le_float(x, y) <=(x::Float64, y::Float64) = le_float(x, y) +isequal(x::Float16, y::Float16) = fpiseq(x, y) isequal(x::Float32, y::Float32) = fpiseq(x, y) isequal(x::Float64, y::Float64) = fpiseq(x, y) +isless( x::Float16, y::Float16) = fpislt(x, y) isless( x::Float32, y::Float32) = fpislt(x, y) isless( x::Float64, y::Float64) = fpislt(x, y) -for op in (:<, :<=, :isless) - @eval ($op)(a::Float16, b::Float16) = ($op)(Float32(a), Float32(b)) -end # Exact Float (Tf) vs Integer (Ti) comparisons # Assumes: @@ -481,7 +394,7 @@ end # b. unsafe_convert undefined behaviour if fy == Tf(typemax(Ti)) # (but consequently x == fy > y) for Ti in (Int64,UInt64,Int128,UInt128) - for Tf in (Float32,Float64) + for Tf in (Float16,Float32,Float64) @eval begin function ==(x::$Tf, y::$Ti) fy = ($Tf)(y) @@ -523,7 +436,7 @@ for op in (:(==), :<, :<=) end -abs(x::Float16) = reinterpret(Float16, reinterpret(UInt16, x) & 0x7fff) +abs(x::Float16) = abs_float(x) abs(x::Float32) = abs_float(x) abs(x::Float64) = abs_float(x) @@ -534,11 +447,9 @@ Test whether a number value is a NaN, an indeterminate value which is neither an nor a finite number ("not a number"). """ isnan(x::AbstractFloat) = (x != x)::Bool -isnan(x::Float16) = reinterpret(UInt16,x)&0x7fff > 0x7c00 isnan(x::Real) = false isfinite(x::AbstractFloat) = x - x == 0 -isfinite(x::Float16) = reinterpret(UInt16,x)&0x7c00 != 0x7c00 isfinite(x::Real) = decompose(x)[3] != 0 isfinite(x::Integer) = true @@ -655,7 +566,7 @@ such `y` exists (e.g. if `x` is `-Inf` or `NaN`), then return `x`. prevfloat(x::AbstractFloat) = nextfloat(x,-1) for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128) - for Tf in (Float32, Float64) + for Tf in (Float16, Float32, Float64) if Ti <: Unsigned || sizeof(Ti) < sizeof(Tf) # Here `Tf(typemin(Ti))-1` is exact, so we can compare the lower-bound # directly. 
`Tf(typemax(Ti))+1` is either always exactly representable, or @@ -853,47 +764,11 @@ eps(::AbstractFloat) ## byte order swaps for arbitrary-endianness serialization/deserialization ## bswap(x::IEEEFloat) = bswap_int(x) -# bit patterns -reinterpret(::Type{Unsigned}, x::Float64) = reinterpret(UInt64, x) -reinterpret(::Type{Unsigned}, x::Float32) = reinterpret(UInt32, x) -reinterpret(::Type{Signed}, x::Float64) = reinterpret(Int64, x) -reinterpret(::Type{Signed}, x::Float32) = reinterpret(Int32, x) - -sign_mask(::Type{Float64}) = 0x8000_0000_0000_0000 -exponent_mask(::Type{Float64}) = 0x7ff0_0000_0000_0000 -exponent_one(::Type{Float64}) = 0x3ff0_0000_0000_0000 -exponent_half(::Type{Float64}) = 0x3fe0_0000_0000_0000 -significand_mask(::Type{Float64}) = 0x000f_ffff_ffff_ffff - -sign_mask(::Type{Float32}) = 0x8000_0000 -exponent_mask(::Type{Float32}) = 0x7f80_0000 -exponent_one(::Type{Float32}) = 0x3f80_0000 -exponent_half(::Type{Float32}) = 0x3f00_0000 -significand_mask(::Type{Float32}) = 0x007f_ffff - -sign_mask(::Type{Float16}) = 0x8000 -exponent_mask(::Type{Float16}) = 0x7c00 -exponent_one(::Type{Float16}) = 0x3c00 -exponent_half(::Type{Float16}) = 0x3800 -significand_mask(::Type{Float16}) = 0x03ff - -for T in (Float16, Float32, Float64) - @eval significand_bits(::Type{$T}) = $(trailing_ones(significand_mask(T))) - @eval exponent_bits(::Type{$T}) = $(sizeof(T)*8 - significand_bits(T) - 1) - @eval exponent_bias(::Type{$T}) = $(Int(exponent_one(T) >> significand_bits(T))) - # maximum float exponent - @eval exponent_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T)) - exponent_bias(T)) - # maximum float exponent without bias - @eval exponent_raw_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T))) -end - # integer size of float uinttype(::Type{Float64}) = UInt64 uinttype(::Type{Float32}) = UInt32 uinttype(::Type{Float16}) = UInt16 -Base.iszero(x::Float16) = reinterpret(UInt16, x) & ~sign_mask(Float16) == 0x0000 - ## Array operations on floating point numbers ## float(A::AbstractArray{<:AbstractFloat}) = A diff --git a/base/math.jl b/base/math.jl index ab77d95b97df6..94d001c0f31dc 100644 --- a/base/math.jl +++ b/base/math.jl @@ -898,6 +898,8 @@ end end z end +@inline ^(x::Float16, y::Float16) = Float16(Float32(x)^Float32(y)) # TODO: optimize + @inline function ^(x::Float64, y::Integer) y == -1 && return inv(x) y == 0 && return one(x) diff --git a/src/APInt-C.cpp b/src/APInt-C.cpp index 0e0ffbfa73713..bc0a62e21dd3e 100644 --- a/src/APInt-C.cpp +++ b/src/APInt-C.cpp @@ -9,6 +9,7 @@ #include "APInt-C.h" #include "julia.h" #include "julia_assert.h" +#include "julia_internal.h" using namespace llvm; @@ -312,14 +313,16 @@ void LLVMByteSwap(unsigned numbits, integerPart *pa, integerPart *pr) { ASSIGN(r, a) } -void LLVMFPtoInt(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr, bool isSigned, bool *isExact) { +void LLVMFPtoInt(unsigned numbits, void *pa, unsigned onumbits, integerPart *pr, bool isSigned, bool *isExact) { double Val; - if (numbits == 32) + if (numbits == 16) + Val = __gnu_h2f_ieee(*(uint16_t*)pa); + else if (numbits == 32) Val = *(float*)pa; else if (numbits == 64) Val = *(double*)pa; else - jl_error("FPtoSI: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64"); + jl_error("FPtoSI: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); unsigned onumbytes = RoundUpToAlignment(onumbits, host_char_bit) / host_char_bit; if (onumbits <= 64) { // fast-path, if possible 
if (isSigned) { @@ -387,12 +390,14 @@ void LLVMSItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPar CREATE(a) val = a.roundToDouble(true); } - if (onumbits == 32) + if (onumbits == 16) + *(uint16_t*)pr = __gnu_f2h_ieee(val); + else if (onumbits == 32) *(float*)pr = val; else if (onumbits == 64) *(double*)pr = val; else - jl_error("SItoFP: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64"); + jl_error("SItoFP: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); } extern "C" JL_DLLEXPORT @@ -402,7 +407,9 @@ void LLVMUItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPar CREATE(a) val = a.roundToDouble(false); } - if (onumbits == 32) + if (onumbits == 16) + *(uint16_t*)pr = __gnu_f2h_ieee(val); + else if (onumbits == 32) *(float*)pr = val; else if (onumbits == 64) *(double*)pr = val; diff --git a/src/Makefile b/src/Makefile index 835c2bf60b55a..00d7d3fc0de8b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -56,7 +56,7 @@ RUNTIME_SRCS += jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-muladd llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering \ llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \ llvm-multiversioning llvm-alloc-opt cgmemmgr llvm-api llvm-remove-addrspaces \ - llvm-remove-ni llvm-julia-licm + llvm-remove-ni llvm-julia-licm llvm-demote-float16 FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir) LLVM_LIBS := all ifeq ($(USE_POLLY),1) diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index b0539786315b5..17df8b6a8a67b 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -508,8 +508,10 @@ void jl_dump_native(void *native_code, if (unopt_bc_fname) PM.add(createBitcodeWriterPass(unopt_bc_OS)); - if (bc_fname || obj_fname || asm_fname) + if (bc_fname || obj_fname || asm_fname) { addOptimizationPasses(&PM, jl_options.opt_level, true, true); + addMachinePasses(&PM, TM.get()); + } if (bc_fname) PM.add(createBitcodeWriterPass(bc_OS)); if (obj_fname) @@ -604,6 +606,15 @@ void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine *TM) PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); } + +void addMachinePasses(legacy::PassManagerBase *PM, TargetMachine *TM) +{ + // TODO: don't do this on CPUs that natively support Float16 + PM->add(createDemoteFloat16Pass()); + PM->add(createGVNPass()); +} + + // this defines the set of optimization passes defined for Julia at various optimization levels. // it assumes that the TLI and TTI wrapper passes have already been added. 
void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, @@ -809,6 +820,7 @@ class JuliaPipeline : public Pass { TPMAdapter Adapter(TPM); addTargetPasses(&Adapter, jl_TargetMachine); addOptimizationPasses(&Adapter, OptLevel); + addMachinePasses(&Adapter, jl_TargetMachine); } JuliaPipeline() : Pass(PT_PassManager, ID) {} Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const override { @@ -846,6 +858,7 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper, PM = new legacy::PassManager(); addTargetPasses(PM, jl_TargetMachine); addOptimizationPasses(PM, jl_options.opt_level); + addMachinePasses(PM, jl_TargetMachine); } // get the source code for this function diff --git a/src/cgutils.cpp b/src/cgutils.cpp index b516a75257dee..e6bd8a0744115 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -516,7 +516,7 @@ static Type *bitstype_to_llvm(jl_value_t *bt, bool llvmcall = false) return T_int32; if (bt == (jl_value_t*)jl_int64_type) return T_int64; - if (llvmcall && (bt == (jl_value_t*)jl_float16_type)) + if (bt == (jl_value_t*)jl_float16_type) return T_float16; if (bt == (jl_value_t*)jl_float32_type) return T_float32; diff --git a/src/codegen.cpp b/src/codegen.cpp index b8dac42970d7e..013c0697b38f4 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -145,8 +145,6 @@ extern void _chkstk(void); #define __alignof__ __alignof #endif -#define DISABLE_FLOAT16 - // llvm state extern JITEventListener *CreateJuliaJITEventListener(); diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index ede8031dda5cf..ada6166c1ceb8 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -106,10 +106,8 @@ static Type *FLOATT(Type *t) return T_float64; if (nb == 32) return T_float32; -#ifndef DISABLE_FLOAT16 if (nb == 16) return T_float16; -#endif if (nb == 128) return T_float128; return NULL; @@ -1040,8 +1038,7 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg } -// Implements IEEE negate. 
See issue #7868 - case neg_float: return math_builder(ctx)().CreateFSub(ConstantFP::get(t, -0.0), x); + case neg_float: return math_builder(ctx)().CreateFNeg(x); case neg_float_fast: return math_builder(ctx, true)().CreateFNeg(x); case add_float: return math_builder(ctx)().CreateFAdd(x, y); case sub_float: return math_builder(ctx)().CreateFSub(x, y); @@ -1301,3 +1298,200 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg } assert(0 && "unreachable"); } + + +// float16 intrinsics +// TODO: use LLVM's compiler-rt + +static inline float half_to_float(uint16_t ival) +{ + uint32_t sign = (ival & 0x8000) >> 15; + uint32_t exp = (ival & 0x7c00) >> 10; + uint32_t sig = (ival & 0x3ff) >> 0; + uint32_t ret; + + if (exp == 0) { + if (sig == 0) { + sign = sign << 31; + ret = sign | exp | sig; + } + else { + int n_bit = 1; + uint16_t bit = 0x0200; + while ((bit & sig) == 0) { + n_bit = n_bit + 1; + bit = bit >> 1; + } + sign = sign << 31; + exp = ((-14 - n_bit + 127) << 23); + sig = ((sig & (~bit)) << n_bit) << (23 - 10); + ret = sign | exp | sig; + } + } + else if (exp == 0x1f) { + if (sig == 0) { // Inf + if (sign == 0) + ret = 0x7f800000; + else + ret = 0xff800000; + } + else // NaN + ret = 0x7fc00000 | (sign << 31) | (sig << (23 - 10)); + } + else { + sign = sign << 31; + exp = ((exp - 15 + 127) << 23); + sig = sig << (23 - 10); + ret = sign | exp | sig; + } + + float fret; + memcpy(&fret, &ret, sizeof(float)); + return fret; +} + +// float to half algorithm from: +// "Fast Half Float Conversion" by Jeroen van der Zijp +// ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf +// +// With adjustments for round-to-nearest, ties to even. + +static uint16_t basetable[512] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, + 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, + 0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, + 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, + 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, + 0xf000, 0xf400, 0xf800, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00}; + +static uint8_t shifttable[512] = { + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, + 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x0d, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, + 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, + 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d}; + +static inline uint16_t float_to_half(float param) +{ + uint32_t f; + memcpy(&f, ¶m, sizeof(float)); + if (isnan(param)) { + uint32_t t = 0x8000 ^ (0x8000 & ((uint16_t)(f >> 0x10))); + return t ^ ((uint16_t)(f >> 0xd)); + } + int i = ((f & ~0x007fffff) >> 23); + uint8_t sh = shifttable[i]; + f &= 0x007fffff; + // If `val` is subnormal, the tables are set up to force the + // result to 0, so the significand has an implicit `1` in the + // cases we care about. 
+    f |= 0x007fffff + 0x1;
+    uint16_t h = (uint16_t)(basetable[i] + ((f >> sh) & 0x03ff));
+    // round
+    // NOTE: we maybe should ignore NaNs here, but the payload is
+    // getting truncated anyway so "rounding" it might not matter
+    int nextbit = (f >> (sh - 1)) & 1;
+    if (nextbit != 0 && (h & 0x7C00) != 0x7C00) {
+        // Round halfway to even or check lower bits
+        if ((h & 1) == 1 || (f & ((1 << (sh - 1)) - 1)) != 0)
+            h += UINT16_C(1);
+    }
+    return h;
+}
+
+#if !defined(_OS_DARWIN_) // xcode already links compiler-rt
+
+extern "C" JL_DLLEXPORT float __gnu_h2f_ieee(uint16_t param)
+{
+    return half_to_float(param);
+}
+
+extern "C" JL_DLLEXPORT float __extendhfsf2(uint16_t param)
+{
+    return half_to_float(param);
+}
+
+extern "C" JL_DLLEXPORT uint16_t __gnu_f2h_ieee(float param)
+{
+    return float_to_half(param);
+}
+
+extern "C" JL_DLLEXPORT uint16_t __truncdfhf2(double param)
+{
+    return float_to_half((float)param);
+}
+
+#endif
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index 6658a26e92d52..3481db683a95c 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -523,6 +523,7 @@ static void addPassesForOptLevel(legacy::PassManager &PM, TargetMachine &TM, raw
 {
     addTargetPasses(&PM, &TM);
     addOptimizationPasses(&PM, optlevel);
+    addMachinePasses(&PM, &TM);
     if (TM.addPassesToEmitMC(PM, Ctx, ObjStream))
         llvm_unreachable("Target does not support MC emission.");
 }
diff --git a/src/jitlayers.h b/src/jitlayers.h
index 8dd45c1f939f5..10f371f610cb3 100644
--- a/src/jitlayers.h
+++ b/src/jitlayers.h
@@ -24,6 +24,7 @@ extern bool imaging_mode;
 void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine *TM);
 void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
                            bool lower_intrinsics=true, bool dump_native=false);
+void addMachinePasses(legacy::PassManagerBase *PM, TargetMachine *TM);
 void jl_finalize_module(std::unique_ptr<Module> m);
 void jl_merge_module(Module *dest, std::unique_ptr<Module> src);
 Module *jl_create_llvm_module(StringRef name);
@@ -241,6 +242,7 @@ Pass *createRemoveNIPass();
 Pass *createJuliaLICMPass();
 Pass *createMultiVersioningPass();
 Pass *createAllocOptPass();
+Pass *createDemoteFloat16Pass();
 // Whether the Function is an llvm or julia intrinsic.
 static inline bool isIntrinsicFunction(Function *F)
 {
diff --git a/src/julia.expmap b/src/julia.expmap
index aa77f4c8cff31..baf3220f147ed 100644
--- a/src/julia.expmap
+++ b/src/julia.expmap
@@ -42,6 +42,12 @@
     environ;
     __progname;
 
+    /* compiler run-time intrinsics */
+    __gnu_h2f_ieee;
+    __extendhfsf2;
+    __gnu_f2h_ieee;
+    __truncdfhf2;
+
   local:
     *;
 };
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 410a3e9a15988..369781b79bed3 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -1311,6 +1311,9 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT;
 #define JL_GC_ASSERT_LIVE(x) (void)(x)
 #endif
 
+float __gnu_h2f_ieee(uint16_t param) JL_NOTSAFEPOINT;
+uint16_t __gnu_f2h_ieee(float param) JL_NOTSAFEPOINT;
+
 #ifdef __cplusplus
 }
 #endif
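The DemoteFloat16 pass added in the next hunk is what keeps Float16 arithmetic exactly rounded on back-ends without native half-precision support. As a minimal Julia sketch of the invariant at stake (Knuth/Dekker error-free addition, the kind of double-word trick twiceprecision.jl depends on; the function name is illustrative, not part of the patch):

    # With correctly rounded Float16 operations, s + e reconstructs a + b exactly.
    function two_sum(a::Float16, b::Float16)
        s = a + b                      # must round to Float16 right here
        t = s - a
        e = (a - (s - t)) + (b - t)    # the exact rounding error of a + b
        return s, e
    end

If a back-end instead keeps the intermediate values in Float32 and only truncates at the end, `e` no longer equals the true rounding error, which is precisely the breakage described in the header comment of the pass.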
diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp
new file mode 100644
index 0000000000000..6ed06fb12508a
--- /dev/null
+++ b/src/llvm-demote-float16.cpp
@@ -0,0 +1,146 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// This pass finds floating-point operations on 16-bit (half precision) values, and replaces
+// them with equivalent operations on 32-bit (single precision) values surrounded by a fpext
+// and fptrunc. This ensures that the exact semantics of IEEE floating-point are preserved.
+//
+// Without this pass, back-ends that do not natively support half-precision (e.g. x86_64)
+// similarly pattern-match half-precision operations with single-precision equivalents, but
+// without truncating after every operation. Doing so breaks floating-point operations that
+// assume precise semantics, such as Dekker arithmetic (as used in twiceprecision.jl).
+//
+// This pass is intended to run late in the pipeline, and should not be followed by
+// instcombine. A run of GVN is recommended to clean up identical conversions.
+
+#include "llvm-version.h"
+
+#define DEBUG_TYPE "demote_float16"
+
+#include "support/dtypes.h"
+
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/Module.h>
+#include <llvm/Pass.h>
+
+using namespace llvm;
+
+namespace {
+
+struct DemoteFloat16Pass : public FunctionPass {
+    static char ID;
+    DemoteFloat16Pass() : FunctionPass(ID){};
+
+private:
+    bool runOnFunction(Function &F) override;
+};
+
+bool DemoteFloat16Pass::runOnFunction(Function &F)
+{
+    auto &ctx = F.getContext();
+    auto T_float16 = Type::getHalfTy(ctx);
+    auto T_float32 = Type::getFloatTy(ctx);
+
+    SmallVector<Instruction *, 0> erase;
+    for (auto &BB : F) {
+        for (auto &I : BB) {
+            switch (I.getOpcode()) {
+            case Instruction::FNeg:
+            case Instruction::FAdd:
+            case Instruction::FSub:
+            case Instruction::FMul:
+            case Instruction::FDiv:
+            case Instruction::FRem:
+            case Instruction::FCmp:
+                break;
+            default:
+                continue;
+            }
+
+            IRBuilder<> builder(&I);
+
+            // extend Float16 operands to Float32
+            bool OperandsChanged = false;
+            SmallVector<Value *, 2> Operands(I.getNumOperands());
+            for (size_t i = 0; i < I.getNumOperands(); i++) {
+                Value *Op = I.getOperand(i);
+                if (Op->getType() == T_float16) {
+                    Op = builder.CreateFPExt(Op, T_float32);
+                    OperandsChanged = true;
+                }
+                Operands[i] = Op;
+            }
+
+            // recreate the instruction if any operands changed,
+            // truncating the result back to Float16
+            if (OperandsChanged) {
+                Value *NewI;
+                switch (I.getOpcode()) {
+                case Instruction::FNeg:
+                    assert(Operands.size() == 1);
+                    NewI = builder.CreateFNeg(Operands[0]);
+                    break;
+                case Instruction::FAdd:
+                    assert(Operands.size() == 2);
+                    NewI = builder.CreateFAdd(Operands[0], Operands[1]);
+                    break;
+                case Instruction::FSub:
+                    assert(Operands.size() == 2);
+                    NewI = builder.CreateFSub(Operands[0], Operands[1]);
+                    break;
+                case Instruction::FMul:
+                    assert(Operands.size() == 2);
+                    NewI = builder.CreateFMul(Operands[0], Operands[1]);
+                    break;
+                case Instruction::FDiv:
+                    assert(Operands.size() == 2);
+                    NewI = builder.CreateFDiv(Operands[0], Operands[1]);
+                    break;
+                case Instruction::FRem:
+                    assert(Operands.size() == 2);
+                    NewI = builder.CreateFRem(Operands[0], Operands[1]);
+                    break;
+                case Instruction::FCmp:
+                    assert(Operands.size() == 2);
+                    NewI = builder.CreateFCmp(cast<FCmpInst>(&I)->getPredicate(),
+                                              Operands[0], Operands[1]);
+                    break;
+                default:
+                    abort();
+                }
+                cast<Instruction>(NewI)->copyMetadata(I);
+                cast<Instruction>(NewI)->copyFastMathFlags(&I);
+                if (NewI->getType() != I.getType())
+                    NewI = builder.CreateFPTrunc(NewI, I.getType());
+                I.replaceAllUsesWith(NewI);
+                erase.push_back(&I);
+            }
+        }
+    }
+
+    if (erase.size() > 0) {
+        for (auto V : erase)
+            V->eraseFromParent();
+        return true;
+    }
+    else
+        return false;
+}
+
+char DemoteFloat16Pass::ID = 0;
+static RegisterPass<DemoteFloat16Pass>
+    Y("DemoteFloat16",
+      "Demote Float16 operations to Float32 equivalents.",
+      false,
+      false);
+}
+
+Pass *createDemoteFloat16Pass()
+{
+    return new DemoteFloat16Pass();
+}
+
+extern "C" JL_DLLEXPORT void LLVMExtraAddDemoteFloat16Pass(LLVMPassManagerRef PM)
+{
+    unwrap(PM)->add(createDemoteFloat16Pass());
+}
diff --git a/src/runtime_intrinsics.c 
b/src/runtime_intrinsics.c index 17d9c60911022..2337abe7d5704 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -211,6 +211,20 @@ static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \ OP((c_type*)pr, a); \ } +#define un_fintrinsic_half(OP, name) \ +static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \ +{ \ + uint16_t a = *(uint16_t*)pa; \ + float A = __gnu_h2f_ieee(a); \ + if (osize == 16) { \ + float R; \ + OP(&R, A); \ + *(uint16_t*)pr = __gnu_f2h_ieee(R); \ + } else { \ + OP((uint16_t*)pr, A); \ + } \ + } + // float or integer inputs // OP::Function macro(inputa, inputb) // name::unique string @@ -224,6 +238,18 @@ static void jl_##name##nbits(unsigned runtime_nbits, void *pa, void *pb, void *p *(c_type*)pr = (c_type)OP(a, b); \ } +#define bi_intrinsic_half(OP, name) \ +static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pr) JL_NOTSAFEPOINT \ +{ \ + uint16_t a = *(uint16_t*)pa; \ + uint16_t b = *(uint16_t*)pb; \ + float A = __gnu_h2f_ieee(a); \ + float B = __gnu_h2f_ieee(b); \ + runtime_nbits = 16; \ + float R = OP(A, B); \ + *(uint16_t*)pr = __gnu_f2h_ieee(R); \ +} + // float or integer inputs, bool output // OP::Function macro(inputa, inputb) // name::unique string @@ -237,6 +263,18 @@ static int jl_##name##nbits(unsigned runtime_nbits, void *pa, void *pb) JL_NOTSA return OP(a, b); \ } +#define bool_intrinsic_half(OP, name) \ +static int jl_##name##16(unsigned runtime_nbits, void *pa, void *pb) JL_NOTSAFEPOINT \ +{ \ + uint16_t a = *(uint16_t*)pa; \ + uint16_t b = *(uint16_t*)pb; \ + float A = __gnu_h2f_ieee(a); \ + float B = __gnu_h2f_ieee(b); \ + runtime_nbits = 16; \ + return OP(A, B); \ +} + + // integer inputs, with precondition test // OP::Function macro(inputa, inputb) // name::unique string @@ -265,6 +303,20 @@ static void jl_##name##nbits(unsigned runtime_nbits, void *pa, void *pb, void *p *(c_type*)pr = (c_type)OP(a, b, c); \ } +#define ter_intrinsic_half(OP, name) \ +static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pc, void *pr) JL_NOTSAFEPOINT \ +{ \ + uint16_t a = *(uint16_t*)pa; \ + uint16_t b = *(uint16_t*)pb; \ + uint16_t c = *(uint16_t*)pc; \ + float A = __gnu_h2f_ieee(a); \ + float B = __gnu_h2f_ieee(b); \ + float C = __gnu_h2f_ieee(c); \ + runtime_nbits = 16; \ + float R = OP(A, B, C); \ + *(uint16_t*)pr = __gnu_f2h_ieee(R); \ +} + // unary operator generator // @@ -407,11 +459,12 @@ static inline jl_value_t *jl_intrinsic_cvt(jl_value_t *ty, jl_value_t *a, const // floating point #define un_fintrinsic_withtype(OP, name) \ +un_fintrinsic_half(OP, jl_##name##16) \ un_fintrinsic_ctype(OP, jl_##name##32, float) \ un_fintrinsic_ctype(OP, jl_##name##64, double) \ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *ty, jl_value_t *a) \ { \ - return jl_fintrinsic_1(ty, a, #name, jl_##name##32, jl_##name##64); \ + return jl_fintrinsic_1(ty, a, #name, jl_##name##16, jl_##name##32, jl_##name##64); \ } #define un_fintrinsic(OP, name) \ @@ -423,7 +476,7 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a) \ typedef void (fintrinsic_op1)(unsigned, void*, void*); -static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name, fintrinsic_op1 *floatop, fintrinsic_op1 *doubleop) +static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name, fintrinsic_op1 *halfop, fintrinsic_op1 *floatop, fintrinsic_op1 *doubleop) { jl_ptls_t ptls = jl_get_ptls_states(); if (!jl_is_primitivetype(jl_typeof(a))) @@ -436,6 +489,9 @@ 
static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const c unsigned sz = jl_datatype_size(jl_typeof(a)); switch (sz) { /* choose the right size c-type operation based on the input */ + case 2: + halfop(sz2 * host_char_bit, pa, pr); + break; case 4: floatop(sz2 * host_char_bit, pa, pr); break; @@ -443,7 +499,7 @@ static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const c doubleop(sz2 * host_char_bit, pa, pr); break; default: - jl_errorf("%s: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64", name); + jl_errorf("%s: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64", name); } return newv; } @@ -612,6 +668,7 @@ static inline jl_value_t *jl_intrinsiclambda_checkeddiv(jl_value_t *ty, void *pa // floating point #define bi_fintrinsic(OP, name) \ + bi_intrinsic_half(OP, name) \ bi_intrinsic_ctype(OP, name, 32, float) \ bi_intrinsic_ctype(OP, name, 64, double) \ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ @@ -627,6 +684,9 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b), *pr = jl_data_ptr(newv); \ switch (sz) { \ /* choose the right size c-type operation */ \ + case 2: \ + jl_##name##16(16, pa, pb, pr); \ + break; \ case 4: \ jl_##name##32(32, pa, pb, pr); \ break; \ @@ -634,12 +694,13 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ jl_##name##64(64, pa, pb, pr); \ break; \ default: \ - jl_error(#name ": runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64"); \ + jl_error(#name ": runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); \ } \ return newv; \ } #define bool_fintrinsic(OP, name) \ + bool_intrinsic_half(OP, name) \ bool_intrinsic_ctype(OP, name, 32, float) \ bool_intrinsic_ctype(OP, name, 64, double) \ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ @@ -654,6 +715,9 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ int cmp; \ switch (sz) { \ /* choose the right size c-type operation */ \ + case 2: \ + cmp = jl_##name##16(16, pa, pb); \ + break; \ case 4: \ cmp = jl_##name##32(32, pa, pb); \ break; \ @@ -667,6 +731,7 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \ } #define ter_fintrinsic(OP, name) \ + ter_intrinsic_half(OP, name) \ ter_intrinsic_ctype(OP, name, 32, float) \ ter_intrinsic_ctype(OP, name, 64, double) \ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c) \ @@ -682,6 +747,9 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c) void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b), *pc = jl_data_ptr(c), *pr = jl_data_ptr(newv); \ switch (sz) { \ /* choose the right size c-type operation */ \ + case 2: \ + jl_##name##16(16, pa, pb, pc, pr); \ + break; \ case 4: \ jl_##name##32(32, pa, pb, pc, pr); \ break; \ @@ -689,7 +757,7 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c) jl_##name##64(64, pa, pb, pc, pr); \ break; \ default: \ - jl_error(#name ": runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64"); \ + jl_error(#name ": runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); \ } \ return newv; \ } @@ -834,15 +902,17 @@ cvt_iintrinsic(LLVMFPtoUI, fptoui) #define fptrunc(pr, a) \ if (!(osize < 8 * sizeof(a))) \ jl_error("fptrunc: output bitsize 
must be < input bitsize"); \
-    if (osize == 32) \
+    else if (osize == 16) \
+        *(uint16_t*)pr = __gnu_f2h_ieee(a); \
+    else if (osize == 32) \
         *(float*)pr = a; \
     else if (osize == 64) \
         *(double*)pr = a; \
     else \
-        jl_error("fptrunc: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64");
+        jl_error("fptrunc: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64");
 
 #define fpext(pr, a) \
-    if (!(osize > 8 * sizeof(a))) \
-        jl_error("fpext: output bitsize must be > input bitsize"); \
+    if (!(osize >= 8 * sizeof(a))) \
+        jl_error("fpext: output bitsize must be >= input bitsize"); \
     if (osize == 32) \
         *(float*)pr = a; \
     else if (osize == 64) \
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index a5f3308c68639..47560d7dbd626 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -13,8 +13,7 @@ include("testenv.jl")
 @testset "runtime intrinsics" begin
     @test Core.Intrinsics.add_int(1, 1) == 2
     @test Core.Intrinsics.sub_int(1, 1) == 0
-    @test_throws ErrorException("fpext: output bitsize must be > input bitsize") Core.Intrinsics.fpext(Int32, 0x0000_0000)
-    @test_throws ErrorException("fpext: output bitsize must be > input bitsize") Core.Intrinsics.fpext(Int32, 0x0000_0000_0000_0000)
+    @test_throws ErrorException("fpext: output bitsize must be >= input bitsize") Core.Intrinsics.fpext(Int32, 0x0000_0000_0000_0000)
     @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize") Core.Intrinsics.fptrunc(Int32, 0x0000_0000)
     @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize") Core.Intrinsics.fptrunc(Int64, 0x0000_0000)
     @test_throws ErrorException("ZExt: output bitsize must be > input bitsize") Core.Intrinsics.zext_int(Int8, 0x00)
@@ -106,3 +105,50 @@ end
 @test unsafe_load(Ptr{Nothing}(0)) === nothing
 struct GhostStruct end
 @test unsafe_load(Ptr{GhostStruct}(rand(Int))) === GhostStruct()
+
+# macro to verify and compare the compiled output of an intrinsic with its runtime version
+macro test_intrinsic(intr, args...)
+    output = args[end]
+    inputs = args[1:end-1]
+    quote
+        function f()
+            $intr($(inputs...))
+        end
+        @test f() === Base.invokelatest($intr, $(inputs...))
+        @test f() == $output
+    end
+end
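For reference, a call such as `@test_intrinsic Core.Intrinsics.add_float Float16(3.3) Float16(2) Float16(5.3)` expands to roughly the following (modulo macro hygiene), so every test compares the statically compiled intrinsic both against the runtime fallback reached via `invokelatest` and against the expected value:

    function f()
        Core.Intrinsics.add_float(Float16(3.3), Float16(2))
    end
    @test f() === Base.invokelatest(Core.Intrinsics.add_float, Float16(3.3), Float16(2))
    @test f() == Float16(5.3)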
+
+@testset "Float16 intrinsics" begin
+    # unary
+    @test_intrinsic Core.Intrinsics.neg_float Float16(3.3) Float16(-3.3)
+    @test_intrinsic Core.Intrinsics.fpext Float32 Float16(3.3) 3.3007812f0
+    @test_intrinsic Core.Intrinsics.fpext Float64 Float16(3.3) 3.30078125
+    @test_intrinsic Core.Intrinsics.fptrunc Float16 Float32(3.3) Float16(3.3)
+    @test_intrinsic Core.Intrinsics.fptrunc Float16 Float64(3.3) Float16(3.3)
+
+    # binary
+    @test_intrinsic Core.Intrinsics.add_float Float16(3.3) Float16(2) Float16(5.3)
+    @test_intrinsic Core.Intrinsics.sub_float Float16(3.3) Float16(2) Float16(1.301)
+    @test_intrinsic Core.Intrinsics.mul_float Float16(3.3) Float16(2) Float16(6.6)
+    @test_intrinsic Core.Intrinsics.div_float Float16(3.3) Float16(2) Float16(1.65)
+    @test_intrinsic Core.Intrinsics.rem_float Float16(3.3) Float16(2) Float16(1.301)
+
+    # ternary
+    @test_intrinsic Core.Intrinsics.fma_float Float16(3.3) Float16(4.4) Float16(5.5) Float16(20.02)
+    @test_intrinsic Core.Intrinsics.muladd_float Float16(3.3) Float16(4.4) Float16(5.5) Float16(20.02)
+
+    # boolean
+    @test_intrinsic Core.Intrinsics.eq_float Float16(3.3) Float16(3.3) true
+    @test_intrinsic Core.Intrinsics.eq_float Float16(3.3) Float16(2) false
+    @test_intrinsic Core.Intrinsics.ne_float Float16(3.3) Float16(3.3) false
+    @test_intrinsic Core.Intrinsics.ne_float Float16(3.3) Float16(2) true
+    @test_intrinsic Core.Intrinsics.le_float Float16(3.3) Float16(3.3) true
+    @test_intrinsic Core.Intrinsics.le_float Float16(3.3) Float16(2) false
+
+    # conversions
+    @test_intrinsic Core.Intrinsics.sitofp Float16 3 Float16(3f0)
+    @test_intrinsic Core.Intrinsics.uitofp Float16 UInt(3) Float16(3f0)
+    @test_intrinsic Core.Intrinsics.fptosi Int Float16(3.3) 3
+    @test_intrinsic Core.Intrinsics.fptoui UInt Float16(3.3) UInt(3)
+end
diff --git a/test/llvmpasses/llvmcall.jl b/test/llvmpasses/llvmcall.jl
index c9cdf4db1fc38..7da2dbec36a8f 100644
--- a/test/llvmpasses/llvmcall.jl
+++ b/test/llvmpasses/llvmcall.jl
@@ -28,5 +28,5 @@ emit(foo, Core.LLVMPtr{Float32, 3})
 # CHECK: call { i32, i32 } @foo({ i32, i32 } %{{[0-9]+}})
 emit(foo, Foo)
 
-# CHECK: define <2 x i16> @julia_bar_{{[0-9]+}}([2 x i16]
+# CHECK: define <2 x half> @julia_bar_{{[0-9]+}}([2 x half]
 emit(bar, NTuple{2, Float16})
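As an end-to-end sanity check of the patch, a hypothetical REPL session (exact display may vary by Julia version and target CPU):

    julia> Float16(3.3) + Float16(2)    # native add_float on half, rounded after the operation
    Float16(5.3)

    julia> using InteractiveUtils

    julia> code_llvm(+, (Float16, Float16))
    # with native Float16 hardware this shows a plain `fadd half`; on other targets the
    # DemoteFloat16 pass (also wired into jl_get_llvmf_defn above) yields the
    # fpext / fadd float / fptrunc sequence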