Skip to content

Commit

Permalink
Merge pull request #43 from tkf/powi
Browse files Browse the repository at this point in the history
Use powi from x^p
  • Loading branch information
eschnett authored Jan 7, 2019
2 parents 0e4d17c + e2fec95 commit 89ca5b4
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 14 deletions.
57 changes: 44 additions & 13 deletions src/SIMD.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,18 @@ for sz in (8, 16, 32, 64, 128)
Base.convert(::Type{Bool}, b::$Boolsz) = b.int != 0
Base. ~(b::$Boolsz) = $Boolsz(~b.int)
Base. !(b::$Boolsz) = ~b
Base. &(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int & b2.int)
Base. |(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int | b2.int)
Base.:~(b::$Boolsz) = $Boolsz(~b.int)
Base.:!(b::$Boolsz) = ~b
Base.:&(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int & b2.int)
Base.:|(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int | b2.int)
Base.$(:$)(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int $ b2.int)
Base. ==(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int == b2.int)
Base. !=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int != b2.int)
Base. <(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int < b2.int)
Base. <=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int <= b2.int)
Base. >(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int > b2.int)
Base. >=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int >= b2.int)
Base.:==(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int == b2.int)
Base.:!=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int != b2.int)
Base.:<(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int < b2.int)
Base.:<=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int <= b2.int)
Base.:>(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int > b2.int)
Base.:>=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int >= b2.int)
end
end
# Conversion of a `Boolean` argument to `Bool` through this method is never
# expected to succeed: it unconditionally raises an error.
function Base.convert(::Type{Bool}, b::Boolean)
    error("impossible")
end
Expand Down Expand Up @@ -115,7 +115,7 @@ Vec(xs::NTuple{N,T}) where {N,T<:ScalarTypes} = Vec{N,T}(xs)
# Collect the `value` field of each of the first `N` entries of `v.elts` into a
# plain tuple of length `N`.
@inline function Tuple(v::Vec{N}) where {N}
    return ntuple(Val(N)) do i
        v.elts[i].value
    end
end
# Same as the `Tuple` conversion, except that every extracted `value` is passed
# through `convert(T, ...)` before being stored in the resulting `N`-tuple.
@inline function NTuple{N, T}(v::Vec{N}) where {N, T}
    return ntuple(Val(N)) do i
        convert(T, v.elts[i].value)
    end
end

@generated function Base. %(v::Vec{N,T}, ::Type{Vec{N,R}}) where {N,R,T}
@generated function Base.:%(v::Vec{N,T}, ::Type{Vec{N,R}}) where {N,R,T}
quote
$(Expr(:meta, :inline))
Vec{N,R}(tuple($([:(v.elts[$i].value % R) for i in 1:N]...)))
Expand Down Expand Up @@ -560,6 +560,33 @@ end
end
end

# Functions taking two arguments, second argument is a scalar
#
# Generated wrapper that builds a small piece of LLVM IR for the operation `Op`
# applied to the vector `v1` and the scalar `s2`, and runs it via `Base.llvmcall`.
#
# Arguments:
# - `Val{Op}`: the operation, given as a `Symbol` lifted to the type domain.
# - `v1`:      the vector operand; its `elts` field is passed to the IR as `%0`.
# - `s2`:      the scalar operand; passed to the IR unchanged as `%1`.
# - `R`:       element type of the result vector (defaults to `T1`).
#
# Returns a `Vec{N,R}` wrapping the tuple produced by the LLVM code.
@generated function llvmwrap(::Type{Val{Op}}, v1::Vec{N,T1},
                             s2::ScalarTypes, ::Type{R} = T1) where {Op,N,T1,R}
    @assert isa(Op, Symbol)
    # Inside the generator body the argument names are bound to the argument
    # *types*, so `s2` below is the concrete scalar type, not a value.
    typ1 = llvmtype(T1)
    vtyp1 = "<$N x $typ1>"
    typ2 = llvmtype(s2)
    typr = llvmtype(R)
    vtypr = "<$N x $typr>"
    ins = llvmins(Val{Op}, N, T1)
    # Typed accumulators: only strings are ever pushed, so avoid `Vector{Any}`.
    decls = String[]
    instrs = String[]
    if ins[1] == '@'
        # A name starting with '@' is treated as a function: declare it with a
        # (vector, scalar) signature and call it.
        push!(decls, "declare $vtypr $ins($vtyp1, $typ2)")
        push!(instrs, "%res = call $vtypr $ins($vtyp1 %0, $typ2 %1)")
    else
        # Otherwise `ins` is used directly as an instruction mnemonic.
        # NOTE(review): `%1` is the scalar here while `%0` is a vector; LLVM
        # binary instructions normally need operands of one type, so this
        # branch may require splatting the scalar first -- confirm before use.
        push!(instrs, "%res = $ins $vtyp1 %0, %1")
    end
    push!(instrs, "ret $vtypr %res")
    # `llvmcall` receives a (declarations, body) pair of strings.
    ir = (join(decls, "\n"), join(instrs, "\n"))
    quote
        $(Expr(:meta, :inline))
        Vec{N,R}(Base.llvmcall($ir,
                               NTuple{N,VE{R}}, Tuple{NTuple{N,VE{T1}}, $s2},
                               v1.elts, s2))
    end
end

# Functions taking two arguments, returning Bool
@generated function llvmwrap(::Type{Val{Op}}, v1::Vec{N,T1},
v2::Vec{N,T2}, ::Type{Bool}) where {Op,N,T1,T2}
Expand Down Expand Up @@ -900,7 +927,7 @@ for op in (:~, :+, :-)
llvmwrap(Val{$(QuoteNode(op))}, v1)
end
end
@inline Base. !(v1::Vec{N,Bool}) where {N} = ~v1
# Logical negation of a `Bool` vector is defined as its bitwise complement `~`.
@inline function Base.:!(v1::Vec{N,Bool}) where {N}
    return ~v1
end
@inline function Base.abs(v1::Vec{N,T}) where {N,T<:IntTypes}
# s = -Vec{N,T}(signbit(v1))
s = v1 >> Val{8*sizeof(T)}
Expand Down Expand Up @@ -986,7 +1013,11 @@ for op in (:+, :-, :*, :/, :^, :copysign, :max, :min, :rem)
llvmwrap(Val{$(QuoteNode(op))}, v1, v2)
end
end
@inline Base. ^(v1::Vec{N,T}, x2::Integer) where {N,T<:FloatingTypes} =
# Using `IntegerTypes` here so that this definition "wins" against
# `^(::ScalarTypes, v2::Vec)`.
# Floating-point vector raised to an `IntegerTypes` exponent: the exponent is
# converted to `Int` and the call is forwarded to `llvmwrap` with `Val{:powi}`.
@inline function Base.:^(v1::Vec{N,T}, x2::IntegerTypes) where {N,T<:FloatingTypes}
    return llvmwrap(Val{:powi}, v1, Int(x2))
end
# Floating-point vector raised to any other `Integer` exponent: the exponent is
# converted to `Int` and the call is forwarded to `llvmwrap` with `Val{:powi}`.
@inline function Base.:^(v1::Vec{N,T}, x2::Integer) where {N,T<:FloatingTypes}
    return llvmwrap(Val{:powi}, v1, Int(x2))
end
# `flipsign` for floating-point vectors: `vifelse` chooses between `-v1` and
# `v1` according to `signbit(v2)`.
@inline function Base.flipsign(v1::Vec{N,T}, v2::Vec{N,T}) where {N,T<:FloatingTypes}
    negate = signbit(v2)
    return vifelse(negate, -v1, v1)
end
Expand Down
10 changes: 9 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,14 @@ llvm_ir(f, args) = sprint(code_llvm, f, Base.typesof(args...))
@test Tuple(op(V4F64(v4f64), V4F64(v4f64b), V4F64(v4f64c))) ===
map(op, v4f64, v4f64b, v4f64c)
end

v = V4F64(v4f64)
@test v^5 === v * v * v * v * v

# Make sure our dispatching rule does not select floating point `pow`.
# See: https://github.com/eschnett/SIMD.jl/pull/43
ir = llvm_ir(^, (V4F64(v4f64), 2))
@test occursin("@llvm.powi.v4f64", ir)
end

@testset "Type promotion" begin
Expand Down Expand Up @@ -242,7 +250,7 @@ llvm_ir(f, args) = sprint(code_llvm, f, Base.typesof(args...))

for op in (
==, !=, <, <=, >, >=,
+, -, *, /, ^, copysign, flipsign, max, min, rem)
+, -, *, /, copysign, flipsign, max, min, rem)
@test op(42, V4F64(v4f64)) === op(V4F64(42), V4F64(v4f64))
@test op(V4F64(v4f64), 42) === op(V4F64(v4f64), V4F64(42))
end
Expand Down

0 comments on commit 89ca5b4

Please sign in to comment.