Merge pull request #43 from tkf/powi

Use powi from x^p
eschnett · Jan 7, 2019 · 89ca5b4 · 89ca5b4
2 parents 0e4d17c + e2fec95
commit 89ca5b4
Show file tree

Hide file tree

Showing 2 changed files with 53 additions and 14 deletions.
diff --git a/src/SIMD.jl b/src/SIMD.jl
@@ -25,18 +25,18 @@ for sz in (8, 16, 32, 64, 128)
 
         Base.convert(::Type{Bool}, b::$Boolsz) = b.int != 0
 
-        Base. ~(b::$Boolsz) = $Boolsz(~b.int)
-        Base. !(b::$Boolsz) = ~b
-        Base. &(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int & b2.int)
-        Base. |(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int | b2.int)
+        Base.:~(b::$Boolsz) = $Boolsz(~b.int)
+        Base.:!(b::$Boolsz) = ~b
+        Base.:&(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int & b2.int)
+        Base.:|(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int | b2.int)
         Base.$(:$)(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int $ b2.int)
 
-        Base. ==(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int == b2.int)
-        Base. !=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int != b2.int)
-        Base. <(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int < b2.int)
-        Base. <=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int <= b2.int)
-        Base. >(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int > b2.int)
-        Base. >=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int >= b2.int)
+        Base.:==(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int == b2.int)
+        Base.:!=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int != b2.int)
+        Base.:<(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int < b2.int)
+        Base.:<=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int <= b2.int)
+        Base.:>(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int > b2.int)
+        Base.:>=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int >= b2.int)
     end
 end
 Base.convert(::Type{Bool}, b::Boolean) = error("impossible")
@@ -115,7 +115,7 @@ Vec(xs::NTuple{N,T}) where {N,T<:ScalarTypes} = Vec{N,T}(xs)
 @inline Tuple(v::Vec{N}) where {N} = ntuple(i -> v.elts[i].value, Val(N))
 @inline NTuple{N, T}(v::Vec{N}) where{N, T} = ntuple(i -> convert(T, v.elts[i].value), Val(N))
 
-@generated function Base. %(v::Vec{N,T}, ::Type{Vec{N,R}}) where {N,R,T}
+@generated function Base.:%(v::Vec{N,T}, ::Type{Vec{N,R}}) where {N,R,T}
     quote
         $(Expr(:meta, :inline))
         Vec{N,R}(tuple($([:(v.elts[$i].value % R) for i in 1:N]...)))
@@ -560,6 +560,33 @@ end
     end
 end
 
+# Functions taking two arguments, second argument is a scalar (used by `^` for `powi`)
+@generated function llvmwrap(::Type{Val{Op}}, v1::Vec{N,T1},
+        s2::ScalarTypes, ::Type{R} = T1) where {Op,N,T1,R}  # R: result element type, defaults to T1
+    @assert isa(Op, Symbol)  # Op must be a Symbol naming the operation
+    typ1 = llvmtype(T1)  # element type of `v1` as spelled in the IR text below
+    vtyp1 = "<$N x $typ1>"  # LLVM vector type of the first operand
+    typ2 = llvmtype(s2)  # in a generated function body `s2` is the argument's type
+    typr = llvmtype(R)
+    vtypr = "<$N x $typr>"  # LLVM vector type of the result
+    ins = llvmins(Val{Op}, N, T1)  # instruction/intrinsic name for Op (helper defined elsewhere)
+    decls = []  # IR declarations, joined with newlines below
+    instrs = []  # IR body lines, joined with newlines below
+    if ins[1] == '@'  # leading '@': declare the function and call it
+        push!(decls, "declare $vtypr $ins($vtyp1, $typ2)")
+        push!(instrs, "%res = call $vtypr $ins($vtyp1 %0, $typ2 %1)")
+    else  # otherwise emit `ins` as a plain two-operand instruction
+        push!(instrs, "%res = $ins $vtyp1 %0, %1")  # NOTE(review): %1 is the scalar here; confirm this branch is valid/reachable
+    end
+    push!(instrs, "ret $vtypr %res")
+    quote
+        $(Expr(:meta, :inline))  # marks the generated body for inlining
+        Vec{N,R}(Base.llvmcall($((join(decls, "\n"), join(instrs, "\n"))),  # (declarations, body) IR pair
+            NTuple{N,VE{R}}, Tuple{NTuple{N,VE{T1}}, $s2},  # return type, then argument types
+            v1.elts, s2))  # runtime values: the vector's element tuple and the scalar
+    end
+end
+
 # Functions taking two arguments, returning Bool
 @generated function llvmwrap(::Type{Val{Op}}, v1::Vec{N,T1},
         v2::Vec{N,T2}, ::Type{Bool}) where {Op,N,T1,T2}
@@ -900,7 +927,7 @@ for op in (:~, :+, :-)
             llvmwrap(Val{$(QuoteNode(op))}, v1)
     end
 end
-@inline Base. !(v1::Vec{N,Bool}) where {N} = ~v1
+@inline Base.:!(v1::Vec{N,Bool}) where {N} = ~v1
 @inline function Base.abs(v1::Vec{N,T}) where {N,T<:IntTypes}
     # s = -Vec{N,T}(signbit(v1))
     s = v1 >> Val{8*sizeof(T)}
@@ -986,7 +1013,11 @@ for op in (:+, :-, :*, :/, :^, :copysign, :max, :min, :rem)
             llvmwrap(Val{$(QuoteNode(op))}, v1, v2)
     end
 end
-@inline Base. ^(v1::Vec{N,T}, x2::Integer) where {N,T<:FloatingTypes} =
+# Using `IntegerTypes` here so that this definition "wins" against the
+# scalar-promoting `^(v1::Vec, s2::ScalarTypes)`; integer powers use `powi`.
+@inline Base.:^(v1::Vec{N,T}, x2::IntegerTypes) where {N,T<:FloatingTypes} =
+    llvmwrap(Val{:powi}, v1, Int(x2))
+@inline Base.:^(v1::Vec{N,T}, x2::Integer) where {N,T<:FloatingTypes} =
     llvmwrap(Val{:powi}, v1, Int(x2))
 @inline Base.flipsign(v1::Vec{N,T}, v2::Vec{N,T}) where {N,T<:FloatingTypes} =
     vifelse(signbit(v2), -v1, v1)

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -211,6 +211,14 @@ llvm_ir(f, args) = sprint(code_llvm, f, Base.typesof(args...))
             @test Tuple(op(V4F64(v4f64), V4F64(v4f64b), V4F64(v4f64c))) ===
                 map(op, v4f64, v4f64b, v4f64c)
         end
+
+        v = V4F64(v4f64)
+        @test v^5 === v * v * v * v * v
+
+        # Make sure our dispatching rule does not select floating point `pow`.
+        # See: https://github.com/eschnett/SIMD.jl/pull/43
+        ir = llvm_ir(^, (V4F64(v4f64), 2))
+        @test occursin("@llvm.powi.v4f64", ir)
     end
 
     @testset "Type promotion" begin
@@ -242,7 +250,7 @@ llvm_ir(f, args) = sprint(code_llvm, f, Base.typesof(args...))
 
         for op in (
                 ==, !=, <, <=, >, >=,
-                +, -, *, /, ^, copysign, flipsign, max, min, rem)
+                +, -, *, /, copysign, flipsign, max, min, rem)
             @test op(42, V4F64(v4f64)) === op(V4F64(42), V4F64(v4f64))
             @test op(V4F64(v4f64), 42) === op(V4F64(v4f64), V4F64(42))
         end