From 6160a5e4948220d18b1152a7afe919f2223c08d8 Mon Sep 17 00:00:00 2001
From: Takafumi Arakaki
Date: Sat, 5 Jan 2019 14:52:45 -0800
Subject: [PATCH 1/4] Use powi from x^p

Add an `llvmwrap` method whose second argument is a scalar and use it to
lower `v^n` (floating-point `Vec`, integer `n`) to the `llvm.powi` intrinsic.
---
 src/SIMD.jl | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/SIMD.jl b/src/SIMD.jl
index 3ae0b78..b60d60c 100644
--- a/src/SIMD.jl
+++ b/src/SIMD.jl
@@ -560,6 +560,33 @@ end
     end
 end
 
+# Functions taking two arguments, second argument is a scalar
+@generated function llvmwrap(::Type{Val{Op}}, v1::Vec{N,T1},
+        s2::ScalarTypes, ::Type{R} = T1) where {Op,N,T1,R}
+    @assert isa(Op, Symbol)
+    typ1 = llvmtype(T1)
+    vtyp1 = "<$N x $typ1>"
+    typ2 = llvmtype(s2)
+    typr = llvmtype(R)
+    vtypr = "<$N x $typr>"
+    ins = llvmins(Val{Op}, N, T1)
+    decls = []
+    instrs = []
+    if ins[1] == '@'
+        push!(decls, "declare $vtypr $ins($vtyp1, $typ2)")
+        push!(instrs, "%res = call $vtypr $ins($vtyp1 %0, $typ2 %1)")
+    else
+        push!(instrs, "%res = $ins $vtyp1 %0, %1")
+    end
+    push!(instrs, "ret $vtypr %res")
+    quote
+        $(Expr(:meta, :inline))
+        Vec{N,R}(Base.llvmcall($((join(decls, "\n"), join(instrs, "\n"))),
+            NTuple{N,VE{R}}, Tuple{NTuple{N,VE{T1}}, $s2},
+            v1.elts, s2))
+    end
+end
+
 # Functions taking two arguments, returning Bool
 @generated function llvmwrap(::Type{Val{Op}}, v1::Vec{N,T1},
         v2::Vec{N,T2}, ::Type{Bool}) where {Op,N,T1,T2}
@@ -986,7 +1013,11 @@ for op in (:+, :-, :*, :/, :^, :copysign, :max, :min, :rem)
             llvmwrap(Val{$(QuoteNode(op))}, v1, v2)
     end
 end
-@inline Base. ^(v1::Vec{N,T}, x2::Integer) where {N,T<:FloatingTypes} =
+# Using `IntegerTypes` here so that this definition "wins" against
+# `^(v1::Vec, ::ScalarTypes)`.
+@inline Base.:^(v1::Vec{N,T}, x2::IntegerTypes) where {N,T<:FloatingTypes} =
+    llvmwrap(Val{:powi}, v1, Int(x2))
+@inline Base.:^(v1::Vec{N,T}, x2::Integer) where {N,T<:FloatingTypes} =
     llvmwrap(Val{:powi}, v1, Int(x2))
 @inline Base.flipsign(v1::Vec{N,T}, v2::Vec{N,T}) where {N,T<:FloatingTypes} =
     vifelse(signbit(v2), -v1, v1)

From 8c9d84416f98449eee12208e95165f3d25281851 Mon Sep 17 00:00:00 2001
From: Takafumi Arakaki
Date: Sun, 6 Jan 2019 12:38:03 -0800
Subject: [PATCH 2/4] Remove ^ from promotion rule test

See: https://github.com/eschnett/SIMD.jl/pull/43#discussion_r245494809
---
 test/runtests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 9354197..97429a6 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -242,7 +242,7 @@ llvm_ir(f, args) = sprint(code_llvm, f, Base.typesof(args...))
 
         for op in (
             ==, !=, <, <=, >, >=,
-            +, -, *, /, ^, copysign, flipsign, max, min, rem)
+            +, -, *, /, copysign, flipsign, max, min, rem)
             @test op(42, V4F64(v4f64)) === op(V4F64(42), V4F64(v4f64))
             @test op(V4F64(v4f64), 42) === op(V4F64(v4f64), V4F64(42))
         end

From f970f28f7b3adfd361bf10aec0ac620c02d3a42a Mon Sep 17 00:00:00 2001
From: Takafumi Arakaki
Date: Sun, 6 Jan 2019 12:45:54 -0800
Subject: [PATCH 3/4] More tests for ^

---
 test/runtests.jl | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/test/runtests.jl b/test/runtests.jl
index 97429a6..d816ebd 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -211,6 +211,14 @@ llvm_ir(f, args) = sprint(code_llvm, f, Base.typesof(args...))
             @test Tuple(op(V4F64(v4f64), V4F64(v4f64b), V4F64(v4f64c))) ===
                 map(op, v4f64, v4f64b, v4f64c)
         end
+
+        v = V4F64(v4f64)
+        @test v^5 === v * v * v * v * v
+
+        # Make sure our dispatching rule does not select floating point `pow`.
+        # See: https://github.com/eschnett/SIMD.jl/pull/43
+        ir = llvm_ir(^, (V4F64(v4f64), 2))
+        @test occursin("@llvm.powi.v4f64", ir)
     end
 
     @testset "Type promotion" begin

From e2fec959fd918a53a65391acf38ec9c1203dbf09 Mon Sep 17 00:00:00 2001
From: Takafumi Arakaki
Date: Sun, 6 Jan 2019 14:13:57 -0800
Subject: [PATCH 4/4] Replace: "Base. op" -> "Base.:op"

---
 src/SIMD.jl | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/SIMD.jl b/src/SIMD.jl
index b60d60c..22a7f61 100644
--- a/src/SIMD.jl
+++ b/src/SIMD.jl
@@ -25,18 +25,18 @@ for sz in (8, 16, 32, 64, 128)
 
         Base.convert(::Type{Bool}, b::$Boolsz) = b.int != 0
 
-        Base. ~(b::$Boolsz) = $Boolsz(~b.int)
-        Base. !(b::$Boolsz) = ~b
-        Base. &(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int & b2.int)
-        Base. |(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int | b2.int)
+        Base.:~(b::$Boolsz) = $Boolsz(~b.int)
+        Base.:!(b::$Boolsz) = ~b
+        Base.:&(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int & b2.int)
+        Base.:|(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int | b2.int)
         Base.$(:$)(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int $ b2.int)
 
-        Base. ==(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int == b2.int)
-        Base. !=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int != b2.int)
-        Base. <(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int < b2.int)
-        Base. <=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int <= b2.int)
-        Base. >(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int > b2.int)
-        Base. >=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int >= b2.int)
+        Base.:==(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int == b2.int)
+        Base.:!=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int != b2.int)
+        Base.:<(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int < b2.int)
+        Base.:<=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int <= b2.int)
+        Base.:>(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int > b2.int)
+        Base.:>=(b1::$Boolsz, b2::$Boolsz) = $Boolsz(b1.int >= b2.int)
     end
 end
 Base.convert(::Type{Bool}, b::Boolean) = error("impossible")
@@ -115,7 +115,7 @@ Vec(xs::NTuple{N,T}) where {N,T<:ScalarTypes} = Vec{N,T}(xs)
 @inline Tuple(v::Vec{N}) where {N} = ntuple(i -> v.elts[i].value, Val(N))
 @inline NTuple{N, T}(v::Vec{N}) where{N, T} = ntuple(i -> convert(T, v.elts[i].value), Val(N))
 
-@generated function Base. %(v::Vec{N,T}, ::Type{Vec{N,R}}) where {N,R,T}
+@generated function Base.:%(v::Vec{N,T}, ::Type{Vec{N,R}}) where {N,R,T}
     quote
         $(Expr(:meta, :inline))
         Vec{N,R}(tuple($([:(v.elts[$i].value % R) for i in 1:N]...)))
@@ -927,7 +927,7 @@ for op in (:~, :+, :-)
         llvmwrap(Val{$(QuoteNode(op))}, v1)
     end
 end
-@inline Base. !(v1::Vec{N,Bool}) where {N} = ~v1
+@inline Base.:!(v1::Vec{N,Bool}) where {N} = ~v1
 @inline function Base.abs(v1::Vec{N,T}) where {N,T<:IntTypes}
     # s = -Vec{N,T}(signbit(v1))
     s = v1 >> Val{8*sizeof(T)}