From 977528d86a12e3e2c299f56c63c77f1e20418be5 Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Wed, 4 Mar 2020 08:03:04 -0500 Subject: [PATCH] dgemmjit looks identical to regular MKL, suggesting (a) the JIT isn't working (compiler warnings about ignored preprocessor directives reinforce this), and (b) a way to benchmark both OpenBLAS and MKL. --- docs/src/assets/bench_AmulB_v1.svg | 4 +++- docs/src/assets/bench_AmulBt_v1.svg | 4 +++- docs/src/assets/bench_AplusAt_v1.svg | 4 +++- docs/src/assets/bench_AtmulB_v1.svg | 4 +++- docs/src/assets/bench_AtmulBt_v1.svg | 4 +++- docs/src/assets/bench_aplusBc_v1.svg | 4 +++- docs/src/assets/bench_dot3_v1.svg | 4 +++- docs/src/assets/bench_dot_v1.svg | 4 +++- docs/src/assets/bench_exp_v1.svg | 4 +++- docs/src/assets/bench_logdettriangle_v1.svg | 4 +++- docs/src/assets/bench_random_access_v1.svg | 4 +++- docs/src/assets/bench_selfdot_v1.svg | 4 +++- docs/src/assets/bench_sse_v1.svg | 4 +++- src/LoopVectorization.jl | 2 +- src/costs.jl | 4 ++-- 15 files changed, 42 insertions(+), 16 deletions(-) diff --git a/docs/src/assets/bench_AmulB_v1.svg b/docs/src/assets/bench_AmulB_v1.svg index 510c0eade..4da91333f 100644 --- a/docs/src/assets/bench_AmulB_v1.svg +++ b/docs/src/assets/bench_AmulB_v1.svg @@ -1 +1,3 @@ -Clang++ & Eigen-3Clang-PollyGFort-intrinsicGFortranIntelMKLJuliaLoopVectorizationiccicpc & Eigen-3ifortifort-intrinsicMethod0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120130GFLOPS \ No newline at end of file + + +0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120130GFLOPSClang++ & Eigen-3Clang-PollyGFort-intrinsicGFortranIntelMKLJuliaLoopVectorizationMKL JITiccicpc & Eigen-3ifortifort-intrinsicMethod diff --git a/docs/src/assets/bench_AmulBt_v1.svg b/docs/src/assets/bench_AmulBt_v1.svg index 43127e8ec..d717c9b26 100644 --- a/docs/src/assets/bench_AmulBt_v1.svg +++ b/docs/src/assets/bench_AmulBt_v1.svg @@ -1 +1,3 @@ -Clang++ & Eigen-3Clang-PollyGFort-intrinsicGFortranIntelMKLJuliaLoopVectorizationiccicpc & Eigen-3ifortifort-intrinsicMethod0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120130GFLOPS \ No newline at end of file + + +0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120130GFLOPSClang++ & Eigen-3Clang-PollyGFort-intrinsicGFortranIntelMKLJuliaLoopVectorizationMKL JITiccicpc & Eigen-3ifortifort-intrinsicMethod diff --git a/docs/src/assets/bench_AplusAt_v1.svg b/docs/src/assets/bench_AplusAt_v1.svg index 4ffc3699d..3449ed926 100644 --- a/docs/src/assets/bench_AplusAt_v1.svg +++ b/docs/src/assets/bench_AplusAt_v1.svg @@ -1 +1,3 @@ -Clang++ & Eigen-3Clang-PollyGFortranGFortran-builtinJuliaLoopVectorizationiccicpc & Eigen-3ifortifort-builtinMethod0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.51.01.52.02.53.03.54.04.55.0GFLOPS \ No newline at end of file + + +0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.51.01.52.02.53.03.54.04.55.0GFLOPSClang++ & Eigen-3Clang-PollyGFortranGFortran-builtinJuliaLoopVectorizationiccicpc & Eigen-3ifortifort-builtinMethod diff --git a/docs/src/assets/bench_AtmulB_v1.svg b/docs/src/assets/bench_AtmulB_v1.svg index 078efa62e..687c44f0a 100644 --- a/docs/src/assets/bench_AtmulB_v1.svg +++ b/docs/src/assets/bench_AtmulB_v1.svg @@ -1 +1,3 @@ -Clang++ & Eigen-3Clang-PollyGFort-intrinsicGFortranIntelMKLJuliaLoopVectorizationiccicpc & Eigen-3ifortifort-intrinsicMethod0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120GFLOPS \ No newline at end of file + + +0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120GFLOPSClang++ & Eigen-3Clang-PollyGFort-intrinsicGFortranIntelMKLJuliaLoopVectorizationMKL JITiccicpc & Eigen-3ifortifort-intrinsicMethod diff --git a/docs/src/assets/bench_AtmulBt_v1.svg b/docs/src/assets/bench_AtmulBt_v1.svg index 4ca82ff0b..c24ce904a 100644 --- a/docs/src/assets/bench_AtmulBt_v1.svg +++ b/docs/src/assets/bench_AtmulBt_v1.svg @@ -1 +1,3 @@ -Clang++ & Eigen-3Clang-PollyGFort-intrinsicGFortranIntelMKLJuliaLoopVectorizationiccicpc & Eigen-3ifortifort-intrinsicMethod0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120GFLOPS \ No newline at end of file + + +0102030405060708090100110120130140150160170180190200210220230240250260Size0102030405060708090100110120GFLOPSClang++ & Eigen-3Clang-PollyGFort-intrinsicGFortranIntelMKLJuliaLoopVectorizationMKL JITiccicpc & Eigen-3ifortifort-intrinsicMethod diff --git a/docs/src/assets/bench_aplusBc_v1.svg b/docs/src/assets/bench_aplusBc_v1.svg index bd811cb32..8931a8414 100644 --- a/docs/src/assets/bench_aplusBc_v1.svg +++ b/docs/src/assets/bench_aplusBc_v1.svg @@ -1 +1,3 @@ -Clang++ & Eigen-3Clang-PollyGFortranJuliaLoopVectorizationiccicpc & Eigen-3ifortMethod0102030405060708090100110120130140150160170180190200210220230240250260Size024681012141618202224262830GFLOPS \ No newline at end of file + + +0102030405060708090100110120130140150160170180190200210220230240250260Size024681012141618202224262830GFLOPSClang++ & Eigen-3Clang-PollyGFortranJuliaLoopVectorizationiccicpc & Eigen-3ifortMethod diff --git a/docs/src/assets/bench_dot3_v1.svg b/docs/src/assets/bench_dot3_v1.svg index 8b48bb661..1523783c0 100644 --- a/docs/src/assets/bench_dot3_v1.svg +++ b/docs/src/assets/bench_dot3_v1.svg @@ -1 +1,3 @@ -Clang++ & Eigen-3Clang-PollyGFortranIntelMKLJuliaLoopVectorizationiccicpc & Eigen-3ifortMethod0102030405060708090100110120130140150160170180190200210220230240250260Size05101520253035404550556065707580GFLOPS \ No newline at end of file + + +0102030405060708090100110120130140150160170180190200210220230240250260Size05101520253035404550556065707580GFLOPSClang++ & Eigen-3Clang-PollyGFortranIntelMKLJuliaLoopVectorizationiccicpc & Eigen-3ifortMethod diff --git a/docs/src/assets/bench_dot_v1.svg b/docs/src/assets/bench_dot_v1.svg index d2574c19f..89a0355ae 100644 --- a/docs/src/assets/bench_dot_v1.svg +++ b/docs/src/assets/bench_dot_v1.svg @@ -1 +1,3 @@ -Clang++ & Eigen-3Clang-PollyGFortranIntelMKLJuliaLoopVectorizationiccicpc & Eigen-3ifortMethod0102030405060708090100110120130140150160170180190200210220230240250260Size05101520253035404550GFLOPS \ No newline at end of file + + +0102030405060708090100110120130140150160170180190200210220230240250260Size02468101214161820222426283032343638404244GFLOPSClang++ & Eigen-3Clang-PollyGFortranIntelMKLJuliaLoopVectorizationiccicpc & Eigen-3ifortMethod diff --git a/docs/src/assets/bench_exp_v1.svg b/docs/src/assets/bench_exp_v1.svg index 4f212eca9..cd2dd4d06 100644 --- a/docs/src/assets/bench_exp_v1.svg +++ b/docs/src/assets/bench_exp_v1.svg @@ -1 +1,3 @@ -Clang-PollyGFortranJuliaLoopVectorizationiccifortMethod0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.20.40.60.81.01.21.41.61.82.02.22.42.62.8GFLOPS \ No newline at end of file + + +0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.20.40.60.81.01.21.41.61.82.02.22.42.6GFLOPSClang-PollyGFortranJuliaLoopVectorizationiccifortMethod diff --git a/docs/src/assets/bench_logdettriangle_v1.svg b/docs/src/assets/bench_logdettriangle_v1.svg index 6505273fb..04cd3f817 100644 --- a/docs/src/assets/bench_logdettriangle_v1.svg +++ b/docs/src/assets/bench_logdettriangle_v1.svg @@ -1 +1,3 @@ -Clang-PollyGFortranJuliaJulia-builtinLoopVectorizationiccifortMethod0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.10.20.30.40.50.60.70.80.91.01.11.21.3GFLOPS \ No newline at end of file + + +0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.10.20.30.40.50.60.70.80.91.01.11.21.3GFLOPSClang-PollyGFortranJuliaJulia-builtinLoopVectorizationiccifortMethod diff --git a/docs/src/assets/bench_random_access_v1.svg b/docs/src/assets/bench_random_access_v1.svg index dbc435c0d..00231fa6c 100644 --- a/docs/src/assets/bench_random_access_v1.svg +++ b/docs/src/assets/bench_random_access_v1.svg @@ -1 +1,3 @@ -Clang-PollyGFortranJuliaLoopVectorizationiccifortMethod0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.20.40.60.81.01.21.41.61.82.02.22.42.62.83.03.23.43.63.84.0GFLOPS \ No newline at end of file + + +0102030405060708090100110120130140150160170180190200210220230240250260Size0.00.20.40.60.81.01.21.41.61.82.02.22.42.62.83.03.23.43.63.84.0GFLOPSClang-PollyGFortranJuliaLoopVectorizationiccifortMethod diff --git a/docs/src/assets/bench_selfdot_v1.svg b/docs/src/assets/bench_selfdot_v1.svg index 228987649..5840fc262 100644 --- a/docs/src/assets/bench_selfdot_v1.svg +++ b/docs/src/assets/bench_selfdot_v1.svg @@ -1 +1,3 @@ -Clang++ & Eigen-3Clang-PollyGFortranIntelMKLJuliaLoopVectorizationiccicpc & Eigen-3ifortMethod0102030405060708090100110120130140150160170180190200210220230240250260Size05101520253035404550556065GFLOPS \ No newline at end of file + + +0102030405060708090100110120130140150160170180190200210220230240250260Size051015202530354045505560GFLOPSClang++ & Eigen-3Clang-PollyGFortranIntelMKLJuliaLoopVectorizationiccicpc & Eigen-3ifortMethod diff --git a/docs/src/assets/bench_sse_v1.svg b/docs/src/assets/bench_sse_v1.svg index db2f7ce01..c44b4b96e 100644 --- a/docs/src/assets/bench_sse_v1.svg +++ b/docs/src/assets/bench_sse_v1.svg @@ -1 +1,3 @@ -Clang++ & Eigen-3Clang-PollyGFortranIntelMKLJuliaLoopVectorizationiccicpc & Eigen-3ifortMethod0102030405060708090100110120130140150160170180190200210220230240250260Size05101520253035404550556065707580GFLOPS \ No newline at end of file + + +0102030405060708090100110120130140150160170180190200210220230240250260Size0510152025303540455055606570GFLOPSClang++ & Eigen-3Clang-PollyGFortranIntelMKLJuliaLoopVectorizationiccicpc & Eigen-3ifortMethod diff --git a/src/LoopVectorization.jl b/src/LoopVectorization.jl index 62d4a305a..88c661da6 100644 --- a/src/LoopVectorization.jl +++ b/src/LoopVectorization.jl @@ -8,7 +8,7 @@ using VectorizationBase: REGISTER_SIZE, REGISTER_COUNT, extract_data, num_vector PackedStridedPointer, SparseStridedPointer, RowMajorStridedPointer, StaticStridedPointer, StaticStridedStruct using SIMDPirates: VECTOR_SYMBOLS, evadd, evmul, vrange, reduced_add, reduced_prod, reduce_to_add, reduce_to_prod, sizeequivalentfloat, sizeequivalentint, vadd!, vsub!, vmul!, vfdiv!, vfmadd!, vfnmadd!, vfmsub!, vfnmsub!, - vfmadd231, vfmsub231, vfnmadd231, vfnmsub231, + vfmadd231, vfmsub231, vfnmadd231, vfnmsub231, #prefetch, vmullog2, vmullog10, vdivlog2, vdivlog10, vmullog2add!, vmullog10add!, vdivlog2add!, vdivlog10add!, vfmaddaddone using Base.Broadcast: Broadcasted, DefaultArrayStyle using LinearAlgebra: Adjoint, Transpose diff --git a/src/costs.jl b/src/costs.jl index b7c3b5d02..9eca32327 100644 --- a/src/costs.jl +++ b/src/costs.jl @@ -194,8 +194,8 @@ const COST = Dict{Instruction,InstructionCost}( Instruction(:sincospi_fast) => InstructionCost(25,22.0,70.0,26), Instruction(:identity) => InstructionCost(0,0.0,0.0,0), Instruction(:adjoint) => InstructionCost(0,0.0,0.0,0), - Instruction(:transpose) => InstructionCost(0,0.0,0.0,0) - # Symbol("##CONSTANT##") => InstructionCost(0,0.0) + Instruction(:transpose) => InstructionCost(0,0.0,0.0,0), + Instruction(:prefetch) => InstructionCost(0,0.0,0.0,0) ) # const KNOWNINSTRUCTIONS = keys(COST)