From 042a53292cc027d70b4e3f5d04a60d384371d709 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Wed, 27 Jan 2021 15:11:50 +0100 Subject: [PATCH 1/3] Drop Julia 1.5. --- .buildkite/pipeline.yml | 17 ++--------------- Project.toml | 2 +- README.md | 4 ---- 3 files changed, 3 insertions(+), 20 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 653373ca..4e0c19c3 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,30 +1,17 @@ steps: - - label: "Julia 1.5" - plugins: - - JuliaCI/julia#v0.6: - version: '1.5' - - JuliaCI/julia-test#v0.3: ~ - - JuliaCI/julia-coverage#v0.3: - codecov: true - agents: - queue: "juliagpu" - cuda: "*" - cap: "recent" - if: build.message !~ /\[skip tests\]/ - timeout_in_minutes: 60 - - label: "Julia 1.6-nightly" plugins: - JuliaCI/julia#v0.6: version: "1.6-nightly" - JuliaCI/julia-test#v0.3: ~ + - JuliaCI/julia-coverage#v0.3: agents: queue: "juliagpu" cuda: "*" cap: "recent" if: build.message !~ /\[skip tests\]/ timeout_in_minutes: 60 - + - label: "Julia nightly" plugins: - JuliaCI/julia#v0.6: diff --git a/Project.toml b/Project.toml index 5db98f83..f06cf8e7 100644 --- a/Project.toml +++ b/Project.toml @@ -17,4 +17,4 @@ ForwardDiff = "0.10" GPUifyLoops = "0.2" LLVM = "3" StaticArrays = "0.12, 1.0" -julia = "1.5" +julia = "1.6" diff --git a/README.md b/README.md index 2b1ca071..93e3fedb 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@ _Flexible and performant GEMM kernels in Julia_ | Julia | CI | | ----------- | -- | -| 1.5 | [![Continuous Integration][buildkite-julia1-image]][buildkite-julia1-url] | | 1.6-nightly | [![Continuous Integration][buildkite-julia16nightly-image]][buildkite-julia16nightly-url] | | nightly | [![Continuous Integration][buildkite-julianightly-image]][buildkite-julianightly-url] | @@ -56,9 +55,6 @@ We compare our kernels with the state-of-the-art libraries cuBLAS and CUTLASS on For more details on the implementation and performance results, please see our accompanying paper (pre-print available on [arXiv][arxiv-paper]). The [`CITATION.bib`](CITATION.bib) file in the root of this repository contains a citation in BibTeX format. -[buildkite-julia1-image]: https://badge.buildkite.com/92f2ead968bafc516afa354576cccb7ab2f5b42a272d9cb0f0.svg?branch=master&step=Julia%201.5 -[buildkite-julia1-url]: https://buildkite.com/julialang/gemmkernels-dot-jl - [buildkite-julia16nightly-image]: https://badge.buildkite.com/92f2ead968bafc516afa354576cccb7ab2f5b42a272d9cb0f0.svg?branch=master&step=Julia%201.6-nightly [buildkite-julia16nightly-url]: https://buildkite.com/julialang/gemmkernels-dot-jl [buildkite-julianightly-image]: https://badge.buildkite.com/92f2ead968bafc516afa354576cccb7ab2f5b42a272d9cb0f0.svg?branch=master&step=Julia%20nightly From 2983ca07d82717e6097c0151a7378a1afdde8eb0 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Fri, 29 Jan 2021 12:27:44 +0100 Subject: [PATCH 2/3] Parallelized testing using XUnit.jl. --- test/Project.toml | 5 ++-- test/blas.jl | 21 +++++++------- test/matmul.jl | 72 ++++++++++++++++++++++++++--------------------- test/runtests.jl | 18 ++++-------- test/tiling.jl | 18 ++++++------ 5 files changed, 69 insertions(+), 65 deletions(-) diff --git a/test/Project.toml b/test/Project.toml index c11f3159..1ce1eb55 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,6 +1,7 @@ [deps] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +XUnit = "3e3c03f2-1a94-11e9-2981-050a4ca824ab" diff --git a/test/blas.jl b/test/blas.jl index 16a3ef09..b950080d 100644 --- a/test/blas.jl +++ b/test/blas.jl @@ -4,11 +4,11 @@ using LinearAlgebra CUDA.CUBLAS.cublasSetMathMode(CUBLAS.handle(), CUBLAS.CUBLAS_TENSOR_OP_MATH) -@test_if "blas" @testset "BLAS API" begin - @testset "WMMA GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true], - transpose_b = [false, true] - - @testset "(M = $M, N = $N, K = $K)" for M in [128, 256], +@testset "BLAS API" begin + @testset "WMMA GEMM" begin + for transpose_a = [false, true], transpose_b = [false, true], + M in [128, 256], N in [128, 256], K in [128, 256] + @testcase "$( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ); M = $M, N = $N, K = $K" begin N in [128, 256], K in [128, 256] @@ -35,12 +35,12 @@ CUDA.CUBLAS.cublasSetMathMode(CUBLAS.handle(), CUBLAS.CUBLAS_TENSOR_OP_MATH) @test all(isapprox.(Array(c_gemmkernels), Array(c_cublas); rtol=sqrt(eps(Float16)))); end end + end - @testset "WMMA GEMM (A = diagonal, B = $( !transpose_b ? 'N' : 'T' ))" for transpose_b = [false, true] - @testset "(M = $M, N = $N, K = $K)" for M in [128, 256], - N in [128, 256], - K in [M] - + @testset "WMMA GEMM diagonal" begin + for transpose_b = [false, true], + M in [128, 256], N in [128, 256], K in [M] + @testcase "A = diagonal, B = $( !transpose_b ? 'N' : 'T' ); M = $M, N = $N, K = $K" begin transpose_a = false alpha = rand(Float32) @@ -66,4 +66,5 @@ CUDA.CUBLAS.cublasSetMathMode(CUBLAS.handle(), CUBLAS.CUBLAS_TENSOR_OP_MATH) @test all(isapprox.(Array(c_gemmkernels), Array(c_cublas); rtol=sqrt(eps(Float16)))); end end + end end diff --git a/test/matmul.jl b/test/matmul.jl index c359db18..fce7b45c 100644 --- a/test/matmul.jl +++ b/test/matmul.jl @@ -6,10 +6,10 @@ using LinearAlgebra ################################################################################ @testset "Matmul API" begin - @test_if "wmma" @testset "WMMA GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true], - transpose_b = [false, true] - - @testset "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 128), (128, 128, 256), (256, 256, 256), (2048, 2048, 2048)] + @testset "WMMA GEMM" begin + for transpose_a = [false, true], transpose_b = [false, true], + (M, N, K) in [(128, 128, 128), (256, 256, 128), (128, 128, 256), (256, 256, 256), (2048, 2048, 2048)] + @testcase "$( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ); M = $M, N = $N, K = $K" begin alpha = 2 beta = 3 @@ -51,12 +51,13 @@ using LinearAlgebra @test all(isapprox.(alpha * Float32.(new_a_h) * Float32.(new_b_h) + beta * c_h, Array(d); rtol = sqrt(eps(Float16)))) end + end end - @test_if "bias" @testset "WMMA GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' )) + bias" for transpose_a = [false, true], - transpose_b = [false, true] - - @testset "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)] + @testset "WMMA GEMM + bias + bias" begin + for transpose_a = [false, true], transpose_b = [false, true], + (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)] + @testcase "$( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ); M = $M, N = $N, K = $K" begin a_h = rand(Float16, (M, K)) / sqrt(Float16(K)) b_h = rand(Float16, (K, N)) / sqrt(Float16(K)) c_h = rand(Float32, (M, N)) @@ -102,10 +103,13 @@ using LinearAlgebra @test all(isapprox.(Float32.(new_a_h) * Float32.(new_b_h) + c_h .+ Array(bias), Array(d); rtol = sqrt(eps(Float16)))) end + end end - @test_if "diagonal" @testset "WMMA GEMM (A = diagonal, B = $( !transpose_b ? 'N' : 'T' ))" for transpose_b = [false, true] - @testset "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)] + @testset "WMMA GEMM diagonal" begin + for transpose_b = [false, true], + (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)] + @testcase "A = diagonal, B = $( !transpose_b ? 'N' : 'T' ); M = $M, N = $N, K = $K" begin @assert M == K "Diagonal only supports square A matrix (M == K)" transpose_a = false @@ -146,24 +150,25 @@ using LinearAlgebra @test all(isapprox.(Float32.(Diagonal(new_a_h)) * Float32.(new_b_h) + c_h, Array(d); rtol = sqrt(eps(Float16)))) end + end end - @test_if "complex" @testset "WMMA Complex GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true], - transpose_b = [false, true] - - @testset "(M = $M, N = $N, K = $K)" for (M, N, K) = [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)] - a_h = rand(Complex{Float16}, (M, K)) / sqrt(Float16(K)); - b_h = rand(Complex{Float16}, (K, N)) / sqrt(Float16(K)); - c_h = rand(Complex{Float32}, (M, N)); + @testset "WMMA Complex GEMM" begin + for transpose_a = [false, true], transpose_b = [false, true], + (M, N, K) = [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)] + @testcase "$( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ); M = $M, N = $N, K = $K" begin + a_h = rand(Complex{Float16}, (M, K)) / sqrt(Float16(K)) + b_h = rand(Complex{Float16}, (K, N)) / sqrt(Float16(K)) + c_h = rand(Complex{Float32}, (M, N)) # Transpose input if necessary a_h = transpose_a ? transpose(a_h) : a_h b_h = transpose_b ? transpose(b_h) : b_h - a = CuArray(a_h); - b = CuArray(b_h); - c = CuArray(c_h); - d = similar(c); + a = CuArray(a_h) + b = CuArray(b_h) + c = CuArray(c_h) + d = similar(c) conf = GemmKernels.get_config( gemm_shape = (M = M, N = N, K = K), @@ -209,20 +214,22 @@ using LinearAlgebra # TODO: Figure out why changing this to a * b + c = d instead of a * b = d - c # makes tests fail for CC (see #19). - @test all(isapprox.(Complex{Float32}.(new_a_h) * Complex{Float32}.(new_b_h), Array(d) - c_h; rtol=sqrt(eps(Float16)))); + @test all(isapprox.(Complex{Float32}.(new_a_h) * Complex{Float32}.(new_b_h), Array(d) - c_h; rtol=sqrt(eps(Float16)))) + end end end - @test_if "dual" @testset "WMMA Dual GEMM" begin - @testset "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)] - a_h = rand(Complex{Float16}, (M, K)) / sqrt(Float16(K)); - b_h = rand(Complex{Float16}, (K, N)) / sqrt(Float16(K)); - c_h = rand(Complex{Float32}, (M, N)); + @testset "WMMA Dual GEMM" begin + for (M, N, K) in [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)] + @testcase "M = $M, N = $N, K = $K" begin + a_h = rand(Complex{Float16}, (M, K)) / sqrt(Float16(K)) + b_h = rand(Complex{Float16}, (K, N)) / sqrt(Float16(K)) + c_h = rand(Complex{Float32}, (M, N)) - a = CuArray(a_h); - b = CuArray(b_h); - c = CuArray(c_h); - d = similar(c); + a = CuArray(a_h) + b = CuArray(b_h) + c = CuArray(c_h) + d = similar(c) conf = GemmKernels.get_config( gemm_shape = (M = M, N = N, K = K), @@ -261,7 +268,8 @@ using LinearAlgebra c_dual = reinterpret(ForwardDiff.Dual{Float32,Float32,1}, c_h) d_dual = reinterpret(ForwardDiff.Dual{Float32,Float32,1}, Array(d)) - @test all(isapprox.(a_dual * b_dual + c_dual, d_dual; rtol=sqrt(eps(Float16)))); + @test all(isapprox.(a_dual * b_dual + c_dual, d_dual; rtol=sqrt(eps(Float16)))) + end end end end diff --git a/test/runtests.jl b/test/runtests.jl index d49b1f72..bd91641e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,9 @@ using GemmKernels -using Test + +using Pkg +Pkg.add(PackageSpec(name="XUnit", rev="9b756fcda72d813dbf017f8400d7c55251ef7d1b")) + +using XUnit import CUDA import InteractiveUtils @@ -7,19 +11,9 @@ import InteractiveUtils @info "Julia details\n\n" * sprint(io->InteractiveUtils.versioninfo(io)) @info "CUDA details\n\n" * sprint(io->CUDA.versioninfo(io)) -macro test_if(label, expr) - return quote - if isempty(ARGS) || $(label) in ARGS - $(esc(expr)) - else - nothing - end - end -end - CUDA.allowscalar(false) -@testset "GemmKernels.jl" begin +@testset runner=ParallelTestRunner() "GemmKernels.jl" begin include("tiling.jl") include("matmul.jl") include("blas.jl") diff --git a/test/tiling.jl b/test/tiling.jl index ebd4b200..0b101f6c 100644 --- a/test/tiling.jl +++ b/test/tiling.jl @@ -2,39 +2,39 @@ using GemmKernels.Tiling ################################################################################ -@test_if "tiling" @testset "Tiling API" begin +@testset "Tiling API" begin @testset "Tiles" begin - @testset "Index" begin + @testcase "Index" begin @test Tile(M = 4, N = 4, K = 4).index == (M = 0, N = 0, K = 0) end - @testset "Projection" begin + @testcase "Projection" begin @test Tile(M = 1, N = 2, K = 3).MN == Tile(M = 1, N = 2) @test Tile(M = 1, N = 2, K = 3).NM == Tile(N = 2, M = 1) @test Tile(M = 1, N = 2, K = 3).M == Tile(M = 1) @test Tile(M = 1, N = 2, K = 3).KMN == Tile(K = 3, M = 1, N = 2) end - @testset "Transposition" begin + @testcase "Transposition" begin @test transpose(Tile(M = 1, N = 2)) == Tile(N = 2, M = 1) @test transpose(Tile(M = 1, N = 2, K = 3)) == Tile(K = 3, N = 2, M = 1) end - @testset "Translate base" begin + @testcase "Translate base" begin tile = translate_base(Tile(M = 10, N = 20), (M = 1, N = 2)) @test tile.size == (M = 10, N = 20) @test tile.base == (M = 1, N = 2) @test tile.offset == (M = 0, N = 0) end - @testset "Translate offset" begin + @testcase "Translate offset" begin tile = translate_offset(Tile(M = 10, N = 20), (M = 1, N = 2)) @test tile.size == (M = 10, N = 20) @test tile.base == (M = 0, N = 0) @test tile.offset == (M = 1, N = 2) end - @testset "Linearise" begin + @testcase "Linearise" begin tile = Tile(M = 3, N = 5) for i = 0 : 2, j = 0 : 4 tile_t = translate_offset(tile, (M = i, N = j)) @@ -45,7 +45,7 @@ using GemmKernels.Tiling end @testset "Tile iteration" begin - @testset "Subdivide" begin + @testcase "Subdivide" begin tile_size = (M = 8, N = 4) num_tiles = (M = 2, N = 4) tile = Tile(M = num_tiles.M * tile_size.M, N = num_tiles.N * tile_size.N) @@ -59,7 +59,7 @@ using GemmKernels.Tiling end end - @testset "Parallellise" begin + @testcase "Parallellise" begin tile_size = (M = 8, N = 4) num_tiles = (M = 2, N = 8) tile = Tile(M = num_tiles.M * tile_size.M, N = num_tiles.N * tile_size.N) From 605321e48fa48dcccaf86d0529b3252248503362 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 2 Feb 2021 10:35:38 +0100 Subject: [PATCH 3/3] Update test/matmul.jl Co-authored-by: Thomas Faingnaert --- test/matmul.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/matmul.jl b/test/matmul.jl index fce7b45c..b04e117d 100644 --- a/test/matmul.jl +++ b/test/matmul.jl @@ -54,7 +54,7 @@ using LinearAlgebra end end - @testset "WMMA GEMM + bias + bias" begin + @testset "WMMA GEMM + bias" begin for transpose_a = [false, true], transpose_b = [false, true], (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)] @testcase "$( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ); M = $M, N = $N, K = $K" begin