Commit

Parallelized testing using XUnit.jl. (#71)
Co-authored-by: Thomas Faingnaert <thomas.faingnaert@hotmail.com>
maleadt and thomasfaingnaert authored Feb 2, 2021
1 parent a375e77 commit e4cc99d
Showing 8 changed files with 72 additions and 85 deletions.
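The substantive change: the test suite switches from the standard `Test` stdlib to XUnit.jl, whose `ParallelTestRunner` executes individually named `@testcase` blocks in parallel. A minimal sketch of the pattern the diff introduces; the runner keyword and macros appear in the diff below, while the suite and case names here are illustrative:

```julia
# Minimal sketch of the XUnit.jl structure this commit adopts.
# Only `runner=ParallelTestRunner()` and `@testcase` come from the diff;
# the suite and case names are made up for illustration.
using XUnit

@testset runner=ParallelTestRunner() "ExampleSuite" begin
    @testcase "trivial arithmetic" begin
        # Each @testcase is a separately schedulable unit of work.
        @test 1 + 1 == 2
    end
end
```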
17 changes: 2 additions & 15 deletions .buildkite/pipeline.yml
@@ -1,30 +1,17 @@
 steps:
-  - label: "Julia 1.5"
-    plugins:
-      - JuliaCI/julia#v1:
-          version: '1.5'
-      - JuliaCI/julia-test#v1: ~
-      - JuliaCI/julia-coverage#v1:
-          codecov: true
-    agents:
-      queue: "juliagpu"
-      cuda: "*"
-      cap: "recent"
-    if: build.message !~ /\[skip tests\]/
-    timeout_in_minutes: 60
-
   - label: "Julia 1.6-nightly"
     plugins:
       - JuliaCI/julia#v1:
           version: "1.6-nightly"
       - JuliaCI/julia-test#v1: ~
       - JuliaCI/julia-coverage#v1:
     agents:
       queue: "juliagpu"
       cuda: "*"
       cap: "recent"
     if: build.message !~ /\[skip tests\]/
     timeout_in_minutes: 60
 
   - label: "Julia nightly"
     plugins:
       - JuliaCI/julia#v1:
2 changes: 1 addition & 1 deletion Project.toml
@@ -17,4 +17,4 @@ ForwardDiff = "0.10"
 GPUifyLoops = "0.2"
 LLVM = "3"
 StaticArrays = "0.12, 1.0"
-julia = "1.5"
+julia = "1.6"
4 changes: 0 additions & 4 deletions README.md
@@ -6,7 +6,6 @@ _Flexible and performant GEMM kernels in Julia_

 | Julia       | CI |
 | ----------- | -- |
-| 1.5         | [![Continuous Integration][buildkite-julia1-image]][buildkite-julia1-url] |
 | 1.6-nightly | [![Continuous Integration][buildkite-julia16nightly-image]][buildkite-julia16nightly-url] |
 | nightly     | [![Continuous Integration][buildkite-julianightly-image]][buildkite-julianightly-url] |

@@ -56,9 +55,6 @@ We compare our kernels with the state-of-the-art libraries cuBLAS and CUTLASS on
 For more details on the implementation and performance results, please see our accompanying paper (pre-print available on [arXiv][arxiv-paper]).
 The [`CITATION.bib`](CITATION.bib) file in the root of this repository contains a citation in BibTeX format.
 
-[buildkite-julia1-image]: https://badge.buildkite.com/92f2ead968bafc516afa354576cccb7ab2f5b42a272d9cb0f0.svg?branch=master&step=Julia%201.5
-[buildkite-julia1-url]: https://buildkite.com/julialang/gemmkernels-dot-jl
-
 [buildkite-julia16nightly-image]: https://badge.buildkite.com/92f2ead968bafc516afa354576cccb7ab2f5b42a272d9cb0f0.svg?branch=master&step=Julia%201.6-nightly
 [buildkite-julia16nightly-url]: https://buildkite.com/julialang/gemmkernels-dot-jl
 [buildkite-julianightly-image]: https://badge.buildkite.com/92f2ead968bafc516afa354576cccb7ab2f5b42a272d9cb0f0.svg?branch=master&step=Julia%20nightly
5 changes: 3 additions & 2 deletions test/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+XUnit = "3e3c03f2-1a94-11e9-2981-050a4ca824ab"
21 changes: 11 additions & 10 deletions test/blas.jl
@@ -4,11 +4,11 @@ using LinearAlgebra

 CUDA.CUBLAS.cublasSetMathMode(CUBLAS.handle(), CUBLAS.CUBLAS_TENSOR_OP_MATH)
 
-@test_if "blas" @testset "BLAS API" begin
-@testset "WMMA GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true],
-    transpose_b = [false, true]
-
-    @testset "(M = $M, N = $N, K = $K)" for M in [128, 256],
-        N in [128, 256],
-        K in [128, 256]
-
+@testset "BLAS API" begin
+
+@testset "WMMA GEMM" begin
+    for transpose_a = [false, true], transpose_b = [false, true],
+        M in [128, 256], N in [128, 256], K in [128, 256]
+        @testcase "$( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ); M = $M, N = $N, K = $K" begin
@@ -35,12 +35,12 @@ CUDA.CUBLAS.cublasSetMathMode(CUBLAS.handle(), CUBLAS.CUBLAS_TENSOR_OP_MATH)
 @test all(isapprox.(Array(c_gemmkernels), Array(c_cublas); rtol=sqrt(eps(Float16))));
         end
     end
+end
 
-@testset "WMMA GEMM (A = diagonal, B = $( !transpose_b ? 'N' : 'T' ))" for transpose_b = [false, true]
-    @testset "(M = $M, N = $N, K = $K)" for M in [128, 256],
-        N in [128, 256],
-        K in [M]
-
+@testset "WMMA GEMM diagonal" begin
+    for transpose_b = [false, true],
+        M in [128, 256], N in [128, 256], K in [M]
+        @testcase "A = diagonal, B = $( !transpose_b ? 'N' : 'T' ); M = $M, N = $N, K = $K" begin
     transpose_a = false
 
     alpha = rand(Float32)
@@ -66,4 +66,5 @@ CUDA.CUBLAS.cublasSetMathMode(CUBLAS.handle(), CUBLAS.CUBLAS_TENSOR_OP_MATH)
 @test all(isapprox.(Array(c_gemmkernels), Array(c_cublas); rtol=sqrt(eps(Float16))));
+        end
     end
 end
 end
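The same rewrite recurs in `test/blas.jl` above and `test/matmul.jl` below: parameter sweeps move out of `@testset ... for` headers into plain `for` loops whose body is a single named `@testcase`, so each (transpose, M, N, K) combination becomes an independently schedulable case. A hedged sketch of that transformation, with the GEMM body elided:

```julia
using XUnit

# One @testcase per parameter combination; the case label keeps the
# parameters visible in the test report. The actual cuBLAS comparison
# is elided and replaced by a placeholder assertion.
@testset "WMMA GEMM" begin
    for transpose_a in [false, true], transpose_b in [false, true],
        M in [128, 256], N in [128, 256], K in [128, 256]
        @testcase "$(transpose_a ? 'T' : 'N')$(transpose_b ? 'T' : 'N'); M = $M, N = $N, K = $K" begin
            @test true  # placeholder
        end
    end
end
```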
72 changes: 40 additions & 32 deletions test/matmul.jl
@@ -6,10 +6,10 @@ using LinearAlgebra
 ################################################################################
 
 @testset "Matmul API" begin
-@test_if "wmma" @testset "WMMA GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true],
-    transpose_b = [false, true]
-
-    @testset "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 128), (128, 128, 256), (256, 256, 256), (2048, 2048, 2048)]
+@testset "WMMA GEMM" begin
+    for transpose_a = [false, true], transpose_b = [false, true],
+        (M, N, K) in [(128, 128, 128), (256, 256, 128), (128, 128, 256), (256, 256, 256), (2048, 2048, 2048)]
+        @testcase "$( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ); M = $M, N = $N, K = $K" begin
     alpha = 2
     beta = 3
 
@@ -51,12 +51,13 @@

 @test all(isapprox.(alpha * Float32.(new_a_h) * Float32.(new_b_h) + beta * c_h, Array(d); rtol = sqrt(eps(Float16))))
         end
+    end
 end
 
-@test_if "bias" @testset "WMMA GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' )) + bias" for transpose_a = [false, true],
-    transpose_b = [false, true]
-
-    @testset "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)]
+@testset "WMMA GEMM + bias" begin
+    for transpose_a = [false, true], transpose_b = [false, true],
+        (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)]
+        @testcase "$( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ); M = $M, N = $N, K = $K" begin
 a_h = rand(Float16, (M, K)) / sqrt(Float16(K))
 b_h = rand(Float16, (K, N)) / sqrt(Float16(K))
 c_h = rand(Float32, (M, N))
@@ -102,10 +103,13 @@

 @test all(isapprox.(Float32.(new_a_h) * Float32.(new_b_h) + c_h .+ Array(bias), Array(d); rtol = sqrt(eps(Float16))))
         end
+    end
 end
 
-@test_if "diagonal" @testset "WMMA GEMM (A = diagonal, B = $( !transpose_b ? 'N' : 'T' ))" for transpose_b = [false, true]
-    @testset "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)]
+@testset "WMMA GEMM diagonal" begin
+    for transpose_b = [false, true],
+        (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)]
+        @testcase "A = diagonal, B = $( !transpose_b ? 'N' : 'T' ); M = $M, N = $N, K = $K" begin
 @assert M == K "Diagonal only supports square A matrix (M == K)"
 
 transpose_a = false
@@ -146,24 +150,25 @@

 @test all(isapprox.(Float32.(Diagonal(new_a_h)) * Float32.(new_b_h) + c_h, Array(d); rtol = sqrt(eps(Float16))))
         end
+    end
 end
 
-@test_if "complex" @testset "WMMA Complex GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true],
-    transpose_b = [false, true]
-
-    @testset "(M = $M, N = $N, K = $K)" for (M, N, K) = [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)]
-        a_h = rand(Complex{Float16}, (M, K)) / sqrt(Float16(K));
-        b_h = rand(Complex{Float16}, (K, N)) / sqrt(Float16(K));
-        c_h = rand(Complex{Float32}, (M, N));
+@testset "WMMA Complex GEMM" begin
+    for transpose_a = [false, true], transpose_b = [false, true],
+        (M, N, K) = [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)]
+        @testcase "$( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ); M = $M, N = $N, K = $K" begin
+            a_h = rand(Complex{Float16}, (M, K)) / sqrt(Float16(K))
+            b_h = rand(Complex{Float16}, (K, N)) / sqrt(Float16(K))
+            c_h = rand(Complex{Float32}, (M, N))
 
 # Transpose input if necessary
 a_h = transpose_a ? transpose(a_h) : a_h
 b_h = transpose_b ? transpose(b_h) : b_h
 
-a = CuArray(a_h);
-b = CuArray(b_h);
-c = CuArray(c_h);
-d = similar(c);
+a = CuArray(a_h)
+b = CuArray(b_h)
+c = CuArray(c_h)
+d = similar(c)
 
 conf = GemmKernels.get_config(
     gemm_shape = (M = M, N = N, K = K),
@@ -209,20 +214,22 @@

 # TODO: Figure out why changing this to a * b + c = d instead of a * b = d - c
 # makes tests fail for CC (see #19).
-@test all(isapprox.(Complex{Float32}.(new_a_h) * Complex{Float32}.(new_b_h), Array(d) - c_h; rtol=sqrt(eps(Float16))));
+@test all(isapprox.(Complex{Float32}.(new_a_h) * Complex{Float32}.(new_b_h), Array(d) - c_h; rtol=sqrt(eps(Float16))))
+        end
     end
 end
 
-@test_if "dual" @testset "WMMA Dual GEMM" begin
-    @testset "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)]
-        a_h = rand(Complex{Float16}, (M, K)) / sqrt(Float16(K));
-        b_h = rand(Complex{Float16}, (K, N)) / sqrt(Float16(K));
-        c_h = rand(Complex{Float32}, (M, N));
+@testset "WMMA Dual GEMM" begin
+    for (M, N, K) in [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)]
+        @testcase "M = $M, N = $N, K = $K" begin
+            a_h = rand(Complex{Float16}, (M, K)) / sqrt(Float16(K))
+            b_h = rand(Complex{Float16}, (K, N)) / sqrt(Float16(K))
+            c_h = rand(Complex{Float32}, (M, N))
 
-a = CuArray(a_h);
-b = CuArray(b_h);
-c = CuArray(c_h);
-d = similar(c);
+a = CuArray(a_h)
+b = CuArray(b_h)
+c = CuArray(c_h)
+d = similar(c)
 
 conf = GemmKernels.get_config(
     gemm_shape = (M = M, N = N, K = K),
@@ -261,7 +268,8 @@
 c_dual = reinterpret(ForwardDiff.Dual{Float32,Float32,1}, c_h)
 d_dual = reinterpret(ForwardDiff.Dual{Float32,Float32,1}, Array(d))
 
-@test all(isapprox.(a_dual * b_dual + c_dual, d_dual; rtol=sqrt(eps(Float16))));
+@test all(isapprox.(a_dual * b_dual + c_dual, d_dual; rtol=sqrt(eps(Float16))))
+        end
     end
 end
 end
18 changes: 6 additions & 12 deletions test/runtests.jl
@@ -1,25 +1,19 @@
 using GemmKernels
-using Test
+
+using Pkg
+Pkg.add(PackageSpec(name="XUnit", rev="9b756fcda72d813dbf017f8400d7c55251ef7d1b"))
+
+using XUnit
 
 import CUDA
 import InteractiveUtils
 
 @info "Julia details\n\n" * sprint(io->InteractiveUtils.versioninfo(io))
 @info "CUDA details\n\n" * sprint(io->CUDA.versioninfo(io))
 
-macro test_if(label, expr)
-    return quote
-        if isempty(ARGS) || $(label) in ARGS
-            $(esc(expr))
-        else
-            nothing
-        end
-    end
-end
-
 CUDA.allowscalar(false)
 
-@testset "GemmKernels.jl" begin
+@testset runner=ParallelTestRunner() "GemmKernels.jl" begin
 include("tiling.jl")
 include("matmul.jl")
 include("blas.jl")
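XUnit's `ParallelTestRunner` distributes `@testcase`s across Julia threads, so the suite only gains parallelism when Julia is started with more than one thread. A plausible invocation, with the thread count and flags purely illustrative, would be `julia --project=test --threads=4 test/runtests.jl`.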
18 changes: 9 additions & 9 deletions test/tiling.jl
@@ -2,39 +2,39 @@ using GemmKernels.Tiling

 ################################################################################
 
-@test_if "tiling" @testset "Tiling API" begin
+@testset "Tiling API" begin
     @testset "Tiles" begin
-        @testset "Index" begin
+        @testcase "Index" begin
             @test Tile(M = 4, N = 4, K = 4).index == (M = 0, N = 0, K = 0)
         end
 
-        @testset "Projection" begin
+        @testcase "Projection" begin
             @test Tile(M = 1, N = 2, K = 3).MN == Tile(M = 1, N = 2)
             @test Tile(M = 1, N = 2, K = 3).NM == Tile(N = 2, M = 1)
             @test Tile(M = 1, N = 2, K = 3).M == Tile(M = 1)
             @test Tile(M = 1, N = 2, K = 3).KMN == Tile(K = 3, M = 1, N = 2)
         end
 
-        @testset "Transposition" begin
+        @testcase "Transposition" begin
             @test transpose(Tile(M = 1, N = 2)) == Tile(N = 2, M = 1)
             @test transpose(Tile(M = 1, N = 2, K = 3)) == Tile(K = 3, N = 2, M = 1)
         end
 
-        @testset "Translate base" begin
+        @testcase "Translate base" begin
             tile = translate_base(Tile(M = 10, N = 20), (M = 1, N = 2))
             @test tile.size == (M = 10, N = 20)
             @test tile.base == (M = 1, N = 2)
             @test tile.offset == (M = 0, N = 0)
         end
 
-        @testset "Translate offset" begin
+        @testcase "Translate offset" begin
            tile = translate_offset(Tile(M = 10, N = 20), (M = 1, N = 2))
            @test tile.size == (M = 10, N = 20)
            @test tile.base == (M = 0, N = 0)
            @test tile.offset == (M = 1, N = 2)
         end
 
-        @testset "Linearise" begin
+        @testcase "Linearise" begin
             tile = Tile(M = 3, N = 5)
             for i = 0 : 2, j = 0 : 4
                 tile_t = translate_offset(tile, (M = i, N = j))
@@ -45,7 +45,7 @@ using GemmKernels.Tiling
     end
 
     @testset "Tile iteration" begin
-        @testset "Subdivide" begin
+        @testcase "Subdivide" begin
             tile_size = (M = 8, N = 4)
             num_tiles = (M = 2, N = 4)
             tile = Tile(M = num_tiles.M * tile_size.M, N = num_tiles.N * tile_size.N)
@@ -59,7 +59,7 @@
             end
         end
 
-        @testset "Parallellise" begin
+        @testcase "Parallellise" begin
             tile_size = (M = 8, N = 4)
             num_tiles = (M = 2, N = 8)
             tile = Tile(M = num_tiles.M * tile_size.M, N = num_tiles.N * tile_size.N)
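The assertions above double as usage documentation for the Tiling API; a short sketch inferred from them (a hedged reading of the tested behavior, not an authoritative spec):

```julia
using GemmKernels.Tiling

t = Tile(M = 8, N = 4)                      # named-dimension tile; index starts at zero
@assert t.index == (M = 0, N = 0)
@assert transpose(t) == Tile(N = 4, M = 8)  # transposition reverses dimension order

t2 = translate_offset(t, (M = 2, N = 1))    # shifts the offset, keeps size and base
@assert t2.size == (M = 8, N = 4)
@assert t2.base == (M = 0, N = 0)
@assert t2.offset == (M = 2, N = 1)
```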
