Use XUnit.jl for parallel testing. #121

Merged 2 commits on Jun 30, 2023
Changes from all commits
9 changes: 8 additions & 1 deletion .buildkite/pipeline.yml
@@ -15,7 +15,14 @@ steps:
julia --project -e '
# make sure the 1.6-era Manifest works on this Julia version
using Pkg
Pkg.resolve()'
Pkg.resolve()

# work around XUnit.jl bug
try
Pkg.add(url="https://github.com/maleadt/XUnit.jl", rev="pass_compat")
catch err
@warn "Could not install patched version of XUnit.jl"
end'
if: build.message !~ /\[skip tests\]/
timeout_in_minutes: 120
matrix:
2 changes: 1 addition & 1 deletion .gitignore
@@ -1 +1 @@

test/Manifest.toml
7 changes: 5 additions & 2 deletions test/Project.toml
@@ -1,6 +1,9 @@
[deps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
XUnit = "3e3c03f2-1a94-11e9-2981-050a4ca824ab"
4 changes: 2 additions & 2 deletions test/blas.jl
@@ -4,12 +4,12 @@ using LinearAlgebra

CUDA.CUBLAS.cublasSetMathMode(CUBLAS.handle(), CUBLAS.CUBLAS_TENSOR_OP_MATH)

@test_if "blas" @testset "BLAS API" begin
@testset "BLAS API" begin
@testset "WMMA GEMM $(A_type)*$(B_type)+$(CD_type)=$(CD_type) ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true],
transpose_b = [false, true],
(A_type, B_type, CD_type, min_dimension) in [(Float16, Float16, Float16, 256), (Float16, Float16, Float32, 128)]

@testset "(M = $M, N = $N, K = $K)" for M in min_dimension .* [1, 2],
@testcase "(M = $M, N = $N, K = $K)" for M in min_dimension .* [1, 2],
N in min_dimension .* [1, 2],
K in min_dimension .* [1, 2]

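The recurring change in this and the following test files is that the innermost loops move from @testset to XUnit.jl's @testcase, which turns each iteration into an independently schedulable unit that a parallel runner can dispatch to a worker process, while the surrounding @testset blocks keep providing the grouping and the reported names. A minimal sketch of that split, assuming only the @testset/@testcase API the PR itself uses (the suite name and the toy assertion are illustrative):

    using XUnit

    # The outer @testset only groups and names; each @testcase iteration below is a
    # self-contained unit of work that XUnit can run in order, shuffled, or on a
    # remote worker, depending on the runner it executes under.
    @testset "toy suite" begin
        @testcase "square of $x" for x in 1:4
            @test x^2 == x * x
        end
    end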
58 changes: 29 additions & 29 deletions test/matmul.jl
@@ -6,15 +6,15 @@ using LinearAlgebra
################################################################################

@testset "Matmul API" begin
@test_if "fpu compute and data types" @testset "FPU GEMM $(A_type)*$(B_type)+$(CD_type)=$(CD_type) ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' )) OP ($(OP_M), $(OP_N), $(OP_K))" for
@testset "FPU GEMM $(A_type)*$(B_type)+$(CD_type)=$(CD_type) ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' )) OP ($(OP_M), $(OP_N), $(OP_K))" for
(A_type, B_type, CD_type, min_dimension) in [
(Float16, Float16, Float32, 128), (Float32, Float32, Float32, 128), (Float32, Float32, Float64, 128), (Float64, Float64, Float64, 128),
(Int16, Int16, Int16, 128), (Int32, Int32, Int32, 128), (Int64, Int64, Int64, 128),
],
transpose_a = [false, true],
transpose_b = [false, true],
(Int16, Int16, Int16, 128), (Int32, Int32, Int32, 128), (Int64, Int64, Int64, 128),
],
transpose_a = [false, true],
transpose_b = [false, true],
(OP_M, OP_N, OP_K) in [(8, 16, 2)]
@testset "(M = $M, N = $N, K = $K)" for (M, N, K) in vcat(min_dimension.*[[1,1,1], [2, 2, 1], [1, 1, 2], [2, 2, 2]], [[2048, 2048, 2048]])
@testcase "(M = $M, N = $N, K = $K)" for (M, N, K) in vcat(min_dimension.*[[1,1,1], [2, 2, 1], [1, 1, 2], [2, 2, 2]], [[2048, 2048, 2048]])
alpha = convert(A_type, 2)
beta = convert(CD_type, 3)

@@ -59,7 +59,7 @@ using LinearAlgebra
# Transpose outputs, if necessary
new_a_h = transpose_a ? transpose(a_h) : a_h
new_b_h = transpose_b ? transpose(b_h) : b_h

if A_type <: Integer
@test all(isapprox.(alpha * CD_type.(new_a_h) * CD_type.(new_b_h) + beta * c_h, Array(d)))
else
@@ -68,13 +68,13 @@ using LinearAlgebra
end
end

@test_if "fpu operator shape" @testset "FPU GEMM OPERATOR SHAPE ($(OP_M), $(OP_N), $(OP_K)) (NN, NT, TN, TT)" for (OP_M, OP_N, OP_K) in [
(4, 8, 1), (8, 8, 1), (4, 16, 1), (4, 8, 2), (8, 16, 2)
@testset "FPU GEMM OPERATOR SHAPE ($(OP_M), $(OP_N), $(OP_K)) (NN, NT, TN, TT)" for (OP_M, OP_N, OP_K) in [
(4, 8, 1), (8, 8, 1), (4, 16, 1), (4, 8, 2), (8, 16, 2)
]
@testset "NN, NT, TN, TT" for (transpose_a, transpose_b) in [(false, false), (false, true), (true, false), (true, true)]
@testcase "NN, NT, TN, TT" for (transpose_a, transpose_b) in [(false, false), (false, true), (true, false), (true, true)]
(M, N, K) = (128, 128, 128)
(A_type, B_type, CD_type) = (Float32, Float32, Float32)

alpha = convert(A_type, 2)
beta = convert(CD_type, 3)

@@ -114,18 +114,18 @@ using LinearAlgebra
# Transpose outputs, if necessary
new_a_h = transpose_a ? transpose(a_h) : a_h
new_b_h = transpose_b ? transpose(b_h) : b_h

@test all(isapprox.(alpha * CD_type.(new_a_h) * CD_type.(new_b_h) + beta * c_h, Array(d); rtol = sqrt(eps(A_type))))
end
end

@test_if "tropical fpu" @testset "TROPICAL GEMM $(A_type)*$(B_type)+$(CD_type)=$(CD_type) ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' )) OP ($(OP_M), $(OP_N), $(OP_K))" for
(A_type, B_type, CD_type, min_dimension) in [(Float32, Float32, Float32, 128)],
transpose_a = [false, true],
transpose_b = [false, true],
@testset "TROPICAL GEMM $(A_type)*$(B_type)+$(CD_type)=$(CD_type) ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' )) OP ($(OP_M), $(OP_N), $(OP_K))" for
(A_type, B_type, CD_type, min_dimension) in [(Float32, Float32, Float32, 128)],
transpose_a = [false, true],
transpose_b = [false, true],
(OP_M, OP_N, OP_K) in [(8, 16, 2)]

@testset "(M = $M, N = $N, K = $K)" for (M, N, K) in vcat(min_dimension.*[[1,1,1], [2, 2, 1], [1, 1, 2], [2, 2, 2]])
@testcase "(M = $M, N = $N, K = $K)" for (M, N, K) in vcat(min_dimension.*[[1,1,1], [2, 2, 1], [1, 1, 2], [2, 2, 2]])
a_h = rand(A_type, (M, K)) / sqrt(A_type(K))
b_h = rand(B_type, (K, N)) / sqrt(B_type(K))
c_h = rand(CD_type, (M, N))
@@ -135,7 +135,7 @@ using LinearAlgebra
for j in 1 : N
d_h[i, j] = c_h[i, j]
for k in 1 : K
d_h[i, j] = max(a_h[i, k] + b_h[k, j], d_h[i, j])
d_h[i, j] = max(a_h[i, k] + b_h[k, j], d_h[i, j])
end
end
end
@@ -164,16 +164,16 @@ using LinearAlgebra
)

GemmKernels.matmul(a, b, c, d, conf; kernel = Kernel.matmul_pipelined)

@test all(isapprox.(d_h, Array(d); rtol = sqrt(eps(A_type))))
end
end


@test_if "wmma" @testset "WMMA GEMM $(A_type)*$(B_type)+$(CD_type)=$(CD_type) ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true],
@testset "WMMA GEMM $(A_type)*$(B_type)+$(CD_type)=$(CD_type) ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true],
transpose_b = [false, true],
(A_type, B_type, CD_type, min_dimension) in [(Float16, Float16, Float16, 256), (Float16, Float16, Float32, 128)]
@testset "(M = $M, N = $N, K = $K)" for (M, N, K) in vcat(min_dimension.*[[1,1,1], [2,2,1], [1,1,2], [2,2,2]], [[2048, 2048, 2048]])
@testcase "(M = $M, N = $N, K = $K)" for (M, N, K) in vcat(min_dimension.*[[1,1,1], [2,2,1], [1,1,2], [2,2,2]], [[2048, 2048, 2048]])
alpha = convert(A_type, 2)
beta = convert(CD_type, 3)

@@ -217,10 +217,10 @@ using LinearAlgebra
end
end

@test_if "bias" @testset "WMMA GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' )) + bias" for transpose_a = [false, true],
@testset "WMMA GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' )) + bias" for transpose_a = [false, true],
transpose_b = [false, true]

@testset "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)]
@testcase "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)]
a_h = rand(Float16, (M, K)) / sqrt(Float16(K))
b_h = rand(Float16, (K, N)) / sqrt(Float16(K))
c_h = rand(Float32, (M, N))
@@ -268,8 +268,8 @@ using LinearAlgebra
end
end

@test_if "diagonal" @testset "WMMA GEMM (A = diagonal, B = $( !transpose_b ? 'N' : 'T' ))" for transpose_b = [false, true]
@testset "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)]
@testset "WMMA GEMM (A = diagonal, B = $( !transpose_b ? 'N' : 'T' ))" for transpose_b = [false, true]
@testcase "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (4096, 4096, 4096)]
@assert M == K "Diagonal only supports square A matrix (M == K)"

transpose_a = false
@@ -312,10 +312,10 @@ using LinearAlgebra
end
end

@test_if "complex" @testset "WMMA Complex GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true],
@testset "WMMA Complex GEMM ($( !transpose_a ? 'N' : 'T' )$( !transpose_b ? 'N' : 'T' ))" for transpose_a = [false, true],
transpose_b = [false, true]

@testset "(M = $M, N = $N, K = $K)" for (M, N, K) = [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)]
@testcase "(M = $M, N = $N, K = $K)" for (M, N, K) = [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)]
a_h = rand(Complex{Float16}, (M, K)) / sqrt(Float16(K));
b_h = rand(Complex{Float16}, (K, N)) / sqrt(Float16(K));
c_h = rand(Complex{Float32}, (M, N));
@@ -377,8 +377,8 @@ using LinearAlgebra
end
end

@test_if "dual" @testset "WMMA Dual GEMM" begin
@testset "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)]
@testset "WMMA Dual GEMM" begin
@testcase "(M = $M, N = $N, K = $K)" for (M, N, K) in [(128, 128, 128), (256, 256, 256), (2048, 2048, 2048)]
a_h = rand(Complex{Float16}, (M, K)) / sqrt(Float16(K));
b_h = rand(Complex{Float16}, (K, N)) / sqrt(Float16(K));
c_h = rand(Complex{Float32}, (M, N));
41 changes: 20 additions & 21 deletions test/runtests.jl
@@ -1,26 +1,25 @@
using GemmKernels
using Test

import CUDA
import InteractiveUtils
using Distributed

@info "Julia details\n\n" * sprint(io->InteractiveUtils.versioninfo(io))
@info "CUDA details\n\n" * sprint(io->CUDA.versioninfo(io))
# determine parallelism
cpu_jobs = Sys.CPU_THREADS
memory_jobs = Int(Sys.free_memory()) ÷ (2 * 2^30)
jobs = min(cpu_jobs, memory_jobs)
@info "Running $jobs tests in parallel. If this is too many, set the `JULIA_CPU_THREADS` environment variable."

macro test_if(label, expr)
return quote
if isempty(ARGS) || $(label) in ARGS
$(esc(expr))
else
nothing
end
end
# add workers
exeflags = Base.julia_cmd()
filter!(exeflags.exec) do c
return !(startswith(c, "--depwarn") || startswith(c, "--check-bounds"))
end

CUDA.allowscalar(false)

@testset "GemmKernels.jl" begin
include("tiling.jl")
include("matmul.jl")
include("blas.jl")
push!(exeflags.exec, "--check-bounds=yes")
push!(exeflags.exec, "--startup-file=no")
push!(exeflags.exec, "--depwarn=yes")
push!(exeflags.exec, "--project=$(Base.active_project())")
exename = popfirst!(exeflags.exec)
withenv("JULIA_NUM_THREADS" => 1, "OPENBLAS_NUM_THREADS" => 1) do
addprocs(jobs; exename, exeflags)
end

@everywhere using XUnit
runtests("tests.jl")
25 changes: 25 additions & 0 deletions test/tests.jl
@@ -0,0 +1,25 @@
using Distributed, XUnit, Dates
using CUDA, GemmKernels
CUDA.allowscalar(false)

if myid() == 1
using InteractiveUtils
@info "Julia details:\n" * sprint(io->InteractiveUtils.versioninfo(io))
@info "CUDA details:\n" * sprint(io->CUDA.versioninfo(io))
end

t0 = now()
try
@testset runner=DistributedTestRunner() "GemmKernels.jl" begin
include("tiling.jl")
include("matmul.jl")
include("blas.jl")
end
finally
if myid() == 1
t1 = now()
elapsed = canonicalize(Dates.CompoundPeriod(t1-t0))
println("Testing finished in $elapsed")
end
end
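Taken together, the new runtests.jl and tests.jl divide the work: the driver sizes a worker pool from the CPU count and free memory, strips problematic flags from the inherited Julia command line, spawns single-threaded workers, and then calls XUnit's runtests, which loads tests.jl on every process so that the DistributedTestRunner-backed top-level @testset can hand each @testcase to whichever worker is idle. A minimal sketch of the driver side, assuming the same XUnit.jl entry point the PR uses (the worker count here is purely illustrative):

    using Distributed, XUnit

    # Spawn an illustrative pool of two workers; the PR instead derives the count
    # from Sys.CPU_THREADS and free memory, and forwards cleaned-up exeflags.
    addprocs(2)
    @everywhere using XUnit

    # Load tests.jl on every process; its top-level @testset, configured with
    # runner=DistributedTestRunner(), dispatches the individual @testcase units.
    runtests("tests.jl")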

18 changes: 9 additions & 9 deletions test/tiling.jl
@@ -2,39 +2,39 @@ using GemmKernels.Tiling

################################################################################

@test_if "tiling" @testset "Tiling API" begin
@testset "Tiling API" begin
@testset "Tiles" begin
@testset "Index" begin
@testcase "Index" begin
@test Tile(M = 4, N = 4, K = 4).index == (M = 0, N = 0, K = 0)
end

@testset "Projection" begin
@testcase "Projection" begin
@test Tile(M = 1, N = 2, K = 3).MN == Tile(M = 1, N = 2)
@test Tile(M = 1, N = 2, K = 3).NM == Tile(N = 2, M = 1)
@test Tile(M = 1, N = 2, K = 3).M == Tile(M = 1)
@test Tile(M = 1, N = 2, K = 3).KMN == Tile(K = 3, M = 1, N = 2)
end

@testset "Transposition" begin
@testcase "Transposition" begin
@test transpose(Tile(M = 1, N = 2)) == Tile(N = 2, M = 1)
@test transpose(Tile(M = 1, N = 2, K = 3)) == Tile(K = 3, N = 2, M = 1)
end

@testset "Translate base" begin
@testcase "Translate base" begin
tile = translate_base(Tile(M = 10, N = 20), (M = 1, N = 2))
@test tile.size == (M = 10, N = 20)
@test tile.base == (M = 1, N = 2)
@test tile.offset == (M = 0, N = 0)
end

@testset "Translate offset" begin
@testcase "Translate offset" begin
tile = translate_offset(Tile(M = 10, N = 20), (M = 1, N = 2))
@test tile.size == (M = 10, N = 20)
@test tile.base == (M = 0, N = 0)
@test tile.offset == (M = 1, N = 2)
end

@testset "Linearise" begin
@testcase "Linearise" begin
tile = Tile(M = 3, N = 5)
for i = 0 : 2, j = 0 : 4
tile_t = translate_offset(tile, (M = i, N = j))
@@ -45,7 +45,7 @@ using GemmKernels.Tiling
end

@testset "Tile iteration" begin
@testset "Subdivide" begin
@testcase "Subdivide" begin
tile_size = (M = 8, N = 4)
num_tiles = (M = 2, N = 4)
tile = Tile(M = num_tiles.M * tile_size.M, N = num_tiles.N * tile_size.N)
Expand All @@ -59,7 +59,7 @@ using GemmKernels.Tiling
end
end

@testset "Parallellise" begin
@testcase "Parallellise" begin
tile_size = (M = 8, N = 4)
num_tiles = (M = 2, N = 8)
tile = Tile(M = num_tiles.M * tile_size.M, N = num_tiles.N * tile_size.N)