From 509664aa58be9ae0cd0f09c077657cc7c003d34f Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sat, 5 Oct 2024 15:27:09 -0300 Subject: [PATCH 1/2] Use CPU copy with SharedStorage [only special] --- src/array.jl | 18 ++++++++++++++++++ test/array.jl | 23 +++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/src/array.jl b/src/array.jl index a288a271..3f61cd4c 100644 --- a/src/array.jl +++ b/src/array.jl @@ -401,6 +401,12 @@ function Base.unsafe_copyto!(dev::MTLDevice, dest::MtlArray{T}, doffs, src::Arra end return dest end +function Base.unsafe_copyto!(::MTLDevice, dest::MtlArray{T,<:Any,Metal.SharedStorage}, doffs, src::Array{T}, soffs, n) where T + # SharedStorage path: this copy is a pure memcpy into a CPU Array wrapping dest (unsafe_wrap), not an API call, so it isn't ordered; synchronize before touching the memory. + synchronize() + GC.@preserve src dest unsafe_copyto!(pointer(unsafe_wrap(Array,dest), doffs), pointer(src, soffs), n) + return dest +end # GPU -> CPU function Base.unsafe_copyto!(dev::MTLDevice, dest::Array{T}, doffs, src::MtlArray{T}, soffs, n) where T @@ -414,6 +420,12 @@ function Base.unsafe_copyto!(dev::MTLDevice, dest::Array{T}, doffs, src::MtlArra end return dest end +function Base.unsafe_copyto!(::MTLDevice, dest::Array{T}, doffs, src::MtlArray{T,<:Any,Metal.SharedStorage}, soffs, n) where T + # SharedStorage path: this copy is a pure memcpy out of a CPU Array wrapping src (unsafe_wrap), not an API call, so it isn't ordered; synchronize before touching the memory. 
+ synchronize() + GC.@preserve src dest unsafe_copyto!(pointer(dest, doffs), pointer(unsafe_wrap(Array,src), soffs), n) + return dest +end # GPU -> GPU function Base.unsafe_copyto!(dev::MTLDevice, dest::MtlArray{T}, doffs, src::MtlArray{T}, soffs, n) where T @@ -427,6 +439,12 @@ function Base.unsafe_copyto!(dev::MTLDevice, dest::MtlArray{T}, doffs, src::MtlA end return dest end +function Base.unsafe_copyto!(::MTLDevice, dest::MtlArray{T,<:Any,Metal.SharedStorage}, doffs, src::MtlArray{T,<:Any,Metal.SharedStorage}, soffs, n) where T + # SharedStorage path: this copy is a pure memcpy between CPU Arrays wrapping dest and src (unsafe_wrap), not an API call, so it isn't ordered; synchronize before touching the memory. + synchronize() + GC.@preserve src dest unsafe_copyto!(pointer(unsafe_wrap(Array,dest), doffs), pointer(unsafe_wrap(Array,src), soffs), n) + return dest +end ## regular gpu array adaptor diff --git a/test/array.jl b/test/array.jl index 3333f5da..05dbfb5e 100644 --- a/test/array.jl +++ b/test/array.jl @@ -69,6 +69,29 @@ end @test collect(Metal.fill(1, 2, 2)) == ones(Float32, 2, 2) end +@testset "copyto!: $T, $S" for S in [Metal.PrivateStorage, Metal.SharedStorage], T in [Float16, Float32, Bool, Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8] + function testcopyto!(out, in) + copyto!(out,in) + return Array(in) == Array(out) + end + + dim = (1000,17,10) + A = rand(T,dim) + mtlA = mtl(A;storage=S) + + #cpu -> gpu + res = Metal.zeros(T,dim;storage=S) + @test testcopyto!(res,A) + + #gpu -> cpu + res = zeros(T,dim) + @test testcopyto!(res,mtlA) + + #gpu -> gpu + res = Metal.zeros(T,dim;storage=S) + @test testcopyto!(res,mtlA) +end + check_storagemode(arr, smode) = Metal.storagemode(arr) == smode # There is some repetition to the GPUArrays tests to test for different storagemodes From 20832ac5a6ab0ff3b238c48fe6175ca7b90fee46 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 7 Oct 2024 14:16:36 -0300 Subject: [PATCH 2/2] Better test error display --- test/array.jl | 41 
+++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/test/array.jl b/test/array.jl index 05dbfb5e..e684a6d1 100644 --- a/test/array.jl +++ b/test/array.jl @@ -69,27 +69,28 @@ end @test collect(Metal.fill(1, 2, 2)) == ones(Float32, 2, 2) end -@testset "copyto!: $T, $S" for S in [Metal.PrivateStorage, Metal.SharedStorage], T in [Float16, Float32, Bool, Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8] - function testcopyto!(out, in) - copyto!(out,in) - return Array(in) == Array(out) +@testset "copyto!" begin + @testset "$T, $S" for S in [Metal.PrivateStorage, Metal.SharedStorage], + T in [Float16, Float32, Bool, Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8] + dim = (1000,17,10) + A = rand(T,dim) + mtlA = mtl(A;storage=S) + + #cpu -> gpu + res = Metal.zeros(T,dim;storage=S) + copyto!(res,A) + @test Array(res) == Array(A) + + #gpu -> cpu + res = zeros(T,dim) + copyto!(res,mtlA) + @test Array(res) == Array(mtlA) + + #gpu -> gpu + res = Metal.zeros(T,dim;storage=S) + copyto!(res,mtlA) + @test Array(res) == Array(mtlA) end - - dim = (1000,17,10) - A = rand(T,dim) - mtlA = mtl(A;storage=S) - - #cpu -> gpu - res = Metal.zeros(T,dim;storage=S) - @test testcopyto!(res,A) - - #gpu -> cpu - res = zeros(T,dim) - @test testcopyto!(res,mtlA) - - #gpu -> gpu - res = Metal.zeros(T,dim;storage=S) - @test testcopyto!(res,mtlA) end check_storagemode(arr, smode) = Metal.storagemode(arr) == smode