Skip to content
This repository has been archived by the owner on Nov 4, 2024. It is now read-only.

Commit

Permalink
feat: offload matrix multiply routines to Octavian.jl
Browse files Browse the repository at this point in the history
  • Loading branch information
avik-pal committed Aug 2, 2024
1 parent 854ba3f commit e31aa74
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 23 deletions.
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "LuxLib"
uuid = "82251201-b29d-42c6-8e01-566dec8acb11"
authors = ["Avik Pal <avikpal@mit.edu> and contributors"]
version = "0.3.38"
version = "0.3.39"

[deps]
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
Expand All @@ -17,6 +17,7 @@ LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40"
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
Octavian = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
SLEEFPirates = "476501e8-09a2-5ece-8869-fb82de89a1fa"
Expand Down Expand Up @@ -63,6 +64,7 @@ LuxTestUtils = "1.1"
MLDataDevices = "1.0.0"
Markdown = "1.10"
NNlib = "0.9.21"
Octavian = "0.3.28"
Pkg = "1.10"
Preferences = "1.4"
Random = "1.10"
Expand Down
1 change: 1 addition & 0 deletions src/LuxLib.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ using Markdown: @doc_str
using MLDataDevices: get_device_type, AMDGPUDevice, CUDADevice, CPUDevice,
AbstractGPUDevice, AbstractDevice
using NNlib: NNlib, ConvDims, conv, conv!, relu, gelu, σ, ∇conv_data, ∇conv_filter
using Octavian: Octavian
using Random: Random, AbstractRNG, rand!
using Reexport: @reexport
using Setfield: @set!
Expand Down
44 changes: 22 additions & 22 deletions src/impl/matmul.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,21 @@ function matmuladd!(C::AbstractMatrix, ::AbstractInternalArrayOpMode,
end
function matmuladd!(C::AbstractMatrix, ::LoopedArrayOp, A::AbstractMatrix,
B::AbstractMatrix, bias::AbstractVector)
if unrolled_any(≤(256), (size(C, 1), size(A, 2), size(B, 2))) &&
dims = (size(C, 1), size(A, 2), size(B, 2))
if unrolled_any(≤(2048), dims) &&
unrolled_all(≤(10_000), dims) &&
LoopVectorization.check_args(C, A, B)
__matmuladd_loopvec!(C, A, B, bias)
__matmuladd_octavian!(C, A, B, bias)
return
end
__matmuladd_generic!(C, A, B, bias)
return
end

function __matmuladd_loopvec!(
function __matmuladd_octavian!(
C::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix, bias::AbstractVector)
# NOTE: Octavian doesn't do size checks.
# See https://github.com/JuliaLinearAlgebra/Octavian.jl/issues/109
if size(A, 2) != size(B, 1)
throw(DimensionMismatch(lazy"A has shape ($(size(A, 1)), $(size(A, 2))) but B has shape ($(size(B, 1)), $(size(B, 2)))"))
end
Expand All @@ -51,13 +55,11 @@ function __matmuladd_loopvec!(
throw(DimensionMismatch(lazy"bias has length $(length(bias)) but A has shape ($(size(A, 1)), $(size(A, 2)))"))
end

@tturbo for n in indices((C, B), 2), m in indices((C, A), 1)
Cmn = zero(eltype(C))
for k in indices((A, B), (2, 1))
Cmn += A[m, k] * B[k, n]
end
C[m, n] = Cmn + bias[m]
@tturbo for n in indices(C, 2), m in indices(C, 1)
C[m, n] = bias[m]
end
Octavian.matmul!(C, A, B, true, true)
return
end

function __matmuladd_generic!(
Expand Down Expand Up @@ -91,27 +93,25 @@ function matmul!(C::AbstractMatrix, ::AbstractInternalArrayOpMode,
return
end
function matmul!(C::AbstractMatrix, ::LoopedArrayOp, A::AbstractMatrix, B::AbstractMatrix)
if unrolled_any(≤(256), (size(C, 1), size(A, 2), size(B, 2))) &&
dims = (size(C, 1), size(A, 2), size(B, 2))
if unrolled_any(≤(2048), dims) &&
unrolled_all(≤(10_000), dims) &&
LoopVectorization.check_args(C, A, B)
__matmul_loopvec!(C, A, B)
__matmul_octavian!(C, A, B)
return
end
__matmul_generic!(C, A, B)
return
end

function __matmul_loopvec!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix)
function __matmul_octavian!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix)
# NOTE: Octavian doesn't do size checks.
# See https://github.com/JuliaLinearAlgebra/Octavian.jl/issues/109
if size(A, 2) != size(B, 1)
throw(DimensionMismatch(lazy"A has shape ($(size(A, 1)), $(size(A, 2))) but B has shape ($(size(B, 1)), $(size(B, 2)))"))
end

@tturbo for n in indices((C, B), 2), m in indices((C, A), 1)
Cmn = zero(eltype(C))
for k in indices((A, B), (2, 1))
Cmn += A[m, k] * B[k, n]
end
C[m, n] = Cmn
end
Octavian.matmul!(C, A, B)
return
end

function __matmul_generic!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix)
Expand Down Expand Up @@ -151,6 +151,6 @@ function CRC.rrule(::typeof(matmuladd), opmode::LoopedArrayOp,
end

# EnzymeRules
@enzyme_reverse_alternative __matmul_loopvec! __matmul_generic!
@enzyme_reverse_alternative __matmul_octavian! __matmul_generic!

@enzyme_reverse_alternative __matmuladd_loopvec! __matmuladd_generic!
@enzyme_reverse_alternative __matmuladd_octavian! __matmuladd_generic!

0 comments on commit e31aa74

Please sign in to comment.