Skip to content
This repository was archived by the owner on Nov 4, 2024. It is now read-only.

Commit bc0131d

Browse files
committed
feat: offload matrix multiply routines to Octavian.jl
1 parent 854ba3f commit bc0131d

File tree

3 files changed

+26
-23
lines changed

3 files changed

+26
-23
lines changed

Project.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LuxLib"
22
uuid = "82251201-b29d-42c6-8e01-566dec8acb11"
33
authors = ["Avik Pal <[email protected]> and contributors"]
4-
version = "0.3.38"
4+
version = "0.3.39"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
@@ -17,6 +17,7 @@ LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
1717
MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40"
1818
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
1919
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
20+
Octavian = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
2021
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
2122
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
2223
SLEEFPirates = "476501e8-09a2-5ece-8869-fb82de89a1fa"
@@ -63,6 +64,7 @@ LuxTestUtils = "1.1"
6364
MLDataDevices = "1.0.0"
6465
Markdown = "1.10"
6566
NNlib = "0.9.21"
67+
Octavian = "0.3.28"
6668
Pkg = "1.10"
6769
Preferences = "1.4"
6870
Random = "1.10"

src/LuxLib.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ using Markdown: @doc_str
1414
using MLDataDevices: get_device_type, AMDGPUDevice, CUDADevice, CPUDevice,
1515
AbstractGPUDevice, AbstractDevice
1616
using NNlib: NNlib, ConvDims, conv, conv!, relu, gelu, σ, ∇conv_data, ∇conv_filter
17+
using Octavian: Octavian
1718
using Random: Random, AbstractRNG, rand!
1819
using Reexport: @reexport
1920
using Setfield: @set!

src/impl/matmul.jl

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,21 @@ function matmuladd!(C::AbstractMatrix, ::AbstractInternalArrayOpMode,
3232
end
3333
function matmuladd!(C::AbstractMatrix, ::LoopedArrayOp, A::AbstractMatrix,
3434
B::AbstractMatrix, bias::AbstractVector)
35-
if unrolled_any(≤(256), (size(C, 1), size(A, 2), size(B, 2))) &&
35+
dims = (size(C, 1), size(A, 2), size(B, 2))
36+
if unrolled_any(≤(2048), dims) &&
37+
unrolled_all(≤(10_000), dims) &&
3638
LoopVectorization.check_args(C, A, B)
37-
__matmuladd_loopvec!(C, A, B, bias)
39+
__matmuladd_octavian!(C, A, B, bias)
3840
return
3941
end
4042
__matmuladd_generic!(C, A, B, bias)
4143
return
4244
end
4345

44-
function __matmuladd_loopvec!(
46+
function __matmuladd_octavian!(
4547
C::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix, bias::AbstractVector)
48+
# NOTE: Octavian doesn't do size checks.
49+
# See https://github.com/JuliaLinearAlgebra/Octavian.jl/issues/109
4650
if size(A, 2) != size(B, 1)
4751
throw(DimensionMismatch(lazy"A has shape ($(size(A, 1)), $(size(A, 2))) but B has shape ($(size(B, 1)), $(size(B, 2)))"))
4852
end
@@ -51,13 +55,11 @@ function __matmuladd_loopvec!(
5155
throw(DimensionMismatch(lazy"bias has length $(length(bias)) but A has shape ($(size(A, 1)), $(size(A, 2)))"))
5256
end
5357

54-
@tturbo for n in indices((C, B), 2), m in indices((C, A), 1)
55-
Cmn = zero(eltype(C))
56-
for k in indices((A, B), (2, 1))
57-
Cmn += A[m, k] * B[k, n]
58-
end
59-
C[m, n] = Cmn + bias[m]
58+
@tturbo for n in indices(C, 2), m in indices(C, 1)
59+
C[m, n] = bias[m]
6060
end
61+
Octavian.matmul!(C, A, B, true, true)
62+
return
6163
end
6264

6365
function __matmuladd_generic!(
@@ -91,27 +93,25 @@ function matmul!(C::AbstractMatrix, ::AbstractInternalArrayOpMode,
9193
return
9294
end
9395
function matmul!(C::AbstractMatrix, ::LoopedArrayOp, A::AbstractMatrix, B::AbstractMatrix)
94-
if unrolled_any(≤(256), (size(C, 1), size(A, 2), size(B, 2))) &&
96+
dims = (size(C, 1), size(A, 2), size(B, 2))
97+
if unrolled_any(≤(2048), dims) &&
98+
unrolled_all(≤(10_000), dims) &&
9599
LoopVectorization.check_args(C, A, B)
96-
__matmul_loopvec!(C, A, B)
100+
__matmul_octavian!(C, A, B)
97101
return
98102
end
99103
__matmul_generic!(C, A, B)
100104
return
101105
end
102106

103-
function __matmul_loopvec!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix)
107+
function __matmul_octavian!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix)
108+
# NOTE: Octavian doesn't do size checks.
109+
# See https://github.com/JuliaLinearAlgebra/Octavian.jl/issues/109
104110
if size(A, 2) != size(B, 1)
105111
throw(DimensionMismatch(lazy"A has shape ($(size(A, 1)), $(size(A, 2))) but B has shape ($(size(B, 1)), $(size(B, 2)))"))
106112
end
107-
108-
@tturbo for n in indices((C, B), 2), m in indices((C, A), 1)
109-
Cmn = zero(eltype(C))
110-
for k in indices((A, B), (2, 1))
111-
Cmn += A[m, k] * B[k, n]
112-
end
113-
C[m, n] = Cmn
114-
end
113+
Octavian.matmul!(C, A, B)
114+
return
115115
end
116116

117117
function __matmul_generic!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix)
@@ -151,6 +151,6 @@ function CRC.rrule(::typeof(matmuladd), opmode::LoopedArrayOp,
151151
end
152152

153153
# EnzymeRules
154-
@enzyme_reverse_alternative __matmul_loopvec! __matmul_generic!
154+
@enzyme_reverse_alternative __matmul_octavian! __matmul_generic!
155155

156-
@enzyme_reverse_alternative __matmuladd_loopvec! __matmuladd_generic!
156+
@enzyme_reverse_alternative __matmuladd_octavian! __matmuladd_generic!

0 commit comments

Comments
 (0)