@@ -32,17 +32,21 @@ function matmuladd!(C::AbstractMatrix, ::AbstractInternalArrayOpMode,
3232end
3333function matmuladd! (C:: AbstractMatrix , :: LoopedArrayOp , A:: AbstractMatrix ,
3434 B:: AbstractMatrix , bias:: AbstractVector )
35- if unrolled_any (≤ (256 ), (size (C, 1 ), size (A, 2 ), size (B, 2 ))) &&
35+ dims = (size (C, 1 ), size (A, 2 ), size (B, 2 ))
36+ if unrolled_any (≤ (2048 ), dims) &&
37+ unrolled_all (≤ (10_000 ), dims) &&
3638 LoopVectorization. check_args (C, A, B)
37- __matmuladd_loopvec ! (C, A, B, bias)
39+ __matmuladd_octavian ! (C, A, B, bias)
3840 return
3941 end
4042 __matmuladd_generic! (C, A, B, bias)
4143 return
4244end
4345
44- function __matmuladd_loopvec ! (
46+ function __matmuladd_octavian ! (
4547 C:: AbstractMatrix , A:: AbstractMatrix , B:: AbstractMatrix , bias:: AbstractVector )
48+ # NOTE: Octavian doesn't do size checks.
49+ # See https://github.com/JuliaLinearAlgebra/Octavian.jl/issues/109
4650 if size (A, 2 ) != size (B, 1 )
4751 throw (DimensionMismatch (lazy " A has shape ($(size(A, 1)), $(size(A, 2))) but B has shape ($(size(B, 1)), $(size(B, 2)))" ))
4852 end
@@ -51,13 +55,11 @@ function __matmuladd_loopvec!(
5155 throw (DimensionMismatch (lazy " bias has length $(length(bias)) but A has shape ($(size(A, 1)), $(size(A, 2)))" ))
5256 end
5357
54- @tturbo for n in indices ((C, B), 2 ), m in indices ((C, A), 1 )
55- Cmn = zero (eltype (C))
56- for k in indices ((A, B), (2 , 1 ))
57- Cmn += A[m, k] * B[k, n]
58- end
59- C[m, n] = Cmn + bias[m]
58+ @tturbo for n in indices (C, 2 ), m in indices (C, 1 )
59+ C[m, n] = bias[m]
6060 end
61+ Octavian. matmul! (C, A, B, true , true )
62+ return
6163end
6264
6365function __matmuladd_generic! (
@@ -91,27 +93,25 @@ function matmul!(C::AbstractMatrix, ::AbstractInternalArrayOpMode,
9193 return
9294end
9395function matmul! (C:: AbstractMatrix , :: LoopedArrayOp , A:: AbstractMatrix , B:: AbstractMatrix )
94- if unrolled_any (≤ (256 ), (size (C, 1 ), size (A, 2 ), size (B, 2 ))) &&
96+ dims = (size (C, 1 ), size (A, 2 ), size (B, 2 ))
97+ if unrolled_any (≤ (2048 ), dims) &&
98+ unrolled_all (≤ (10_000 ), dims) &&
9599 LoopVectorization. check_args (C, A, B)
96- __matmul_loopvec ! (C, A, B)
100+ __matmul_octavian ! (C, A, B)
97101 return
98102 end
99103 __matmul_generic! (C, A, B)
100104 return
101105end
102106
103- function __matmul_loopvec! (C:: AbstractMatrix , A:: AbstractMatrix , B:: AbstractMatrix )
107+ function __matmul_octavian! (C:: AbstractMatrix , A:: AbstractMatrix , B:: AbstractMatrix )
108+ # NOTE: Octavian doesn't do size checks.
109+ # See https://github.com/JuliaLinearAlgebra/Octavian.jl/issues/109
104110 if size (A, 2 ) != size (B, 1 )
105111 throw (DimensionMismatch (lazy " A has shape ($(size(A, 1)), $(size(A, 2))) but B has shape ($(size(B, 1)), $(size(B, 2)))" ))
106112 end
107-
108- @tturbo for n in indices ((C, B), 2 ), m in indices ((C, A), 1 )
109- Cmn = zero (eltype (C))
110- for k in indices ((A, B), (2 , 1 ))
111- Cmn += A[m, k] * B[k, n]
112- end
113- C[m, n] = Cmn
114- end
113+ Octavian. matmul! (C, A, B)
114+ return
115115end
116116
117117function __matmul_generic! (C:: AbstractMatrix , A:: AbstractMatrix , B:: AbstractMatrix )
@@ -151,6 +151,6 @@ function CRC.rrule(::typeof(matmuladd), opmode::LoopedArrayOp,
151151end
152152
153153# EnzymeRules
154- @enzyme_reverse_alternative __matmul_loopvec ! __matmul_generic!
154+ @enzyme_reverse_alternative __matmul_octavian ! __matmul_generic!
155155
156- @enzyme_reverse_alternative __matmuladd_loopvec ! __matmuladd_generic!
156+ @enzyme_reverse_alternative __matmuladd_octavian ! __matmuladd_generic!
0 commit comments