Skip to content

Commit ffe772b

Browse files
committed
Start epilogue
Signed-off-by: ElizaWszola <[email protected]>
1 parent c570c69 commit ffe772b

File tree

9 files changed

+650
-60
lines changed

9 files changed

+650
-60
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -264,7 +264,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
264264
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0 AND SCALED_MM_3X_ARCHS)
265265
set(SRCS
266266
"csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu"
267-
"csrc/quantization/cutlass_w8a8/grouped_gemm_test.cu")
267+
"csrc/quantization/cutlass_w8a8/grouped_gemm_c3x.cu")
268268
set_gencode_flags_for_srcs(
269269
SRCS "${SRCS}"
270270
CUDA_ARCHS "${SCALED_MM_3X_ARCHS}")

csrc/cpu/torch_bindings.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,8 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
123123
"cutlass_grouped_mm(Tensor! out, Tensor a, Tensor b, Tensor a_scales, "
124124
" Tensor b_scales, Tensor problem_sizes, "
125125
" Tensor out_offsets, Tensor a_offsets, "
126-
" Tensor b_offsets) -> ()");
126+
" Tensor b_offsets, Tensor a_scales_offsets, "
127+
" Tensor b_scales_offsets) -> ()");
127128
ops.impl("cutlass_grouped_mm", torch::kCUDA, &cutlass_grouped_mm);
128129
// w8a8 GEMM, supporting asymmetric per-tensor or per-row/column
129130
// quantization.

0 commit comments

Comments (0)