@@ -512,9 +512,9 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
512512 # The cutlass_scaled_mm kernels for Blackwell SM100 (c3x, i.e. CUTLASS 3.x)
513513 # require CUDA 12.8 or later
514514 if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
515- cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f;12.0f " "${CUDA_ARCHS} " )
515+ cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f" "${CUDA_ARCHS} " )
516516 else ()
517- cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a;10.1a;10.3a;12.0a;12.1a " "${CUDA_ARCHS} " )
517+ cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a;10.1a;10.3a" "${CUDA_ARCHS} " )
518518 endif ()
519519 if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND SCALED_MM_ARCHS)
520520 set (SRCS
@@ -619,9 +619,9 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
619619
620620 # FP4 Archs and flags
621621 if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
622- cuda_archs_loose_intersection(FP4_ARCHS "10.0f;11.0f;12.0f " "${CUDA_ARCHS} " )
622+ cuda_archs_loose_intersection(FP4_ARCHS "10.0f;11.0f" "${CUDA_ARCHS} " )
623623 else ()
624- cuda_archs_loose_intersection(FP4_ARCHS "10.0a;10.1a;12.0a;12.1a " "${CUDA_ARCHS} " )
624+ cuda_archs_loose_intersection(FP4_ARCHS "10.0a;10.1a;10.3a " "${CUDA_ARCHS} " )
625625 endif ()
626626 if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND FP4_ARCHS)
627627 set (SRCS
@@ -695,7 +695,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
695695 if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
696696 cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f" "${CUDA_ARCHS} " )
697697 else ()
698- cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a" "${CUDA_ARCHS} " )
698+ cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a;10.1a;10.3a " "${CUDA_ARCHS} " )
699699 endif ()
700700 if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND SCALED_MM_ARCHS)
701701 set (SRCS "csrc/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm100.cu" )
@@ -741,9 +741,9 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
741741 endif ()
742742
743743 if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0)
744- cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f;12.0f " "${CUDA_ARCHS} " )
744+ cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f" "${CUDA_ARCHS} " )
745745 else ()
746- cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a;10.1a;10.3a;12.0a;12.1a " "${CUDA_ARCHS} " )
746+ cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a;10.1a;10.3a" "${CUDA_ARCHS} " )
747747 endif ()
748748 if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND SCALED_MM_ARCHS)
749749 set (SRCS "csrc/quantization/w8a8/cutlass/moe/blockwise_scaled_group_mm_sm100.cu" )
0 commit comments