Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .github/workflows/vllm_ascend_test_nightly_a3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,21 @@ jobs:
image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3'
tests: ${{ matrix.test_config.tests }}
name: ${{ matrix.test_config.name }}
custom-ops-tests:
name: test ops
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
needs: multi-node-tests
strategy:
fail-fast: false
matrix:
test_config:
- name: custom-op-dispatch_gmm_combine_decode
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file is for model tests only; there is no need to add the ops test here. @Potabk

Copy link
Collaborator

@Potabk Potabk Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can add a new job for the ops at the end of https://github.com/vllm-project/vllm-ascend/blob/main/.github/workflows/vllm_ascend_test_nightly_a3.yaml — that's enough for your PR; I'll do a more detailed restructuring later.

  custom-ops-tests:
    name: test ops
    if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
    needs: multi-node-tests
    strategy:
      fail-fast: false
      matrix:
        test_config:
          - name: custom-op-dispatch_gmm_combine_decode
            os: linux-aarch64-a3-16
            tests: tests/e2e/nightly/multicard_ops/test_dispatch_gmm_combine_decode.py
    uses: ./.github/workflows/_e2e_nightly_single_node.yaml
    with:
      runner: ${{ matrix.test_config.os }}
      vllm: 0.12.0
      image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3'
      tests: ${{ matrix.test_config.tests }}
      name: ${{ matrix.test_config.name }}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

os: linux-aarch64-a3-16
tests: tests/e2e/nightly/multicard_ops/test_dispatch_gmm_combine_decode.py
uses: ./.github/workflows/_e2e_nightly_single_node.yaml
with:
runner: ${{ matrix.test_config.os }}
vllm: v0.12.0
image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3'
tests: ${{ matrix.test_config.tests }}
name: ${{ matrix.test_config.name }}
59 changes: 32 additions & 27 deletions csrc/build_aclnn.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
ROOT_DIR=$1
SOC_VERSION=$2

git config --global --add safe.directory "$ROOT_DIR"

if [[ "$SOC_VERSION" =~ ^ascend310 ]]; then
# ASCEND310P series
# currently, no custom aclnn ops for ASCEND310 series
Expand All @@ -13,41 +11,48 @@ if [[ "$SOC_VERSION" =~ ^ascend310 ]]; then
exit 0
elif [[ "$SOC_VERSION" =~ ^ascend910b ]]; then
# ASCEND910B (A2) series
CUSTOM_OPS="grouped_matmul_swiglu_quant_weight_nz_tensor_list;lightning_indexer;sparse_flash_attention;dispatch_ffn_combine"
CUSTOM_OPS="grouped_matmul_swiglu_quant_weight_nz_tensor_list;lightning_indexer;sparse_flash_attention"
SOC_ARG="ascend910b"
elif [[ "$SOC_VERSION" =~ ^ascend910_93 ]]; then
# ASCEND910C (A3) series
CUSTOM_OPS="grouped_matmul_swiglu_quant_weight_nz_tensor_list;lightning_indexer;sparse_flash_attention;dispatch_ffn_combine"
# dependency: catlass
git config --global --add safe.directory "$ROOT_DIR"
CATLASS_PATH=${ROOT_DIR}/csrc/third_party/catlass/include
if [[ ! -d "${CATLASS_PATH}" ]]; then
echo "depdendency catlass is missing, try to fetch it..."
if ! git submodule update --init --recursive; then
echo "fetch failed"
exit 1
fi
fi
# dependency: cann-toolkit file moe_distribute_base.h
HCCL_STRUCT_FILE_PATH=$(find -L "${ASCEND_TOOLKIT_HOME}" -name "moe_distribute_base.h" 2>/dev/null | head -n1)
if [ -z "$HCCL_STRUCT_FILE_PATH" ]; then
echo "cannot find moe_distribute_base.h file in CANN env"
exit 1
fi
# for dispatch_gmm_combine_decode
yes | cp "${HCCL_STRUCT_FILE_PATH}" "${ROOT_DIR}/csrc/dispatch_gmm_combine_decode/op_kernel"
# for dispatch_ffn_combine
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
TARGET_DIR="$SCRIPT_DIR/dispatch_ffn_combine/op_kernel/utils/"
TARGET_FILE="$TARGET_DIR/$(basename "$HCCL_STRUCT_FILE_PATH")"

echo "*************************************"
echo $HCCL_STRUCT_FILE_PATH
echo "$TARGET_DIR"
cp "$HCCL_STRUCT_FILE_PATH" "$TARGET_DIR"

sed -i 's/struct HcclOpResParam {/struct HcclOpResParamCustom {/g' "$TARGET_FILE"
sed -i 's/struct HcclRankRelationResV2 {/struct HcclRankRelationResV2Custom {/g' "$TARGET_FILE"
CUSTOM_OPS="grouped_matmul_swiglu_quant_weight_nz_tensor_list;lightning_indexer;sparse_flash_attention;dispatch_ffn_combine;dispatch_gmm_combine_decode;"
SOC_ARG="ascend910_93"
else
# others
# currently, no custom aclnn ops for other series
exit 0
fi

git submodule init
git submodule update


# For the compatibility of CANN8.5 and CANN8.3: copy and modify moe_distribute_base.h
file_path=$(find /usr/local/Ascend/ascend-toolkit -name "moe_distribute_base.h" 2>/dev/null | head -n1)
if [ -z "$file_path" ]; then
echo "cannot find moe_distribute_base.h file in CANN env"
exit 1
fi

SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
TARGET_DIR="$SCRIPT_DIR/dispatch_ffn_combine/op_kernel/utils/"
TARGET_FILE="$TARGET_DIR/$(basename "$file_path")"

echo "*************************************"
echo $file_path
echo "$TARGET_DIR"
cp "$file_path" "$TARGET_DIR"

sed -i 's/struct HcclOpResParam {/struct HcclOpResParamCustom {/g' "$TARGET_FILE"
sed -i 's/struct HcclRankRelationResV2 {/struct HcclRankRelationResV2Custom {/g' "$TARGET_FILE"


# build custom ops
cd csrc
Expand Down
59 changes: 59 additions & 0 deletions csrc/dispatch_gmm_combine_decode/op_host/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright (c) 2025 Huawei Technologies Co., Ltd.
# This file is a part of the CANN Open Software.
# Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
# Please refer to the License for details. You may not use this file except in compliance with the License.
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
# See LICENSE in the root of the software repository for the full text of the License.
# ======================================================================================================================

# Host-side build rules for the DispatchGmmCombineDecode custom aclnn op.
# Registers the op's definition, aclnn entry points, tiling, and proto sources
# with the shared targets provided by the enclosing ops build system.

# The op depends on catlass headers vendored as a git submodule; fail early
# with an actionable message if the submodule has not been fetched.
set(_DISPATCH_GMM_INC_OPTS)
if (EXISTS ${CMAKE_SOURCE_DIR}/third_party/catlass/include)
    list(APPEND _DISPATCH_GMM_INC_OPTS -I${CMAKE_SOURCE_DIR}/third_party/catlass/include)
else()
    message(FATAL_ERROR "dependency catlass is missing, you can fetch it by running 'git submodule update --init --recursive'")
endif()

# Per-op compile options for the device kernel compiler.
# --cce-auto-sync=off: disable automatic pipe synchronization (kernel manages
# synchronization itself — TODO confirm against the kernel sources).
add_ops_compile_options(
    OP_NAME DispatchGmmCombineDecode
    OPTIONS --cce-auto-sync=off
            -Wno-deprecated-declarations
            -Werror
            ${_DISPATCH_GMM_INC_OPTS}
)

# Op definition (shape/dtype registration) for the inner aclnn layer.
target_sources(op_host_aclnnInner PRIVATE
    dispatch_gmm_combine_decode_def.cpp
)

# Public aclnn wrapper entry points.
target_sources(opapi PRIVATE
    aclnn_dispatch_gmm_combine_decode.cpp
)

# In the closed-source build the wrapper is also linked into the train/infer
# aclnn libraries; the open project only builds the opapi target.
if (NOT BUILD_OPEN_PROJECT)
    target_sources(aclnn_ops_train PRIVATE
        aclnn_dispatch_gmm_combine_decode.cpp
    )

    target_sources(aclnn_ops_infer PRIVATE
        aclnn_dispatch_gmm_combine_decode.cpp
    )
endif ()

# Tiling implementation consumed by the op framework at launch time.
target_sources(optiling PRIVATE
    dispatch_gmm_combine_decode_tiling.cpp
)

target_include_directories(optiling PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}
)

# Graph-mode prototype (infer shape/dtype) registration.
target_sources(opsproto PRIVATE
    dispatch_gmm_combine_decode_proto.cpp
)

# Install the public aclnn header so downstream users can call the op.
file(GLOB _GMM_Aclnn_header "${CMAKE_CURRENT_SOURCE_DIR}/aclnn_dispatch_gmm_combine_decode.h")

install(FILES ${_GMM_Aclnn_header}
    DESTINATION ${ACLNN_INC_INSTALL_DIR} OPTIONAL
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
 * Copyright (c) 2025 Huawei Technologies Co., Ltd.
 * This file is a part of the CANN Open Software.
 * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
 * Please refer to the License for details. You may not use this file except in compliance with the License.
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
 * See LICENSE in the root of the software repository for the full text of the License.
 */
// Public aclnn wrapper for the DispatchGmmCombineDecode custom op.
// Forwards the two-phase aclnn entry points (GetWorkspaceSize / launch) to the
// generated aclnnInner* implementation, and selects the HCCL server transport
// based on the SoC the process is running on.
#include <string.h>
#include "graph/types.h"
#include "aclnn/opdev/platform.h"
#include "aclnn_dispatch_gmm_combine_decode.h"

// Transport used by nnopbase to drive HCCL communication for this op.
enum NnopbaseHcclServerType {
    NNOPBASE_HCCL_SERVER_TYPE_AICPU = 0,  // communication serviced by AICPU
    NNOPBASE_HCCL_SERVER_TYPE_MTE,        // communication serviced by MTE
    NNOPBASE_HCCL_SERVER_TYPE_END
};
// Declared weak so this wrapper still links against runtimes that do not
// export the symbol; availability is checked at call time before use.
extern "C" void __attribute__((weak)) NnopbaseSetHcclServerType(void *executor, NnopbaseHcclServerType sType);

#ifdef __cplusplus
extern "C" {
#endif

// Inner (generated) entry points this wrapper delegates to.
extern aclnnStatus aclnnInnerDispatchGmmCombineDecodeGetWorkspaceSize(
    const aclTensor *x,
    const aclTensor *expertIds,
    const aclTensor *gmm1PermutedWeight,
    const aclTensor *gmm1PermutedWeightScale,
    const aclTensor *gmm2Weight,
    const aclTensor *gmm2WeightScale,
    const aclTensor *expertSmoothScalesOptional,
    const aclTensor *expertScalesOptional,
    char *groupEp,
    int64_t epRankSize,
    int64_t epRankId,
    int64_t moeExpertNum,
    int64_t shareExpertNum,
    int64_t shareExpertRankNum,
    int64_t quantMode,
    int64_t globalBs,
    const aclTensor *output,
    const aclTensor *epRecvCount,
    uint64_t *workspaceSize,
    aclOpExecutor **executor);
extern aclnnStatus aclnnInnerDispatchGmmCombineDecode(
    void *workspace,
    uint64_t workspaceSize,
    aclOpExecutor *executor,
    aclrtStream stream);

// First phase of the two-phase aclnn call: computes the required workspace
// size and prepares the executor. Thin pass-through to the inner op; all
// argument validation happens in the generated inner layer.
aclnnStatus aclnnDispatchGmmCombineDecodeGetWorkspaceSize(
    const aclTensor *x,
    const aclTensor *expertIds,
    const aclTensor *gmm1PermutedWeight,
    const aclTensor *gmm1PermutedWeightScale,
    const aclTensor *gmm2Weight,
    const aclTensor *gmm2WeightScale,
    const aclTensor *expertSmoothScalesOptional,
    const aclTensor *expertScalesOptional,
    char *groupEp,
    int64_t epRankSize,
    int64_t epRankId,
    int64_t moeExpertNum,
    int64_t shareExpertNum,
    int64_t shareExpertRankNum,
    int64_t quantMode,
    int64_t globalBs,
    const aclTensor *output,
    const aclTensor *epRecvCount,
    uint64_t *workspaceSize,
    aclOpExecutor **executor)
{
    return aclnnInnerDispatchGmmCombineDecodeGetWorkspaceSize(x, expertIds, gmm1PermutedWeight, gmm1PermutedWeightScale,
        gmm2Weight, gmm2WeightScale, expertSmoothScalesOptional, expertScalesOptional, groupEp, epRankSize,
        epRankId, moeExpertNum, shareExpertNum, shareExpertRankNum, quantMode, globalBs,
        output, epRecvCount, workspaceSize, executor);
}

// Second phase: launches the op on the given stream with the workspace sized
// by the first phase. Before launching, picks the HCCL server transport:
// AICPU on ASCEND910B (A2), MTE on everything else — skipped entirely when
// the weak NnopbaseSetHcclServerType symbol is absent from the runtime.
aclnnStatus aclnnDispatchGmmCombineDecode(
    void *workspace,
    uint64_t workspaceSize,
    aclOpExecutor *executor,
    aclrtStream stream)
{
    if (NnopbaseSetHcclServerType) {  // weak symbol: may be null at link time
        if (op::GetCurrentPlatformInfo().GetSocVersion() == op::SocVersion::ASCEND910B) {
            NnopbaseSetHcclServerType(executor, NNOPBASE_HCCL_SERVER_TYPE_AICPU);
        } else {
            NnopbaseSetHcclServerType(executor, NNOPBASE_HCCL_SERVER_TYPE_MTE);
        }
    }
    return aclnnInnerDispatchGmmCombineDecode(workspace, workspaceSize, executor, stream);
}

#ifdef __cplusplus
}
#endif


Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
 * Copyright (c) 2025 Huawei Technologies Co., Ltd.
 * This file is a part of the CANN Open Software.
 * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
 * Please refer to the License for details. You may not use this file except in compliance with the License.
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
 * See LICENSE in the root of the software repository for the full text of the License.
 */
#ifndef DISPATCH_GMM_COMBINE_DECODE
#define DISPATCH_GMM_COMBINE_DECODE

#include "aclnn/acl_meta.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief First phase of the two-phase aclnn call for DispatchGmmCombineDecode:
 *        computes the workspace size and builds the op executor.
 *
 * @param x input activations to dispatch (shape/dtype per op definition — see op docs).
 * @param expertIds per-token expert ids selected for routing.
 * @param gmm1PermutedWeight / gmm1PermutedWeightScale first grouped-matmul weights and quant scales.
 * @param gmm2Weight / gmm2WeightScale second grouped-matmul weights and quant scales.
 * @param expertSmoothScalesOptional optional smoothing scales (may be nullptr).
 * @param expertScalesOptional optional per-expert scales (may be nullptr).
 * @param groupEp name of the expert-parallel HCCL communication group.
 * @param epRankSize / epRankId size of the EP group and this rank's id within it.
 * @param moeExpertNum / shareExpertNum / shareExpertRankNum MoE expert layout parameters.
 * @param quantMode quantization mode selector (values defined by the op).
 * @param globalBs global batch size across the EP group.
 * @param output combined output tensor.
 * @param epRecvCount per-rank receive-count tensor produced by dispatch.
 * @param[out] workspaceSize required device workspace size in bytes.
 * @param[out] executor op executor to pass to aclnnDispatchGmmCombineDecode.
 * @return ACLNN_SUCCESS on success, an aclnn error code otherwise.
 */
__attribute__((visibility("default"))) aclnnStatus aclnnDispatchGmmCombineDecodeGetWorkspaceSize(
    const aclTensor *x,
    const aclTensor *expertIds,
    const aclTensor *gmm1PermutedWeight,
    const aclTensor *gmm1PermutedWeightScale,
    const aclTensor *gmm2Weight,
    const aclTensor *gmm2WeightScale,
    const aclTensor *expertSmoothScalesOptional,
    const aclTensor *expertScalesOptional,
    char *groupEp,
    int64_t epRankSize,
    int64_t epRankId,
    int64_t moeExpertNum,
    int64_t shareExpertNum,
    int64_t shareExpertRankNum,
    int64_t quantMode,
    int64_t globalBs,
    const aclTensor *output,
    const aclTensor *epRecvCount,
    uint64_t *workspaceSize,
    aclOpExecutor **executor);

/**
 * @brief Second phase: launches the op on the given stream.
 *
 * @param workspace device workspace of at least the size reported by the first phase.
 * @param workspaceSize size of @p workspace in bytes.
 * @param executor executor produced by aclnnDispatchGmmCombineDecodeGetWorkspaceSize.
 * @param stream ACL runtime stream to enqueue the launch on.
 * @return ACLNN_SUCCESS on success, an aclnn error code otherwise.
 */
__attribute__((visibility("default"))) aclnnStatus aclnnDispatchGmmCombineDecode(
    void *workspace,
    uint64_t workspaceSize,
    aclOpExecutor *executor,
    aclrtStream stream);

#ifdef __cplusplus
}
#endif

#endif
Loading
Loading