diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc index 4d183b95bd938..0bb3accb4d754 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc @@ -76,6 +76,9 @@ Status BaseOpBuilder::ProcessDataTypes(QnnModelWrapper& qnn_model_wrapper, return CheckHtpDataTypes(input_qnn_dtypes, output_qnn_dtypes); } else if (IsGpuBackend(qnn_model_wrapper.GetQnnBackendType())) { return CheckGpuDataTypes(input_qnn_dtypes, output_qnn_dtypes); + } else if (IsIrBackend(qnn_model_wrapper.GetQnnBackendType())) { + // TODO: CheckIrDataTypes + return Status::OK(); } return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Only support backend: CPU, HTP and GPU"); } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.cc b/onnxruntime/core/providers/qnn/builder/qnn_def.cc index f3d81d7d2fdd7..9f28e2609faa1 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_def.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_def.cc @@ -574,6 +574,10 @@ bool QnnOpConfigWrapper::CreateQnnGraphOp(const QNN_INTERFACE_VER_TYPE& qnn_inte return true; } +bool IsIrBackend(QnnBackendType backend_type) { + return backend_type == QnnBackendType::SERIALIZER; +} + bool IsNpuBackend(QnnBackendType backend_type) { return backend_type == QnnBackendType::HTP || backend_type == QnnBackendType::DSP; } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.h b/onnxruntime/core/providers/qnn/builder/qnn_def.h index 42f4d7bb60f34..77508f3934a20 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_def.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_def.h @@ -96,6 +96,8 @@ enum class QnnBackendType : uint8_t { SERIALIZER, }; +bool IsIrBackend(QnnBackendType backend_type); + bool IsCpuBackend(QnnBackendType backend_type); bool IsNpuBackend(QnnBackendType backend_type); diff --git a/onnxruntime/test/providers/qnn/README.md 
b/onnxruntime/test/providers/qnn/README.md new file mode 100644 index 0000000000000..d1eab7d59bb9a --- /dev/null +++ b/onnxruntime/test/providers/qnn/README.md @@ -0,0 +1,51 @@ +# ONNX Runtime QNN Execution Provider Tests +## Overview +1. The `onnxruntime/test/providers/qnn` directory contains integration tests for the Qualcomm Neural Network (QNN) execution provider. +2. Most testcases run an ONNX model through the QNN-EP, then verify the inference result against the one on CPU-EP + +## Building the Tests +The tests are built as part of the regular ONNX Runtime build. After a successful build you will have an executable named +- onnxruntime_provider_test.exe (Windows) +- onnxruntime_provider_test (Linux/macOS) + +## Running the Tests +1. QNN supports several backends. You can use the standard Google Test syntax for filtering: + - `onnxruntime_provider_test.exe --gtest_filter=QnnCPUBackendTests.*` + - `onnxruntime_provider_test.exe --gtest_filter=QnnHTPBackendTests.*` + - `onnxruntime_provider_test.exe --gtest_filter=QnnGPUBackendTests.*` + - `onnxruntime_provider_test.exe --gtest_filter=QnnIRBackendTests.*` +2. Saving Test Artifacts + - For debugging it is often helpful to keep the intermediate files that the tests generate. The following custom flags are + recognized by the test binary: + - `--dump_onnx`: Saves the input ONNX model used for the test + - `--dump_json`: Saves the JSON QNN graph with provider_option `dump_json_qnn_graph` + - `--dump_dlc`: Saves the compiled QNN DLC file by specifying the provider_option `qnn_ir_backend_path` to `QnnIr.dll` + - The artifacts will be saved to a directory named `<test_suite_name>_<test_name>` + ``` + . 
+ ├── QnnCPUBackendTests_BatchNorm2D_fp32 # RunQnnModelTest + │ ├── dumped_f32_model.onnx # float32 ONNX model + │ ├── QNNExecutionProvider_QNN_XXXX_X_X.dlc + │ └── QNNExecutionProvider_QNN_XXXX_X_X.json + ├── QnnHTPBackendTests_BatchNorm_FP16 # TestFp16ModelAccuracy + │ ├── dumped_f16_model.onnx # float16 ONNX model + │ ├── dumped_f32_model.onnx # float32 ONNX model + │ ├── QNNExecutionProvider_QNN_XXXX_X_X.dlc + │ └── QNNExecutionProvider_QNN_XXXX_X_X.json + └── QnnHTPBackendTests_BatchNorm2D_U8U8S32 # TestQDQModelAccuracy + ├── dumped_f32_model.onnx # float32 ONNX model + ├── dumped_qdq_model.onnx # QDQ ONNX model + ├── QNNExecutionProvider_QNN_XXXX_X_X.dlc + └── QNNExecutionProvider_QNN_XXXX_X_X.json + + # All artifact files are placed under the current working directory from which the test binary is invoked. + ``` +3. Verbose + - `--verbose`: Sets the ONNX Runtime log level to `ORT_LOGGING_LEVEL_VERBOSE` + +4. You can enable any combination of these flags, for example: + - `onnxruntime_provider_test.exe --gtest_filter=QnnHTPBackendTests.* --dump_onnx --dump_json --dump_dlc --verbose` + +# Note +- An issue on QNN backends can prevent the test artifacts from being successfully saved. +- The `onnxruntime_provider_test.exe` does not automatically delete the artifact directories, so you may want to prune them after a debugging session. diff --git a/onnxruntime/test/providers/qnn/qnn_test_env.h b/onnxruntime/test/providers/qnn/qnn_test_env.h new file mode 100644 index 0000000000000..326b3d9e65ad0 --- /dev/null +++ b/onnxruntime/test/providers/qnn/qnn_test_env.h @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include <filesystem> + +class QNNTestEnvironment { + public: + // Constructor takes argc and argv directly + explicit QNNTestEnvironment(int argc, char** argv) { + ParseCommandLineFlags(argc, argv); + } + + bool dump_onnx() const { return dump_onnx_; } + bool dump_json() const { return dump_json_; } + bool dump_dlc() const { return dump_dlc_; } + bool verbose() const { return verbose_; } + + public: + std::filesystem::path CreateTestcaseDirs() { + std::string test_suite_name = ::testing::UnitTest::GetInstance()->current_test_info()->test_suite_name(); + std::string test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); + std::filesystem::path output_dir = std::filesystem::current_path() / (test_suite_name + "_" + test_name); + std::filesystem::create_directories(output_dir); + + return output_dir; + } + + private: + void ParseCommandLineFlags(int argc, char** argv) { + for (int i = 1; i < argc; ++i) { + const std::string arg = argv[i]; + if (arg == "--dump_onnx") { + std::cout << "[QNN only] ONNX model dumping enabled." << std::endl; + dump_onnx_ = true; + } else if (arg == "--dump_json") { + std::cout << "[QNN only] Json QNN Graph dumping enabled." << std::endl; + dump_json_ = true; + } else if (arg == "--dump_dlc") { + std::cout << "[QNN only] DLC dumping enabled." 
<< std::endl; + dump_dlc_ = true; + } else if (arg == "--verbose") { + std::cout << "Verbose enabled" << std::endl; + verbose_ = true; + } + } + } + + bool dump_onnx_ = false; + bool dump_json_ = false; + bool dump_dlc_ = false; + bool verbose_ = false; +}; diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.cc b/onnxruntime/test/providers/qnn/qnn_test_utils.cc index 1c70f4012090e..6b56e90125c79 100644 --- a/onnxruntime/test/providers/qnn/qnn_test_utils.cc +++ b/onnxruntime/test/providers/qnn/qnn_test_utils.cc @@ -101,6 +101,10 @@ void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions prov int opset_version, ExpectedEPNodeAssignment expected_ep_assignment, float fp32_abs_err, logging::Severity log_severity, bool verify_outputs, std::function* ep_graph_checker) { + std::filesystem::path output_dir; + if (qnn_env->dump_onnx() || qnn_env->dump_dlc() || qnn_env->dump_json()) { + output_dir = qnn_env->CreateTestcaseDirs(); + } EPVerificationParams verification_params; verification_params.ep_node_assignment = expected_ep_assignment; verification_params.fp32_abs_err = fp32_abs_err; @@ -110,6 +114,10 @@ void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions prov auto& logging_manager = DefaultLoggingManager(); logging_manager.SetDefaultLoggerSeverity(log_severity); + if (qnn_env->verbose()) { + logging_manager.RemoveSink(logging::SinkType::EtwSink); + logging_manager.SetDefaultLoggerSeverity(logging::Severity::kVERBOSE); + } onnxruntime::Model model("QNN_EP_TestModel", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, {}, @@ -123,7 +131,27 @@ void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions prov // Serialize the model to a string. 
std::string model_data; model.ToProto().SerializeToString(&model_data); + + if (qnn_env->dump_onnx()) { + auto dump_path = output_dir / ToPathString("dumped_f32_model.onnx"); + LOGS(logging_manager.DefaultLogger(), VERBOSE) << "Save onnx model at: " << dump_path; + ASSERT_STATUS_OK(onnxruntime::Model::Save(model, dump_path)); + } + TryEnableQNNSaver(provider_options); + if (qnn_env->dump_dlc()) { + provider_options["dump_qnn_ir_dlc"] = "1"; + provider_options["dump_qnn_ir_dlc_dir"] = output_dir.string(); +#if defined(_WIN32) + provider_options["qnn_ir_backend_path"] = "QnnIr.dll"; +#else + provider_options["qnn_ir_backend_path"] = "libQnnIr.so"; +#endif // defined(_WIN32) + } + if (qnn_env->dump_json()) { + provider_options["dump_json_qnn_graph"] = "1"; + provider_options["json_qnn_graph_dir"] = output_dir.string(); + } RunAndVerifyOutputsWithEP(AsByteSpan(model_data.data(), model_data.size()), "QNN_EP_TestLogID", QnnExecutionProviderWithOptions(provider_options), helper.feeds_, verification_params, @@ -134,11 +162,19 @@ void RunQnnModelTestHTPNoVerify(const GetTestModelFn& build_test_case, ProviderO int opset_version, ExpectedEPNodeAssignment expected_ep_assignment, logging::Severity log_severity, std::function* ep_graph_checker) { + std::filesystem::path output_dir; + if (qnn_env->dump_onnx() || qnn_env->dump_dlc() || qnn_env->dump_json()) { + output_dir = qnn_env->CreateTestcaseDirs(); + } // Add kMSDomain to cover contrib op like Gelu const std::unordered_map domain_to_version = {{"", opset_version}, {kMSDomain, 1}}; auto& logging_manager = DefaultLoggingManager(); logging_manager.SetDefaultLoggerSeverity(log_severity); + if (qnn_env->verbose()) { + logging_manager.RemoveSink(logging::SinkType::EtwSink); + logging_manager.SetDefaultLoggerSeverity(logging::Severity::kVERBOSE); + } onnxruntime::Model model("QNN_EP_TestModel", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, {}, @@ -152,7 +188,27 @@ void 
RunQnnModelTestHTPNoVerify(const GetTestModelFn& build_test_case, ProviderO // Serialize the model to a string. std::string model_data; model.ToProto().SerializeToString(&model_data); + + if (qnn_env->dump_onnx()) { + auto dump_path = output_dir / ToPathString("dumped_f32_model.onnx"); + LOGS(logging_manager.DefaultLogger(), VERBOSE) << "Save onnx model at: " << dump_path; + ASSERT_STATUS_OK(onnxruntime::Model::Save(model, dump_path)); + } + TryEnableQNNSaver(provider_options); + if (qnn_env->dump_dlc()) { + provider_options["dump_qnn_ir_dlc"] = "1"; + provider_options["dump_qnn_ir_dlc_dir"] = output_dir.string(); +#if defined(_WIN32) + provider_options["qnn_ir_backend_path"] = "QnnIr.dll"; +#else + provider_options["qnn_ir_backend_path"] = "libQnnIr.so"; +#endif // defined(_WIN32) + } + if (qnn_env->dump_json()) { + provider_options["dump_json_qnn_graph"] = "1"; + provider_options["json_qnn_graph_dir"] = output_dir.string(); + } SessionOptions so; so.session_logid = "QNN_EP_TestLogID"; diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.h b/onnxruntime/test/providers/qnn/qnn_test_utils.h index aeb3a9a114871..65c51ee8f8ffa 100644 --- a/onnxruntime/test/providers/qnn/qnn_test_utils.h +++ b/onnxruntime/test/providers/qnn/qnn_test_utils.h @@ -20,6 +20,11 @@ #include "gtest/gtest.h" +#include "qnn_test_env.h" + +// in test_main.cc +extern std::unique_ptr qnn_env; + namespace onnxruntime { namespace test { @@ -529,15 +534,19 @@ inline void TestQDQModelAccuracy(const GetTestModelFn& f32_model_fn, const GetTe const std::string& qnn_ctx_model_path = "", const std::unordered_map& session_option_pairs = {}, std::function* qnn_ep_graph_checker = nullptr) { + std::filesystem::path output_dir; + if (qnn_env->dump_onnx() || qnn_env->dump_dlc() || qnn_env->dump_json()) { + output_dir = qnn_env->CreateTestcaseDirs(); + } // Add kMSDomain to cover contrib op like Gelu const std::unordered_map domain_to_version = {{"", opset_version}, {kMSDomain, 1}}; auto& logging_manager = 
DefaultLoggingManager(); - - // Uncomment to dump LOGGER() output to stdout. - // logging_manager.RemoveSink(logging::SinkType::EtwSink); - logging_manager.SetDefaultLoggerSeverity(log_severity); + if (qnn_env->verbose()) { + logging_manager.RemoveSink(logging::SinkType::EtwSink); + logging_manager.SetDefaultLoggerSeverity(logging::Severity::kVERBOSE); + } // Create float model and serialize it to a string. onnxruntime::Model f32_model("f32_model", false, ModelMetaData(), PathString(), @@ -551,8 +560,11 @@ inline void TestQDQModelAccuracy(const GetTestModelFn& f32_model_fn, const GetTe ASSERT_STATUS_OK(f32_model.MainGraph().Resolve()); f32_model.ToProto().SerializeToString(&f32_model_data); - // Uncomment to save f32 model to disk for debugging. - // ASSERT_STATUS_OK(onnxruntime::Model::Save(f32_model, ToPathString("cmp_accuracy.f32.onnx"))); + if (qnn_env->dump_onnx()) { + auto dump_path = output_dir / ToPathString("dumped_f32_model.onnx"); + LOGS(logging_manager.DefaultLogger(), VERBOSE) << "Save onnx float32 model at: " << dump_path; + ASSERT_STATUS_OK(onnxruntime::Model::Save(f32_model, dump_path)); + } // Run f32 model on CPU EP and collect outputs. std::vector cpu_f32_outputs; @@ -594,11 +606,27 @@ inline void TestQDQModelAccuracy(const GetTestModelFn& f32_model_fn, const GetTe ASSERT_STATUS_OK(qdq_model.MainGraph().Resolve()); qdq_model.ToProto().SerializeToString(&qdq_model_data); - // Uncomment to save QDQ model to disk for debugging. 
- // ASSERT_STATUS_OK(onnxruntime::Model::Save(qdq_model, ToPathString("cmp_accuracy.qdq.onnx"))); + if (qnn_env->dump_onnx()) { + auto dump_path = output_dir / ToPathString("dumped_qdq_model.onnx"); + LOGS(logging_manager.DefaultLogger(), VERBOSE) << "Save onnx QDQ model at: " << dump_path; + ASSERT_STATUS_OK(onnxruntime::Model::Save(qdq_model, dump_path)); + } bool is_qnn_ep = true; TryEnableQNNSaver(qnn_options); + if (qnn_env->dump_dlc()) { + qnn_options["dump_qnn_ir_dlc"] = "1"; + qnn_options["dump_qnn_ir_dlc_dir"] = output_dir.string(); +#if defined(_WIN32) + qnn_options["qnn_ir_backend_path"] = "QnnIr.dll"; +#else + qnn_options["qnn_ir_backend_path"] = "libQnnIr.so"; +#endif // defined(_WIN32) + } + if (qnn_env->dump_json()) { + qnn_options["dump_json_qnn_graph"] = "1"; + qnn_options["json_qnn_graph_dir"] = output_dir.string(); + } std::vector qnn_qdq_outputs; if (!qnn_ctx_model_path.empty()) { onnx::ModelProto model_proto; @@ -743,11 +771,19 @@ inline void TestFp16ModelAccuracy(const GetTestModelFn& f32_model_fn, logging::Severity log_severity = logging::Severity::kERROR, const std::string& qnn_ctx_model_path = "", const std::unordered_map& session_option_pairs = {}) { + std::filesystem::path output_dir; + if (qnn_env->dump_onnx() || qnn_env->dump_dlc() || qnn_env->dump_json()) { + output_dir = qnn_env->CreateTestcaseDirs(); + } // Add kMSDomain to cover contrib op like Gelu const std::unordered_map domain_to_version = {{"", opset_version}, {kMSDomain, 1}}; auto& logging_manager = DefaultLoggingManager(); logging_manager.SetDefaultLoggerSeverity(log_severity); + if (qnn_env->verbose()) { + logging_manager.RemoveSink(logging::SinkType::EtwSink); + logging_manager.SetDefaultLoggerSeverity(logging::Severity::kVERBOSE); + } // Create float model and serialize it to a string. 
onnxruntime::Model f32_model("f32_model", false, ModelMetaData(), PathString(), @@ -760,6 +796,12 @@ inline void TestFp16ModelAccuracy(const GetTestModelFn& f32_model_fn, ASSERT_STATUS_OK(f32_model.MainGraph().Resolve()); f32_model.ToProto().SerializeToString(&f32_model_data); + if (qnn_env->dump_onnx()) { + auto dump_path = output_dir / ToPathString("dumped_f32_model.onnx"); + LOGS(logging_manager.DefaultLogger(), VERBOSE) << "Save onnx float32 model at: " << dump_path; + ASSERT_STATUS_OK(onnxruntime::Model::Save(f32_model, dump_path)); + } + // Run f32 model on CPU EP and collect outputs. std::vector cpu_f32_outputs; InferenceModel(f32_model_data, "f32_model_logger", {}, ExpectedEPNodeAssignment::All, @@ -796,8 +838,27 @@ inline void TestFp16ModelAccuracy(const GetTestModelFn& f32_model_fn, ASSERT_STATUS_OK(f16_model.MainGraph().Resolve()); f16_model.ToProto().SerializeToString(&f16_model_data); + if (qnn_env->dump_onnx()) { + auto dump_path = output_dir / ToPathString("dumped_f16_model.onnx"); + LOGS(logging_manager.DefaultLogger(), VERBOSE) << "Save onnx float16 model at: " << dump_path; + ASSERT_STATUS_OK(onnxruntime::Model::Save(f16_model, dump_path)); + } + bool is_qnn_ep = true; TryEnableQNNSaver(qnn_options); + if (qnn_env->dump_dlc()) { + qnn_options["dump_qnn_ir_dlc"] = "1"; + qnn_options["dump_qnn_ir_dlc_dir"] = output_dir.string(); +#if defined(_WIN32) + qnn_options["qnn_ir_backend_path"] = "QnnIr.dll"; +#else + qnn_options["qnn_ir_backend_path"] = "libQnnIr.so"; +#endif // defined(_WIN32) + } + if (qnn_env->dump_json()) { + qnn_options["dump_json_qnn_graph"] = "1"; + qnn_options["json_qnn_graph_dir"] = output_dir.string(); + } std::vector qnn_f16_outputs; if (!qnn_ctx_model_path.empty()) { onnx::ModelProto model_proto; diff --git a/onnxruntime/test/unittest_main/test_main.cc b/onnxruntime/test/unittest_main/test_main.cc index 117a26d48efe9..e7ce8a104f3ef 100644 --- a/onnxruntime/test/unittest_main/test_main.cc +++ 
b/onnxruntime/test/unittest_main/test_main.cc @@ -37,7 +37,14 @@ #include "test/unittest_util/test_dynamic_plugin_ep.h" #endif // defined(TEST_MAIN_ENABLE_DYNAMIC_PLUGIN_EP_USAGE) +#ifdef USE_QNN +#include "test/providers/qnn/qnn_test_env.h" +#endif + std::unique_ptr<Ort::Env> ort_env; +#ifdef USE_QNN +std::unique_ptr<QNNTestEnvironment> qnn_env; +#endif // define environment variable name constants here namespace env_var_names { @@ -151,6 +158,9 @@ int TEST_MAIN(int argc, char** argv) { ORT_TRY { ortenv_setup(); ::testing::InitGoogleTest(&argc, argv); +#ifdef USE_QNN + qnn_env = std::make_unique<QNNTestEnvironment>(argc, argv); +#endif status = RUN_ALL_TESTS(); }