Skip to content

Commit 8b81d9b

Browse files
quic-calvnguy and quic_calvnguy authored
[QNN-EP] - Tie HTP power config id lifetime to session (#26457)
### Description

- Remove PerThreadContext (it was only used for ID management).
- Create ManagedHtpPowerConfigId to manage destruction of the ID.
- Create only one HTP power config ID per session (previously one was created per thread).

### Motivation and Context

One session can potentially be used on multiple threads for execution, and there is a fixed maximum number of HTP power config IDs that can be in use at any given time. If enough sessions and enough threads are created, the maximum number of power config IDs can easily be reached (see ticket). Additionally, all power configurations are available on a per-session basis, so there is no reason to have more than one power config ID per session.

PerThreadContext is removed because it was only used to hold the power config IDs and destroy them on thread termination; it is no longer needed.

---------

Co-authored-by: quic_calvnguy <quic_calvnguy@quic_inc.com>
1 parent 549d741 commit 8b81d9b

File tree

7 files changed

+202
-195
lines changed

7 files changed

+202
-195
lines changed

onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1555,6 +1555,61 @@ Status QnnBackendManager::SetRpcPowerConfigs(uint32_t htp_power_config_client_id
15551555
return Status::OK();
15561556
}
15571557

1558+
// Applies the HTP power settings registered for `thread_id`, if any.
//
// thread_id: thread whose registered configs should be applied.
// pre_run:   true  -> apply the pre-run performance mode and the RPC configs (each only if set);
//            false -> apply the post-run performance mode (only if set).
//
// Returns Status::OK() when no configs are registered for the thread or when every
// requested setting was applied; otherwise returns the first failing status.
Status QnnBackendManager::SetPerThreadHtpPowerConfigs(const std::thread::id& thread_id, bool pre_run) {
  PerThreadHtpPowerConfigs_t configs;
  const bool has_mapping = GetPerThreadHtpPowerConfigMapping(thread_id, configs);
  if (!has_mapping) {
    // Nothing registered for this thread: not an error, simply nothing to do.
    return Status::OK();
  }

  const uint32_t config_id = configs.power_config_id;

  if (!pre_run) {
    // Post-run path: only the performance mode is adjusted.
    if (configs.post_run_perf_mode.has_value()) {
      ORT_RETURN_IF_ERROR(SetHtpPowerConfig(config_id, *configs.post_run_perf_mode));
    }
    return Status::OK();
  }

  // Pre-run path: performance mode first, then the RPC settings.
  if (configs.pre_run_perf_mode.has_value()) {
    ORT_RETURN_IF_ERROR(SetHtpPowerConfig(config_id, *configs.pre_run_perf_mode));
  }

  if (configs.rpc_configs.has_value()) {
    const auto& rpc = *configs.rpc_configs;
    ORT_RETURN_IF_ERROR(SetRpcPowerConfigs(config_id,
                                           rpc.rpc_control_latency,
                                           rpc.rpc_polling_time));
  }

  return Status::OK();
}
1581+
1582+
// Registers `htp_power_configs` as the power configs for `thread_id`.
//
// thread_id:         thread the configs are associated with.
// htp_power_configs: configs to store; they are copied into the map.
//
// Returns an error status if a mapping for `thread_id` already exists (the existing
// entry is left untouched); otherwise stores the configs and returns Status::OK().
// The map is accessed under per_thread_power_configs_mutex_.
Status QnnBackendManager::AddPerThreadHtpPowerConfigMapping(const std::thread::id& thread_id,
                                                            const PerThreadHtpPowerConfigs_t& htp_power_configs) {
  std::lock_guard<std::mutex> lock(per_thread_power_configs_mutex_);

  // try_emplace performs a single lookup and leaves the map unchanged when the key is
  // already present. The parameter is a const reference, so it is copied (a std::move
  // here would silently degrade to a copy anyway).
  const bool inserted = per_thread_power_configs_.try_emplace(thread_id, htp_power_configs).second;
  ORT_RETURN_IF(!inserted, "Trying to set HtpPowerConfigs for thread id ", thread_id,
                " but one already exists!");

  return Status::OK();
}
1594+
1595+
bool QnnBackendManager::GetPerThreadHtpPowerConfigMapping(const std::thread::id& thread_id,
1596+
PerThreadHtpPowerConfigs_t& htp_power_configs) {
1597+
std::lock_guard<std::mutex> lock(per_thread_power_configs_mutex_);
1598+
1599+
auto it = per_thread_power_configs_.find(thread_id);
1600+
if (it == per_thread_power_configs_.end()) {
1601+
return false;
1602+
}
1603+
1604+
htp_power_configs = it->second;
1605+
return true;
1606+
}
1607+
1608+
void QnnBackendManager::RemovePerThreadHtpPowerConfigMapping(const std::thread::id& thread_id) {
1609+
std::lock_guard<std::mutex> lock(per_thread_power_configs_mutex_);
1610+
per_thread_power_configs_.erase(thread_id);
1611+
}
1612+
15581613
Status QnnBackendManager::DestroyHTPPowerConfigID(uint32_t htp_power_config_id) {
15591614
QnnDevice_Infrastructure_t qnn_device_infra = nullptr;
15601615
auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra);

onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <mutex>
1616
#include <string>
1717
#include <string_view>
18+
#include <thread>
1819
#include <unordered_map>
1920
#include <vector>
2021

@@ -172,6 +173,13 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
172173
uint32_t rpc_control_latency,
173174
uint32_t rpc_polling_time);
174175

176+
Status SetPerThreadHtpPowerConfigs(const std::thread::id& thread_id, bool pre_run);
177+
178+
Status AddPerThreadHtpPowerConfigMapping(const std::thread::id& thread_id,
179+
const PerThreadHtpPowerConfigs_t& htp_power_configs);
180+
181+
void RemovePerThreadHtpPowerConfigMapping(const std::thread::id& thread_id);
182+
175183
// Returns a const reference to the qnn_interface_ member.
const QNN_INTERFACE_VER_TYPE& GetQnnInterface() { return qnn_interface_; }
176184

177185
const Qnn_ContextHandle_t& GetQnnContext(int index = 0) {
@@ -334,6 +342,9 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
334342
// Transfers ownership of `context_handle` (i.e., responsibility of freeing it) to this instance
335343
Status AddQnnContextHandle(Qnn_ContextHandle_t context_handle);
336344

345+
bool GetPerThreadHtpPowerConfigMapping(const std::thread::id& thread_id,
346+
PerThreadHtpPowerConfigs_t& htp_power_configs);
347+
337348
private:
338349
// assume Qnn_ContextHandle_t is a pointer and able to be wrapped with std::unique_ptr
339350
static_assert(std::is_pointer_v<Qnn_ContextHandle_t>);
@@ -464,6 +475,10 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
464475
uint32_t soc_model_ = QNN_SOC_MODEL_UNKNOWN;
465476
const std::vector<OpPackage> op_packages_;
466477
bool skip_qnn_version_check_ = false;
478+
479+
// Mapping of thread id to on-run-start/end power configs
480+
std::mutex per_thread_power_configs_mutex_;
481+
std::unordered_map<std::thread::id, PerThreadHtpPowerConfigs_t> per_thread_power_configs_;
467482
};
468483

469484
} // namespace qnn

onnxruntime/core/providers/qnn/builder/qnn_def.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,19 @@ enum class HtpPerformanceMode : uint8_t {
7171
kHtpExtremePowerSaver,
7272
};
7373

74+
// RPC-related power settings; both values default to 0.
struct RpcPowerConfigs {
  uint32_t rpc_control_latency = 0;  // passed to SetRpcPowerConfigs as rpc_control_latency
  uint32_t rpc_polling_time = 0;     // passed to SetRpcPowerConfigs as rpc_polling_time
};
using RpcPowerConfigs_t = RpcPowerConfigs;
78+
79+
typedef struct PerThreadHtpPowerConfigs {
80+
std::optional<HtpPerformanceMode> pre_run_perf_mode;
81+
std::optional<HtpPerformanceMode> post_run_perf_mode;
82+
std::optional<RpcPowerConfigs_t> rpc_configs;
83+
84+
uint32_t power_config_id = 0;
85+
} PerThreadHtpPowerConfigs_t;
86+
7487
enum class ContextPriority : uint8_t {
7588
LOW = 0,
7689
NORMAL,

onnxruntime/core/providers/qnn/builder/qnn_model.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <iostream>
77
#include <fstream>
88
#include <gsl/gsl>
9+
#include <thread>
910
#include "QnnOpDef.h"
1011

1112
#include "core/providers/qnn/builder/op_builder_factory.h"
@@ -346,8 +347,11 @@ Status QnnModel::ExecuteGraph(const Ort::KernelContext& context,
346347
profiling_info.start_time = qnn::utils::GetTimeStampInUs();
347348
}
348349
#endif
349-
350350
auto profile_backend_handle = qnn_backend_manager_->GetQnnProfileHandle();
351+
352+
auto thread_id = std::this_thread::get_id();
353+
ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetPerThreadHtpPowerConfigs(thread_id, true));
354+
351355
execute_status = qnn_interface.graphExecute(graph_info_->Graph(),
352356
qnn_inputs.data(),
353357
static_cast<uint32_t>(qnn_inputs.size()),
@@ -363,6 +367,8 @@ Status QnnModel::ExecuteGraph(const Ort::KernelContext& context,
363367
}
364368
#endif
365369

370+
ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetPerThreadHtpPowerConfigs(thread_id, false));
371+
366372
// NOTE: This function returns immediately when profiling is disabled.
367373
// Extracting profiling data can be expensive, but it is typically only enabled for debugging purposes
368374
// and not in production. We can improve synchronization for event profiling if it becomes an issue.

0 commit comments

Comments
 (0)