Commit bd9c8fe

[CPU][I64] Transformation & config.
1 parent bc26142

17 files changed: +269 -82 lines

src/common/transformations/src/transformations/convert_precision.cpp

Lines changed: 11 additions & 1 deletion
@@ -871,6 +871,14 @@ inline int32_t convert_value<uint32_t, int32_t>(uint32_t val) {
     return static_cast<int32_t>(val);
 }
 
+template <>
+inline int64_t convert_value<uint64_t, int64_t>(uint64_t val) {
+    if (val > static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
+        return std::numeric_limits<int64_t>::max();
+    }
+    return static_cast<int64_t>(val);
+}
+
 namespace {
 template <ov::element::Type_t PREC_FROM, ov::element::Type_t PREC_TO>
 std::shared_ptr<ngraph::Node> change_constant_precision(std::shared_ptr<opset4::Constant>& constant) {
@@ -1110,7 +1118,9 @@ bool fuse_type_to_constant(const std::shared_ptr<ngraph::Node>& node,
     const auto& to = it->second;
     if (auto constant = ov::as_type_ptr<opset4::Constant>(node)) {
         std::shared_ptr<ngraph::Node> new_const;
-        if (from == ov::element::u64 && to == ov::element::i32) {
+        if (from == ov::element::u64 && to == ov::element::i64) {
+            new_const = change_constant_precision<ov::element::Type_t::u64, ov::element::Type_t::i64>(constant);
+        } else if (from == ov::element::u64 && to == ov::element::i32) {
             new_const = change_constant_precision<ov::element::Type_t::u64, ov::element::Type_t::i32>(constant);
         } else if (from == ov::element::i64 && to == ov::element::i32) {
             new_const = change_constant_precision<ov::element::Type_t::i64, ov::element::Type_t::i32>(constant);
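
The new specialization saturates rather than wraps: any uint64_t value above INT64_MAX clamps to INT64_MAX. A minimal standalone sketch of the same rule (illustrative; the real template lives in convert_precision.cpp):

#include <cstdint>
#include <limits>

// Saturating u64 -> i64: values that do not fit into int64_t clamp to
// INT64_MAX instead of wrapping around to a negative number.
int64_t saturate_u64_to_i64(uint64_t val) {
    if (val > static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
        return std::numeric_limits<int64_t>::max();
    }
    return static_cast<int64_t>(val);
}

// saturate_u64_to_i64(42)         == 42
// saturate_u64_to_i64(UINT64_MAX) == 9223372036854775807 (INT64_MAX)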

src/inference/dev_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp

Lines changed: 5 additions & 0 deletions
@@ -110,6 +110,11 @@ INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(ENABLE);
 INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(IGNORE_CALLBACK);
 INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(DISABLE);
 
+/**
+ * @brief Enables inference with INT64 data type in CPU plugin if it's presented in the original model.
+ */
+DECLARE_CONFIG_KEY(CPU_NATIVE_I64);
+
 }  // namespace PluginConfigInternalParams
 
 }  // namespace InferenceEngine
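
A sketch of how a caller could pass the new internal key when loading a network on CPU (internal keys are not a public contract; the model path and flow here are assumptions):

#include <ie_core.hpp>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>

InferenceEngine::Core core;
auto network = core.ReadNetwork("model.xml");  // hypothetical model path
auto execNet = core.LoadNetwork(network, "CPU",
    {{InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64,
      InferenceEngine::PluginConfigParams::YES}});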

src/plugins/intel_cpu/src/config.cpp

Lines changed: 10 additions & 1 deletion
@@ -230,6 +230,15 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
             IE_THROW() << "Wrong value for property key " << ov::hint::execution_mode.name()
                        << ". Supported values: PERFORMANCE, ACCURACY";
         }
+    } else if (key == PluginConfigInternalParams::KEY_CPU_NATIVE_I64) {
+        if (val == PluginConfigParams::YES) {
+            enableNativeI64 = true;
+        } else if (val == PluginConfigParams::NO) {
+            enableNativeI64 = false;
+        } else {
+            IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_CPU_NATIVE_I64 << ": " << val
+                       << ". Expected only YES or NO values.";
+        }
     } else {
         IE_THROW(NotFound) << "Unsupported property " << key << " by CPU plugin";
     }
@@ -308,4 +317,4 @@ void Config::updateProperties() {
 }
 
 }   // namespace intel_cpu
-}   // namespace ov
\ No newline at end of file
+}   // namespace ov
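
With this branch in place, malformed values fail fast during property parsing. A test-style sketch, assuming Config is driven directly (error text abbreviated):

using namespace InferenceEngine;

ov::intel_cpu::Config cfg;
cfg.readProperties({{PluginConfigInternalParams::KEY_CPU_NATIVE_I64, PluginConfigParams::YES}});
// cfg.enableNativeI64 is now true

cfg.readProperties({{PluginConfigInternalParams::KEY_CPU_NATIVE_I64, "MAYBE"}});
// throws: "Wrong value for property key CPU_NATIVE_I64: MAYBE. Expected only YES or NO values."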

src/plugins/intel_cpu/src/config.h

Lines changed: 1 addition & 0 deletions
@@ -57,6 +57,7 @@ struct Config {
     // TODO: Executor cache may leads to incorrect behavior on oneDNN ACL primitives
     size_t rtCacheCapacity = 0ul;
 #endif
+    bool enableNativeI64 = false;
     InferenceEngine::IStreamsExecutor::Config streamExecutorConfig;
     InferenceEngine::PerfHintsConfig perfHintsConfig;
     bool enableCpuPinning = true;

src/plugins/intel_cpu/src/dnnl_extension_utils.cpp

Lines changed: 29 additions & 27 deletions
@@ -4,45 +4,43 @@
 
 #include "dnnl_extension_utils.h"
 
-#include "utils/general_utils.h"
 #include <oneapi/dnnl/dnnl.hpp>
 #include "memory_desc/dnnl_blocked_memory_desc.h"
-#include "onednn/iml_type_mapper.h"
-#include <common/primitive_desc.hpp>
 #include <common/primitive_desc_iface.hpp>
 
-#include <vector>
-
 using namespace dnnl;
 
 namespace ov {
 namespace intel_cpu {
 
-uint8_t DnnlExtensionUtils::sizeOfDataType(dnnl::memory::data_type dataType) {
+uint8_t DnnlExtensionUtils::sizeOfDataType(memory::data_type dataType) {
     switch (dataType) {
-    case dnnl::memory::data_type::f32:
-        return 4;
-    case dnnl::memory::data_type::s32:
+    case memory::data_type::f64:
+    case memory::data_type::s64:
+        return 8;
+    case memory::data_type::f32:
+    case memory::data_type::s32:
         return 4;
-    case dnnl::memory::data_type::bf16:
+    case memory::data_type::bf16:
+    case memory::data_type::f16:
         return 2;
-    case dnnl::memory::data_type::s8:
-        return 1;
-    case dnnl::memory::data_type::u8:
+    case memory::data_type::s8:
+    case memory::data_type::u8:
+    case memory::data_type::bin:
         return 1;
-    case dnnl::memory::data_type::bin:
-        return 1;
-    case dnnl::memory::data_type::f16:
-        return 2;
-    case dnnl::memory::data_type::undef:
+    case memory::data_type::undef:
        return 0;
    default:
-        IE_THROW() << "Unsupported data type.";
+        IE_THROW() << "Unsupported data type: " << DataTypeToIEPrecision(dataType);
    }
 }
 
 memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngine::Precision& prec) {
     switch (prec) {
+    case InferenceEngine::Precision::FP64:
+        return memory::data_type::f64;
+    case InferenceEngine::Precision::I64:
+        return memory::data_type::s64;
     case InferenceEngine::Precision::FP32:
         return memory::data_type::f32;
     case InferenceEngine::Precision::I32:
@@ -68,6 +66,10 @@ memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngin
 
 InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::data_type dataType) {
     switch (dataType) {
+    case memory::data_type::f64:
+        return InferenceEngine::Precision::FP64;
+    case memory::data_type::s64:
+        return InferenceEngine::Precision::I64;
     case memory::data_type::f32:
         return InferenceEngine::Precision::FP32;
     case memory::data_type::s32:
@@ -90,11 +92,11 @@ InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::dat
     }
 }
 
-Dim DnnlExtensionUtils::convertToDim(const dnnl::memory::dim &dim) {
+Dim DnnlExtensionUtils::convertToDim(const memory::dim &dim) {
     return dim == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : static_cast<size_t>(dim);
 }
-dnnl::memory::dim DnnlExtensionUtils::convertToDnnlDim(const Dim &dim) {
-    return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast<dnnl::memory::dim>(dim);
+memory::dim DnnlExtensionUtils::convertToDnnlDim(const Dim &dim) {
+    return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast<memory::dim>(dim);
 }
 
 VectorDims DnnlExtensionUtils::convertToVectorDims(const memory::dims& dims) {
@@ -133,19 +135,19 @@ memory::format_tag DnnlExtensionUtils::GetPlainFormatByRank(size_t rank) {
     }
 }
 
-DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const dnnl::memory::desc &desc) {
+DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const memory::desc &desc) {
     return makeDescriptor(desc.get());
 }
 
 DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const_dnnl_memory_desc_t desc) {
-    if (desc->format_kind == dnnl::impl::format_kind_t::dnnl_blocked) {
+    if (desc->format_kind == impl::format_kind_t::dnnl_blocked) {
         return std::shared_ptr<DnnlBlockedMemoryDesc>(new DnnlBlockedMemoryDesc(desc));
     } else {
         return std::shared_ptr<DnnlMemoryDesc>(new DnnlMemoryDesc(desc));
     }
 }
 
-size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const dnnl::memory::desc& desc) {
+size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const memory::desc& desc) {
     auto tmpDesc = desc;
 
     const auto offset0 = tmpDesc.get()->offset0;
@@ -167,8 +169,8 @@ std::shared_ptr<DnnlBlockedMemoryDesc> DnnlExtensionUtils::makeUndefinedDesc(con
     }
 }
 
-DnnlMemoryDescPtr DnnlExtensionUtils::query_md(const const_dnnl_primitive_desc_t& pd, const dnnl::query& what, int idx) {
-    auto query = dnnl::convert_to_c(what);
+DnnlMemoryDescPtr DnnlExtensionUtils::query_md(const const_dnnl_primitive_desc_t& pd, const query& what, int idx) {
+    auto query = convert_to_c(what);
     const auto* cdesc = dnnl_primitive_desc_query_md(pd, query, idx);
 
     if (!cdesc)
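
With both mapping functions extended, I64 now round-trips through the oneDNN data-type layer. A quick illustration using the patched helpers (assert-style sketch):

#include <cassert>
#include "dnnl_extension_utils.h"

using namespace ov::intel_cpu;

auto dt = DnnlExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::I64);
assert(dt == dnnl::memory::data_type::s64);
assert(DnnlExtensionUtils::sizeOfDataType(dt) == 8);
assert(DnnlExtensionUtils::DataTypeToIEPrecision(dt) == InferenceEngine::Precision::I64);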

src/plugins/intel_cpu/src/graph.cpp

Lines changed: 9 additions & 10 deletions
@@ -45,7 +45,6 @@
 #include "memory_desc/cpu_memory_desc_utils.h"
 
 #include <openvino/core/model.hpp>
-#include <openvino/core/node.hpp>
 #include <openvino/op/ops.hpp>
 #include <transformations/utils/utils.hpp>
 #include <low_precision/low_precision.hpp>
@@ -306,7 +305,7 @@ void Graph::Replicate(const CNNNetwork &network) {
     // change precision for input/output nodes to avoid extra data conversion when set input/output blobs
     // also we need to change input/output precisions for consumers/producers to avoid inserting reorder
     for (auto &input : inputNodesMap) {
-        const auto precToSet = normalizeToSupportedPrecision(inputsInfo.at(input.first)->getPrecision());
+        auto precToSet = normalizeToSupportedPrecision(inputsInfo.at(input.first)->getPrecision(), getConfig().enableNativeI64);
         input.second->setOriginalOutputPrecisionAtPort(0, precToSet);
         const auto childEdges = input.second->getChildEdgesAtPort(0);
         for (size_t i = 0; i < childEdges.size(); i++) {
@@ -320,7 +319,7 @@ void Graph::Replicate(const CNNNetwork &network) {
     }
 
     for (auto &output : outputNodesMap) {
-        const auto precToSet = normalizeToSupportedPrecision(outputsInfo.at(output.first)->getPrecision());
+        auto precToSet = normalizeToSupportedPrecision(outputsInfo.at(output.first)->getPrecision(), getConfig().enableNativeI64);
         output.second->setOriginalInputPrecisionAtPort(0, precToSet);
         const auto parentEdges = output.second->getParentEdgesAtPort(0);
         for (size_t i = 0; i < parentEdges.size(); i++) {
@@ -978,7 +977,7 @@ void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob::
 
     // todo: make sure 'name' exists in this map...
     if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) {
-        if (inTensorDesc.getPrecision() == InferenceEngine::Precision::FP32) {
+        if (inTensorDesc.getPrecision() == Precision::FP32) {
             _normalizePreprocMap[name].NormalizeImage(outDims, reinterpret_cast<float *>(inter_data_ptr),
                                                       inTensorDesc.getLayout());
         } else {
@@ -1425,16 +1424,16 @@ void Graph::SortTopologically() {
     }
 }
 
-void Graph::GetPerfData(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const {
+void Graph::GetPerfData(std::map<std::string, InferenceEngineProfileInfo> &perfMap) const {
     unsigned i = 0;
-    std::function<void(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &, const NodePtr&)>
-            getPerfMapFor = [&](std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap, const NodePtr& node) {
-                InferenceEngine::InferenceEngineProfileInfo &pc = perfMap[node->getName()];
+    std::function<void(std::map<std::string, InferenceEngineProfileInfo> &, const NodePtr&)>
+            getPerfMapFor = [&](std::map<std::string, InferenceEngineProfileInfo> &perfMap, const NodePtr& node) {
+                InferenceEngineProfileInfo &pc = perfMap[node->getName()];
                 pc.execution_index = i++;
                 // TODO: Why time counter is signed?
                 pc.cpu_uSec = pc.realTime_uSec = (long long) node->PerfCounter().avg();
-                pc.status = pc.cpu_uSec > 0 ? InferenceEngine::InferenceEngineProfileInfo::EXECUTED
-                                            : InferenceEngine::InferenceEngineProfileInfo::NOT_RUN;
+                pc.status = pc.cpu_uSec > 0 ? InferenceEngineProfileInfo::EXECUTED
+                                            : InferenceEngineProfileInfo::NOT_RUN;
                 std::string pdType = node->getPrimitiveDescriptorType();
                 size_t typeLen = sizeof(pc.exec_type) / sizeof(pc.exec_type[0]);
                 pdType.copy(pc.exec_type, typeLen, 0);
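
normalizeToSupportedPrecision() now receives the flag explicitly; its body is not part of this commit, so the following is only a plausible sketch of the gating the new parameter implies:

// Hypothetical sketch, not the actual implementation: keep I64 only when the
// native-I64 mode is on, otherwise fall back to the legacy I32 path.
InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine::Precision prec,
                                                         bool enableNativeI64) {
    if (prec == InferenceEngine::Precision::I64 && !enableNativeI64) {
        return InferenceEngine::Precision::I32;  // legacy down-conversion
    }
    // ... remaining normalization rules unchanged ...
    return prec;
}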

src/plugins/intel_cpu/src/graph_optimizer.cpp

Lines changed: 0 additions & 1 deletion
@@ -19,7 +19,6 @@
 #include "nodes/mvn.h"
 #include "nodes/transpose.h"
 #include "nodes/interpolate.h"
-#include "nodes/reduce.h"
 #include "nodes/input.h"
 #include "nodes/rnn.h"
 #include "nodes/common/cpu_convert.h"

src/plugins/intel_cpu/src/infer_request.cpp

Lines changed: 2 additions & 2 deletions
@@ -313,7 +313,7 @@ InferRequestBase::normToInputSupportedPrec(const std::pair<const std::string, In
     if (graph->hasMeanImageFor(input.first) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) {
         inPrec = InferenceEngine::Precision::FP32;
     } else {
-        inPrec = normalizeToSupportedPrecision(inPrec);
+        inPrec = normalizeToSupportedPrecision(inPrec, graph->getConfig().enableNativeI64);
     }
 
     if (inPrec == InferenceEngine::Precision::UNSPECIFIED) {
@@ -529,7 +529,7 @@ InferenceEngine::Blob::Ptr LegacyInferRequest::GetBlob(const std::string& name)
     auto pBlobDesc = MemoryDescUtils::interpretAsBlobDesc(graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory());
     if (!data) {
         InferenceEngine::TensorDesc desc = _networkOutputs[name]->getTensorDesc();
-        desc.setPrecision(normalizeToSupportedPrecision(desc.getPrecision()));
+        desc.setPrecision(normalizeToSupportedPrecision(desc.getPrecision(), graph->getConfig().enableNativeI64));
 
         // WA: need to avoid exception thrown when we compare blocking desc in SetBlob
         // in situation if we push output blobs as inputs for next network (in Hetero plugin)

src/plugins/intel_cpu/src/plugin.cpp

Lines changed: 18 additions & 7 deletions
@@ -9,7 +9,6 @@
 
 #include "transformations/transformation_pipeline.h"
 #include "itt.h"
-#include "extension_mngr.h"
 #include "extension.h"
 #include "serialize.h"
 #include "threading/ie_executor_manager.hpp"
@@ -21,11 +20,9 @@
 #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
 #include "openvino/runtime/intel_cpu/properties.hpp"
 
-#include <transformations/utils/utils.hpp>
 #include <ie_ngraph_utils.hpp>
 
 #include "performance_heuristics.hpp"
-#include "openvino/runtime/properties.hpp"
 #include "weights_cache.hpp"
 #include "utils/denormals.hpp"
 
@@ -36,7 +33,6 @@
 #endif
 
 #include <cpu/x64/cpu_isa_traits.hpp>
-#include <itt.h>
 
 #if defined(OV_CPU_WITH_ACL)
 #include "nodes/executors/acl/acl_ie_scheduler.hpp"
@@ -164,7 +160,7 @@ static bool streamsSet(const std::map<std::string, std::string>& config) {
            config.count(ov::num_streams.name());
 }
 
-void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ngraph::Function>& ngraphFunc) const {
+void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ov::Model>& ngraphFunc) const {
     auto getNumStreamsLatency = [&]() {
         return std::pair<std::string, std::string>(CONFIG_VALUE(CPU_THROUGHPUT_NUMA), ov::util::to_string(ov::streams::NUMA));
     };
@@ -281,7 +277,7 @@ void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, c
     }
 }
 
-void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr<ngraph::Function>& ngraphFunc) {
+void Engine::GetPerformanceStreams(Config& config, const std::shared_ptr<ov::Model>& ngraphFunc) {
     const auto perf_hint_name = config.perfHintsConfig.ovPerfHint;
     // save hints parameters to model rt_info
     ov::AnyMap hints_props;
@@ -430,6 +426,19 @@ static Config::SnippetsMode getSnippetsMode(const std::map<std::string, std::str
     IE_THROW() << "Wrong value for property key SNIPPETS_MODE. Expected values: ENABLE/DISABLE/IGNORE_CALLBACK";
 }
 
+static void setI64Mode(const std::map<std::string, std::string>& modelConfig, Config& engineConfig) {
+    engineConfig.enableNativeI64 = false;
+    const auto i64prop = modelConfig.find(InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64);
+    if (i64prop != modelConfig.end()) {
+        if (i64prop->second == PluginConfigParams::YES) {
+            engineConfig.enableNativeI64 = true;
+        } else if (i64prop->second != PluginConfigParams::NO) {
+            IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_CPU_NATIVE_I64 << ": " << i64prop->second
+                       << ". Expected only YES or NO values.";
+        }
+    }
+}
+
 InferenceEngine::IExecutableNetworkInternal::Ptr
 Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &orig_config) {
     OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Engine::LoadExeNetworkImpl");
@@ -463,6 +472,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     const bool enableLPT = shouldEnableLPT(config, engConfig);
     ov::element::Type inferencePrecision = getInferencePrecision(config, engConfig);
     const Config::SnippetsMode snippetsMode = getSnippetsMode(config, engConfig);
+    setI64Mode(config, engConfig);
 
     auto nGraphFunc = clonedNetwork.getFunction();
 
@@ -738,6 +748,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
     const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/
                           || Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled */;
     const Config::SnippetsMode snippetsMode = getSnippetsMode(config, conf);
+    setI64Mode(config, conf);
 
     auto model = network.getFunction();
     if (model == nullptr) {
@@ -753,7 +764,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
             transformation.UpToCpuSpecificOpSet();
             transformation.CpuSpecificOpSet();
         },
-        [&](const std::shared_ptr<ngraph::Node>& op) {
+        [&](const std::shared_ptr<ov::Node>& op) {
            std::unique_ptr<Node> ptr;
            try {
                ptr.reset(Node::factory().create(op, context));
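
setI64Mode() resets enableNativeI64 before reading the per-model config, so the flag is decided per LoadExeNetworkImpl/QueryNetwork call rather than inherited from engine-wide state. A test-style sketch of that contract (setI64Mode is file-static in plugin.cpp, so this is illustrative only):

Config conf;
conf.enableNativeI64 = true;          // leftover engine-wide state
setI64Mode({}, conf);                 // key absent in the model config
// conf.enableNativeI64 == false     -> reset to the default

setI64Mode({{InferenceEngine::PluginConfigInternalParams::KEY_CPU_NATIVE_I64,
             InferenceEngine::PluginConfigParams::YES}}, conf);
// conf.enableNativeI64 == true      -> explicitly enabled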
