Skip to content

Commit 291c9e9

Browse files
yeonbok and MirceaDan99
authored and committed
[GPU] Fix many unittest failures due to the zero byte allocation of ocl memory (openvinotoolkit#28270)
### Details:
- Fix many unittest failures due to the zero-byte allocation of OCL memory
- Removed the now-unnecessary helper that allocated 1 byte for a zero-byte layout

### Tickets:
- CVS-160053
1 parent 3f4be1b commit 291c9e9

File tree

4 files changed

+17
-36
lines changed

4 files changed

+17
-36
lines changed

src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
#include <ostream>
88
#include <tuple>
9-
#include "intel_gpu/runtime/engine.hpp"
109
#include "intel_gpu/runtime/layout.hpp"
1110
#include "intel_gpu/runtime/memory.hpp"
1211
#include "intel_gpu/runtime/optionals.hpp"
@@ -104,26 +103,6 @@ inline ov::Shape predict_shape(const std::string& name, const cldnn::layout layo
104103
return layout.get_shape();
105104
}
106105

107-
inline cldnn::memory::ptr allocate_memory_evenif_zero_bytes(cldnn::engine& _engine,
108-
const cldnn::layout& layout,
109-
cldnn::allocation_type type,
110-
bool reset = true) {
111-
if (layout.bytes_count() == 0) {
112-
auto non_zero_layout = cldnn::layout({1}, layout.data_type, layout.format);
113-
auto res = _engine.allocate_memory(non_zero_layout, type, false);
114-
return _engine.reinterpret_buffer(*res, layout);
115-
} else {
116-
return _engine.allocate_memory(layout, type, reset);
117-
}
118-
}
119-
120-
inline cldnn::memory::ptr allocate_memory_evenif_zero_bytes(cldnn::engine& _engine,
121-
const cldnn::layout& layout,
122-
bool reset = true) {
123-
cldnn::allocation_type type = _engine.get_lockable_preferred_memory_allocation_type(layout.format.is_image_2d());
124-
return allocate_memory_evenif_zero_bytes(_engine, layout, type, reset);
125-
}
126-
127106
/// WA: Force exit. Any opencl api call can be hang after CL_OUT_OF_RESOURCES.
128107
inline void ForceExit() {
129108
std::cerr << "[GPU] force exit.\n"

src/plugins/intel_gpu/src/graph/loop.cpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
#include "mutable_data_inst.h"
88
#include "json_object.h"
99
#include "primitive_type_base.h"
10-
#include "intel_gpu/plugin/common_utils.hpp"
1110
#include "intel_gpu/primitives/data.hpp"
1211
#include "intel_gpu/primitives/mutable_data.hpp"
1312
#include <string>
@@ -319,7 +318,7 @@ void loop_inst::update_backedge_mapped_memory() {
319318
// generally, shouldn't go this way, but...
320319
auto output_prim = body_network->get_primitive(back_edge.from);
321320
layout output_layout = output_prim->output_memory().get_layout();
322-
backedge_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), output_layout, false);
321+
backedge_mem = body_network->get_engine().allocate_memory(output_layout, 0);
323322
}
324323
} else {
325324
auto external_id = output_mapping.front()->external_id;
@@ -397,7 +396,7 @@ loop_inst::concatenated_memory_mapping::ptr loop_inst::create_concat_memory_map(
397396
<< sliced_layout.get_partial_shape().to_string()
398397
<< " to " << updated_sliced_layout.to_string() << std::endl;
399398
sliced_layout.set_partial_shape(updated_sliced_layout);
400-
inter_mem_ptr = ov::intel_gpu::allocate_memory_evenif_zero_bytes(engine, sliced_layout);
399+
inter_mem_ptr = engine.allocate_memory(sliced_layout);
401400
intern_prim->set_output_layout(sliced_layout, internal_id.idx);
402401
}
403402

@@ -407,8 +406,8 @@ loop_inst::concatenated_memory_mapping::ptr loop_inst::create_concat_memory_map(
407406
} else {
408407
sliced_mems.reserve(num_iterations);
409408
sliced_mems.push_back(inter_mem_ptr);
410-
for (int j=1; j < num_iterations; ++j) {
411-
memory::ptr sliced_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(engine, sliced_layout);
409+
for (int j = 1; j < num_iterations; ++j) {
410+
memory::ptr sliced_mem = engine.allocate_memory(sliced_layout);
412411
sliced_mems.push_back(sliced_mem);
413412
}
414413
}
@@ -499,7 +498,7 @@ void loop_inst::preprocess_input_memory(const int64_t num_iterations) {
499498
// if internal input memory is in backedge, allocate new memory.
500499
// Because internal input memory's data will be updated through backedge process.
501500
if (iter != _back_edges.end()) {
502-
internal_input_memory = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), memory->get_layout(), false);
501+
internal_input_memory = body_network->get_engine().allocate_memory(memory->get_layout(), false);
503502
internal_input_memory->copy_from(body_network->get_stream(), *memory);
504503
GPU_DEBUG_LOG << "Input memory of internal node(" << internal_id.to_string() << ") is set to new memory("
505504
<< internal_input_memory << ", " << internal_input_memory->get_layout().to_short_string()
@@ -722,7 +721,7 @@ void loop_inst::postprocess_output_memory(bool is_dynamic, int64_t current_itera
722721
} else {
723722
if (!output_allocated || get_flag(ExecutionFlags::SHAPE_CHANGED)) {
724723
auto concat_layout = _impl_params->get_output_layout(external_id.idx);
725-
auto concat_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(_network.get_engine(), concat_layout, false);
724+
auto concat_mem = _network.get_engine().allocate_memory(concat_layout, false);
726725
external_outputs[external_id.idx] = concat_mem;
727726
auto iter = std::find_if(concatenated_output_mem_mappings.begin(),
728727
concatenated_output_mem_mappings.end(),
@@ -1081,7 +1080,7 @@ std::vector<event::ptr> loop_inst::handle_buffers_for_next_iteration(const loop_
10811080
// Check backedge_to shape needs to be updated by initial_mem
10821081
OPENVINO_ASSERT(mapping.initial_mem != nullptr, "initial_mem should not be null");
10831082
if (!mapping.initial_mem->get_layout().identical(to_mem->get_layout())) {
1084-
to_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), mapping.initial_mem->get_layout(), false);
1083+
to_mem = body_network->get_engine().allocate_memory(mapping.initial_mem->get_layout(), false);
10851084

10861085
body_network->set_input_data(to_id, to_mem);
10871086
ev = to_mem->copy_from(body_network->get_stream(), *(mapping.initial_mem));
@@ -1104,7 +1103,7 @@ std::vector<event::ptr> loop_inst::handle_buffers_for_next_iteration(const loop_
11041103

11051104
// Check backedge_to shape needs to be updated by backedge_from
11061105
if (!from_mem->get_layout().identical(to_mem->get_layout())) {
1107-
to_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), from_mem->get_layout(), false);
1106+
to_mem = body_network->get_engine().allocate_memory(from_mem->get_layout(), false);
11081107
GPU_DEBUG_LOG << iter << ") [SINGLE] Backedge_to node(" << to_id << ") is set to new memory("
11091108
<< to_mem << ", " << to_mem->get_layout().to_short_string()
11101109
<< ") because of shape update from backedge_from()" << from_id

src/plugins/intel_gpu/src/graph/primitive_inst.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2411,11 +2411,11 @@ memory::ptr primitive_inst::allocate_output(engine& _engine,
24112411
if ((_node.is_output() && is_reorder_weights) || (!_node.is_output() && _node.is_type<input_layout>()))
24122412
reset = false;
24132413
GPU_DEBUG_LOG << "[" << _node.id() << ": constant]" << std::endl;
2414-
return ov::intel_gpu::allocate_memory_evenif_zero_bytes(_engine, layout, alloc_type, reset);
2414+
return _engine.allocate_memory(layout, alloc_type, reset);
24152415
}
24162416
} else if (!_node.can_share_buffer() || impl_params.can_be_optimized() || _node.is_output()) {
24172417
GPU_DEBUG_LOG << "[" << _node.id() << ": output]" << std::endl;
2418-
return ov::intel_gpu::allocate_memory_evenif_zero_bytes(_engine, layout, alloc_type, reset);
2418+
return _engine.allocate_memory(layout, alloc_type, reset);
24192419
} else {
24202420
return get_memory_from_pool(_engine,
24212421
net_id,

src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -454,22 +454,25 @@ gpu_usm::gpu_usm(ocl_engine* engine, const layout& layout, allocation_type type)
454454
, memory(engine, layout, type, nullptr)
455455
, _buffer(engine->get_usm_helper())
456456
, _host_buffer(engine->get_usm_helper()) {
457+
auto actual_bytes_count = _bytes_count;
458+
if (actual_bytes_count == 0)
459+
actual_bytes_count = 1;
457460
switch (get_allocation_type()) {
458461
case allocation_type::usm_host:
459-
_buffer.allocateHost(_bytes_count);
462+
_buffer.allocateHost(actual_bytes_count);
460463
break;
461464
case allocation_type::usm_shared:
462-
_buffer.allocateShared(_bytes_count);
465+
_buffer.allocateShared(actual_bytes_count);
463466
break;
464467
case allocation_type::usm_device:
465-
_buffer.allocateDevice(_bytes_count);
468+
_buffer.allocateDevice(actual_bytes_count);
466469
break;
467470
default:
468471
CLDNN_ERROR_MESSAGE("gpu_usm allocation type",
469472
"Unknown unified shared memory type!");
470473
}
471474

472-
m_mem_tracker = std::make_shared<MemoryTracker>(engine, _buffer.get(), layout.bytes_count(), type);
475+
m_mem_tracker = std::make_shared<MemoryTracker>(engine, _buffer.get(), actual_bytes_count, type);
473476
}
474477

475478
void* gpu_usm::lock(const stream& stream, mem_lock_type type) {

0 commit comments

Comments (0)