
Commit 3bd87f0

[GPU] Add check of multiple axis broadcasting in is_valid_fusion() when dynamic shape (#28252)
### Details:
- Add a check for multi-axis broadcasting in is_valid_fusion() when the shape is dynamic.
- The current OCL fused_op JIT constant doesn't support multi-axis broadcast.
  ![image](https://github.com/user-attachments/assets/5064faef-252c-416e-a7ba-71151d35db1e)

### Tickets:
- 159939
1 parent 74041ec commit 3bd87f0
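
To make the limitation concrete, the short sketch below (an editor illustration, not part of the commit) uses ov::PartialShape::broadcast_merge_into to merge a {1, 1} eltwise operand into a {1024, 1024} gemm output: the shapes are broadcast-compatible, but the operand has to be stretched along both axes at once, and that multi-axis case is what the OCL fused-op JIT load cannot express when the dimensions are dynamic. NUMPY auto-broadcast is assumed here; the plugin reads the spec from the fused eltwise descriptor.

```cpp
// Editor sketch: shape-level view of the unsupported fusion pattern.
#include <iostream>

#include "openvino/core/partial_shape.hpp"
#include "openvino/op/util/attr_types.hpp"

int main() {
    ov::PartialShape merged{1024, 1024};    // gemm (MatMul) output shape
    const ov::PartialShape operand{1, 1};   // fused eltwise (Multiply) operand

    // Succeeds shape-wise, but the operand is broadcast on axis 0 and axis 1 at the same time.
    const bool can_broadcast = ov::PartialShape::broadcast_merge_into(
        merged, operand, ov::op::AutoBroadcastType::NUMPY);

    std::cout << "can_broadcast=" << can_broadcast << ", merged=" << merged << std::endl;
    return 0;
}
```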

File tree: 2 files changed (+107, -0 lines)

src/plugins/intel_gpu/src/graph/primitive_inst.cpp

Lines changed: 12 additions & 0 deletions
@@ -2658,6 +2658,18 @@ bool primitive_inst::is_valid_fusion() const {
         if (fd.is_type<eltwise>())
             can_broadcast = ov::PartialShape::broadcast_merge_into(merged_shape, outer_dep_pshape, fd.typed_desc<eltwise>()->broadcast_spec);

+        // Check if broadcast happens on more than a single axis.
+        // The current gemm_tiled_opt kernel FUSED_OP_LOAD macro cannot support broadcast on a dynamic dimension.
+        if (_node->is_type<gemm>() && can_broadcast == true && merged_shape.rank().get_length() == outer_dep_pshape.rank().get_length()) {
+            uint8_t broadcast_more_than_single_axis = 0;
+            for (int64_t i = 0; i < merged_shape.rank().get_length(); i++) {
+                if (merged_shape.get_shape().at(i) != outer_dep_pshape.get_shape().at(i))
+                    broadcast_more_than_single_axis++;
+            }
+            if (broadcast_more_than_single_axis > 1)
+                can_broadcast = false;
+        }
+
 #ifdef ENABLE_ONEDNN_FOR_GPU
         // WA for OneDNN binary add fusions: we need to broadcast batch dimension to avoid situation with
         // batch dimension mismatch in OneDNN tensor descriptors as follow:
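
For readers who want to exercise the new gate outside the plugin, here is a hedged standalone restatement of the hunk above as a free function: it re-runs broadcast_merge_into on static shapes and counts the axes where the merged shape differs from the fused-operand shape, rejecting the fusion when more than one axis would be broadcast. The function name is hypothetical and the NUMPY broadcast spec is an assumption; in the real code the spec comes from the fused eltwise descriptor and the check runs on the node's actual shapes.

```cpp
// Editor sketch mirroring the added check; not the plugin implementation.
#include <cstdint>
#include <iostream>

#include "openvino/core/partial_shape.hpp"
#include "openvino/op/util/attr_types.hpp"

// Hypothetical helper: returns false when the fused operand would be broadcast
// on more than one axis of the gemm output, which the commit now unfuses.
static bool fusion_broadcast_is_supported(const ov::PartialShape& gemm_out_pshape,
                                          const ov::PartialShape& outer_dep_pshape) {
    ov::PartialShape merged_shape = gemm_out_pshape;
    bool can_broadcast = ov::PartialShape::broadcast_merge_into(merged_shape,
                                                                outer_dep_pshape,
                                                                ov::op::AutoBroadcastType::NUMPY);
    if (can_broadcast && merged_shape.rank().get_length() == outer_dep_pshape.rank().get_length()) {
        uint8_t broadcast_axis_count = 0;
        for (int64_t i = 0; i < merged_shape.rank().get_length(); i++) {
            if (merged_shape.get_shape().at(i) != outer_dep_pshape.get_shape().at(i))
                broadcast_axis_count++;
        }
        if (broadcast_axis_count > 1)
            can_broadcast = false;
    }
    return can_broadcast;
}

int main() {
    std::cout << fusion_broadcast_is_supported({1024, 1024}, {1, 1}) << "\n";     // 0: two broadcast axes, unfused
    std::cout << fusion_broadcast_is_supported({1024, 1024}, {1024, 1}) << "\n";  // 1: single broadcast axis, still fusable
    return 0;
}
```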
Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "common_test_utils/ov_tensor_utils.hpp"
+#include "common_test_utils/file_utils.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
+
+#include "openvino/op/parameter.hpp"
+#include "openvino/op/matmul.hpp"
+#include "openvino/op/multiply.hpp"
+
+namespace {
+
+using ov::test::InputShape;
+
+using DynamicUnfusionsParams = std::tuple<std::vector<InputShape>,  // input shapes
+                                          ov::element::Type>;       // input precision
+
+class DynamicUnfusions : public testing::WithParamInterface<DynamicUnfusionsParams>,
+                         virtual public ov::test::SubgraphBaseTest {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<DynamicUnfusionsParams> obj) {
+        std::vector<InputShape> input_shapes;
+        ov::element::Type input_precision;
+
+        std::tie(input_shapes, input_precision) = obj.param;
+
+        std::ostringstream result;
+        result << "IS=(";
+        for (const auto& shape : input_shapes) {
+            result << ov::test::utils::partialShape2str({shape.first}) << "_";
+        }
+        result << ")_TS=";
+        for (const auto& shape : input_shapes) {
+            result << "(";
+            if (!shape.second.empty()) {
+                auto itr = shape.second.begin();
+                do {
+                    result << ov::test::utils::vec2str(*itr);
+                } while (++itr != shape.second.end() && result << "_");
+            }
+            result << ")_";
+        }
+        result << "input_precision=" << input_precision;
+        return result.str();
+    }
+
+protected:
+    std::shared_ptr<ov::Model> init_subgraph(std::vector<ov::PartialShape>& input_shapes,
+                                             const ov::element::Type input_precision) {
+        auto input0 = std::make_shared<ov::op::v0::Parameter>(input_precision, input_shapes[0]);
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(input_precision, input_shapes[1]);
+        auto input2 = std::make_shared<ov::op::v0::Parameter>(input_precision, input_shapes[2]);
+
+        auto matmul = std::make_shared<ov::op::v0::MatMul>(input0, input1);
+        auto mul = std::make_shared<ov::op::v1::Multiply>(matmul, input2);
+
+        matmul->set_friendly_name("MatMul");
+        mul->set_friendly_name("Multiply");
+
+        return std::make_shared<ov::Model>(ov::NodeVector{mul}, ov::ParameterVector{input0, input1, input2}, "DynamicUnfusions");
+    }
+
+    void SetUp() override {
+        targetDevice = ov::test::utils::DEVICE_GPU;
+
+        std::vector<InputShape> input_shapes;
+        ov::element::Type input_precision;
+
+        std::tie(input_shapes, input_precision) = GetParam();
+
+        init_input_shapes(input_shapes);
+
+        inType = outType = input_precision;
+        function = init_subgraph(inputDynamicShapes, input_precision);
+    }
+};
+
+TEST_P(DynamicUnfusions, Inference) {
+    run();
+}
+
+const std::vector<ov::element::Type> input_precisions = {ov::element::f32};
+
+const std::vector<std::vector<InputShape>> input_shapes_dyn = {
+    {{{1024, -1}, {{1024, 1024}}}, {{-1, 1024}, {{1024, 1024}}}, {{1, -1}, {{1, 1}}}},
+};
+
+INSTANTIATE_TEST_SUITE_P(DynamicUnfusions_basic,
+                         DynamicUnfusions,
+                         ::testing::Combine(::testing::ValuesIn(input_shapes_dyn),
+                                            ::testing::ValuesIn(input_precisions)),
+                         DynamicUnfusions::getTestCaseName);
+}  // namespace
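
In the single parameter set above, the actual shapes make the MatMul output {1024, 1024} while the third input feeding the Multiply is {1, 1}, so the fused eltwise would need broadcasting on both output axes and the new is_valid_fusion() check is expected to run the Multiply unfused at runtime. For contrast, here is a hypothetical extra parameter set (an editor sketch, not part of the commit; it reuses the InputShape alias and fixtures defined in the test above) that broadcasts on a single axis only and should therefore remain fusable:

```cpp
// Hypothetical contrast case (editor sketch): {1024, 1} broadcasts against the
// {1024, 1024} MatMul output along one axis only, so the fusion stays valid.
const std::vector<std::vector<InputShape>> input_shapes_single_axis_dyn = {
    {{{1024, -1}, {{1024, 1024}}}, {{-1, 1024}, {{1024, 1024}}}, {{-1, 1}, {{1024, 1}}}},
};

// It could be instantiated the same way as the case above:
// INSTANTIATE_TEST_SUITE_P(DynamicUnfusions_single_axis,
//                          DynamicUnfusions,
//                          ::testing::Combine(::testing::ValuesIn(input_shapes_single_axis_dyn),
//                                             ::testing::ValuesIn(input_precisions)),
//                          DynamicUnfusions::getTestCaseName);
```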
