@@ -6,13 +6,13 @@
 #include <vector>
 
 #include "common_test_utils/ov_tensor_utils.hpp"
+#include "openvino/runtime/exec_model_info.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
 #include "openvino/op/convert.hpp"
 #include "openvino/op/gelu.hpp"
 #include "openvino/op/matmul.hpp"
 #include "openvino/op/multiply.hpp"
 #include "openvino/op/swish.hpp"
-#include "openvino/runtime/exec_model_info.hpp"
-#include "shared_test_classes/base/ov_subgraph.hpp"
 
 namespace ov {
 namespace test {
@@ -23,7 +23,6 @@ struct LLMMLPFusionParams {
     size_t up_size;
     std::string act_type;
     bool use_dynamic_quant;
-    bool swap_inputs;  // true = swap inputs to prevent fusion, false = normal order for fusion
 };
 
 class LLMMLPFusionTest : public testing::WithParamInterface<LLMMLPFusionParams>, public ov::test::SubgraphBaseTest {
@@ -40,7 +39,6 @@ class LLMMLPFusionTest : public testing::WithParamInterface<LLMMLPFusionParams>,
         result << "up_size=" << obj.param.up_size << "_";
         result << "act_type=" << obj.param.act_type << "_";
         result << "use_dynamic_quant=" << obj.param.use_dynamic_quant << "_";
-        result << "swap_inputs=" << obj.param.swap_inputs << "_";
         result << obj.index;
         return result.str();
     }
@@ -72,8 +70,7 @@ class LLMMLPFusionTest : public testing::WithParamInterface<LLMMLPFusionParams>,
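             // Per-output-channel scales drawn from [0, 1) at 1/128 resolution; multiplying the
             // converted weights by these models weight decompression (an assumption, since the
             // quantized-weight constant itself is created outside this hunk).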
             in_data.start_from = 0;
             in_data.range = 1;
             in_data.resolution = 128;
-            auto tensor_scale_per_oc =
-                ov::test::utils::create_and_fill_tensor(ov::element::f32, ov::Shape{OC, 1}, in_data);
+            auto tensor_scale_per_oc = ov::test::utils::create_and_fill_tensor(ov::element::f32, ov::Shape{OC, 1}, in_data);
             auto scale_per_oc = std::make_shared<ov::op::v0::Constant>(tensor_scale_per_oc);
 
             auto weight_deq = std::make_shared<ov::op::v1::Multiply>(weight_const_f32, scale_per_oc);
@@ -88,8 +85,7 @@ class LLMMLPFusionTest : public testing::WithParamInterface<LLMMLPFusionParams>,
             return std::make_shared<ov::op::v0::Constant>(tensor);
         };
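         // An assumption about the hint semantics: uint64_t::max() removes any group-size
         // limit, so each row forms a single dynamic-quantization group.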
         if (param.use_dynamic_quant)
-            configuration.insert(
-                {ov::hint::dynamic_quantization_group_size.name(), std::numeric_limits<uint64_t>::max()});
+            configuration.insert({ov::hint::dynamic_quantization_group_size.name(), std::numeric_limits<uint64_t>::max()});
 
         auto gate_weight = create_const(param.up_size, param.down_size, 100);
         auto up_weight = create_const(param.up_size, param.down_size, 100);
@@ -105,22 +101,13 @@ class LLMMLPFusionTest : public testing::WithParamInterface<LLMMLPFusionParams>,
         if (param.act_type == "Gelu")
             gate_act = std::make_shared<ov::op::v7::Gelu>(gate_proj);
 
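         // LLaMA-style MLP body: down_proj(act(gate_proj(x)) * up_proj(x)). The CPU plugin is
         // expected to fuse this whole subgraph into one LLMMLP node (verified below).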
-        // Control input order based on swap_inputs parameter
-        std::shared_ptr<ov::op::v1::Multiply> gate_up;
-        if (param.swap_inputs) {
-            // Swapped order should prevent fusion
-            gate_up = std::make_shared<ov::op::v1::Multiply>(up_proj, gate_act);
-        } else {
-            // Normal order should allow fusion
-            gate_up = std::make_shared<ov::op::v1::Multiply>(gate_act, up_proj);
-        }
-
+        auto gate_up = std::make_shared<ov::op::v1::Multiply>(gate_act, up_proj);
         auto output = std::make_shared<ov::op::v0::MatMul>(gate_up, down_weight, false, true);
 
         function = std::make_shared<ov::Model>(ov::OutputVector{output}, ov::ParameterVector{src});
     }
 
-    void check_fusion_result() {
+    void check_results() {
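         // Count fused nodes in the compiled (runtime) graph; the pattern built above should
         // collapse into exactly one node whose layer type is "LLMMLP".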
         auto exec_model = compiledModel.get_runtime_model();
 
         int fused_node_found = 0;
@@ -129,40 +116,26 @@ class LLMMLPFusionTest : public testing::WithParamInterface<LLMMLPFusionParams>,
             if (layer_type == "LLMMLP")
                 fused_node_found++;
         }
-
-        auto& param = this->GetParam();
-        if (param.swap_inputs) {
-            // When inputs are swapped, fusion should NOT happen
-            ASSERT_EQ(fused_node_found, 0) << "Fusion should not occur with swapped inputs";
-        } else {
-            // Normal case, fusion should happen
-            ASSERT_EQ(fused_node_found, 1) << "Fusion should occur with correct input order";
-        }
+        ASSERT_EQ(fused_node_found, 1);
     }
 };
 
 TEST_P(LLMMLPFusionTest, CompareWithRefs) {
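     // The fused LLMMLP kernel targets AMX-capable CPUs (avx512_core_amx_bf16), so the
     // test only runs there.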
     if (!ov::with_cpu_x86_avx512_core_amx_bf16())
         GTEST_SKIP();
     run();
-    check_fusion_result();
+    check_results();
 }
 
 namespace {
 
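 // Dynamic [batch, seq_len, hidden] input; the two static shapes exercise both dynamic dims.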
-static ov::test::InputShape ishape{ov::PartialShape{-1, -1, 4096 / 4},
-                                   {ov::Shape{1, 8, 4096 / 4}, ov::Shape{5, 37, 4096 / 4}}};
+static ov::test::InputShape ishape{ov::PartialShape{-1, -1, 4096 / 4}, {ov::Shape{1, 8, 4096 / 4}, ov::Shape{5, 37, 4096 / 4}}};
 
-// Test parameters combining both normal fusion and no-fusion cases
 const std::vector<LLMMLPFusionParams> mlp_params = {
-    // Normal cases - should fuse (swap_inputs = false)
-    {ishape, 4096 / 4, 11008 / 4, "Gelu", false, false},
-    {ishape, 4096 / 4, 11008 / 4, "Gelu", true, false},
-    {ishape, 4096 / 4, 11008 / 4, "Swish", false, false},
-    {ishape, 4096 / 4, 11008 / 4, "Swish", true, false},
-
-    // Port order issue cases - should NOT fuse (swap_inputs = true)
-    {ishape, 4096 / 4, 11008 / 4, "Gelu", false, true},
+    {ishape, 4096 / 4, 11008 / 4, "Gelu", false},
+    {ishape, 4096 / 4, 11008 / 4, "Gelu", true},
+    {ishape, 4096 / 4, 11008 / 4, "Swish", false},
+    {ishape, 4096 / 4, 11008 / 4, "Swish", true},
 };
 
 INSTANTIATE_TEST_SUITE_P(smoke_LLMMLPFusion,