@@ -1996,7 +1996,8 @@ class fully_connected_gpu_tests: public ::testing::Test {
19961996 long int batch_num,
19971997 long int scales_group_size = 128 ,
19981998 bool is_uint4 = false ,
1999- bool is_wei_dyn = false ) {
1999+ bool is_wei_dyn = false ,
2000+ bool is_output_fp16 = true ) {
20002001 tests::random_generator rg (GET_SUITE_NAME);
20012002 auto & engine = get_test_engine ();
20022003 auto supports_immad = engine.get_device_info ().supports_immad ;
@@ -2045,7 +2046,7 @@ class fully_connected_gpu_tests: public ::testing::Test {
20452046 " bias" ,
20462047 " scale" ,
20472048 dcomp_zp_name,
2048- data_types::f16 ,
2049+ is_output_fp16 ? data_types::f16 : data_types:: f32 ,
20492050 2 ,
20502051 2 );
20512052
@@ -2127,14 +2128,26 @@ class fully_connected_gpu_tests: public ::testing::Test {
21272128 ASSERT_TRUE (false );
21282129 }
21292130
2130- auto output_mem = outputs.begin ()->second .get_memory ();
2131- cldnn::mem_lock<ov::float16> output_ptr (output_mem, get_test_stream ());
2131+ if (is_output_fp16) {
2132+ auto output_mem = outputs.begin ()->second .get_memory ();
2133+ cldnn::mem_lock<ov::float16> output_ptr (output_mem, get_test_stream ());
21322134
2133- auto ref_output_mem = get_ref_results ();
2134- cldnn::mem_lock<ov::float16> output_ptr_ref (ref_output_mem, get_test_stream ());
2135+ auto ref_output_mem = get_ref_results ();
2136+ cldnn::mem_lock<ov::float16> output_ptr_ref (ref_output_mem, get_test_stream ());
2137+
2138+ for (size_t i = 0 ; i < output_ptr_ref.size () / batch_num; i++) {
2139+ EXPECT_NEAR (output_ptr_ref[i], output_ptr[i], 30.0 ) << " i = " << i;
2140+ }
2141+ } else {
2142+ auto output_mem = outputs.begin ()->second .get_memory ();
2143+ cldnn::mem_lock<float > output_ptr (output_mem, get_test_stream ());
2144+
2145+ auto ref_output_mem = get_ref_results ();
2146+ cldnn::mem_lock<float > output_ptr_ref (ref_output_mem, get_test_stream ());
21352147
2136- for (size_t i = 0 ; i < output_ptr_ref.size () / batch_num; i++) {
2137- EXPECT_NEAR (output_ptr_ref[i], output_ptr[i], 30.0 ) << " i = " << i;
2148+ for (size_t i = 0 ; i < output_ptr_ref.size () / batch_num; i++) {
2149+ EXPECT_NEAR (output_ptr_ref[i], output_ptr[i], 30.0 ) << " i = " << i;
2150+ }
21382151 }
21392152 }
21402153
@@ -2143,7 +2156,8 @@ class fully_connected_gpu_tests: public ::testing::Test {
21432156 long int batch_num,
21442157 long int scales_group_size = 128 ,
21452158 bool is_uint4 = false ,
2146- bool is_wei_dyn = false ) {
2159+ bool is_wei_dyn = false ,
2160+ bool is_output_fp16 = true ) {
21472161 tests::random_generator rg (GET_SUITE_NAME);
21482162 auto & engine = get_test_engine ();
21492163 auto supports_immad = engine.get_device_info ().supports_immad ;
@@ -2192,7 +2206,7 @@ class fully_connected_gpu_tests: public ::testing::Test {
21922206 " " ,
21932207 " scale" ,
21942208 " dcomp_zp" ,
2195- data_types::f16 ,
2209+ is_output_fp16 ? data_types::f16 : data_types:: f32 ,
21962210 2 ,
21972211 2 );
21982212
@@ -2272,22 +2286,35 @@ class fully_connected_gpu_tests: public ::testing::Test {
22722286 ASSERT_TRUE (false );
22732287 }
22742288
2275- auto output_mem = outputs.begin ()->second .get_memory ();
2276- cldnn::mem_lock<ov::float16> output_ptr (output_mem, get_test_stream ());
2289+ if (is_output_fp16) {
2290+ auto output_mem = outputs.begin ()->second .get_memory ();
2291+ cldnn::mem_lock<ov::float16> output_ptr (output_mem, get_test_stream ());
22772292
2278- auto ref_output_mem = get_ref_results ();
2279- cldnn::mem_lock<ov::float16> output_ptr_ref (ref_output_mem, get_test_stream ());
2293+ auto ref_output_mem = get_ref_results ();
2294+ cldnn::mem_lock<ov::float16> output_ptr_ref (ref_output_mem, get_test_stream ());
2295+
2296+ for (size_t i = 0 ; i < output_ptr_ref.size () / batch_num; i++) {
2297+ EXPECT_NEAR (output_ptr_ref[i], output_ptr[i], 10.0 ) << " i = " << i;
2298+ }
2299+ } else {
2300+ auto output_mem = outputs.begin ()->second .get_memory ();
2301+ cldnn::mem_lock<float > output_ptr (output_mem, get_test_stream ());
2302+
2303+ auto ref_output_mem = get_ref_results ();
2304+ cldnn::mem_lock<float > output_ptr_ref (ref_output_mem, get_test_stream ());
22802305
2281- for (size_t i = 0 ; i < output_ptr_ref.size () / batch_num; i++) {
2282- EXPECT_NEAR (output_ptr_ref[i], output_ptr[i], 10.0 ) << " i = " << i;
2306+ for (size_t i = 0 ; i < output_ptr_ref.size () / batch_num; i++) {
2307+ EXPECT_NEAR (output_ptr_ref[i], output_ptr[i], 10.0 ) << " i = " << i;
2308+ }
22832309 }
22842310 }
22852311
22862312 void test_compressed_int4_scale_activation_gemv (bool is_caching_test,
22872313 bool is_dynamic,
22882314 long int batch_num,
22892315 long int scales_group_size = 128 ,
2290- bool is_wei_dyn = false ) {
2316+ bool is_wei_dyn = false ,
2317+ bool is_output_fp16 = true ) {
22912318 tests::random_generator rg (GET_SUITE_NAME);
22922319 auto & engine = get_test_engine ();
22932320 auto supports_immad = engine.get_device_info ().supports_immad ;
@@ -2335,7 +2362,7 @@ class fully_connected_gpu_tests: public ::testing::Test {
23352362 " bias" ,
23362363 " scale" ,
23372364 dcomp_zp_name,
2338- data_types::f16 ,
2365+ is_output_fp16? data_types::f16 : data_types:: f32 ,
23392366 2 ,
23402367 2 );
23412368
@@ -2411,20 +2438,32 @@ class fully_connected_gpu_tests: public ::testing::Test {
24112438 }
24122439 }
24132440
2414- auto output_mem = outputs.begin ()->second .get_memory ();
2415- cldnn::mem_lock<ov::float16> output_ptr (output_mem, get_test_stream ());
2441+ if (is_output_fp16) {
2442+ auto output_mem = outputs.begin ()->second .get_memory ();
2443+ cldnn::mem_lock<ov::float16> output_ptr (output_mem, get_test_stream ());
24162444
2417- auto ref_output_mem = get_ref_results ();
2418- cldnn::mem_lock<ov::float16> output_ptr_ref (ref_output_mem, get_test_stream ());
2445+ auto ref_output_mem = get_ref_results ();
2446+ cldnn::mem_lock<ov::float16> output_ptr_ref (ref_output_mem, get_test_stream ());
24192447
2420- for (size_t i = 0 ; i < output_ptr_ref.size (); i++)
2421- ASSERT_NEAR (output_ptr_ref[i], output_ptr[i], 9.0 ) << " i = " << i;
2448+ for (size_t i = 0 ; i < output_ptr_ref.size (); i++)
2449+ ASSERT_NEAR (output_ptr_ref[i], output_ptr[i], 9.0 ) << " i = " << i;
2450+ } else {
2451+ auto output_mem = outputs.begin ()->second .get_memory ();
2452+ cldnn::mem_lock<float > output_ptr (output_mem, get_test_stream ());
2453+
2454+ auto ref_output_mem = get_ref_results ();
2455+ cldnn::mem_lock<float > output_ptr_ref (ref_output_mem, get_test_stream ());
2456+
2457+ for (size_t i = 0 ; i < output_ptr_ref.size (); i++)
2458+ ASSERT_NEAR (output_ptr_ref[i], output_ptr[i], 9.0 ) << " i = " << i;
2459+ }
24222460 }
24232461
24242462 void test_compressed_int4_scale_large_n_gemv (bool is_caching_test,
24252463 bool is_dynamic,
24262464 long int batch_num,
2427- bool is_dyn_quan = false ) {
2465+ bool is_dyn_quan = false ,
2466+ bool is_output_fp16 = true ) {
24282467 tests::random_generator rg (GET_SUITE_NAME);
24292468 auto & engine = get_test_engine ();
24302469
@@ -2447,7 +2486,7 @@ class fully_connected_gpu_tests: public ::testing::Test {
24472486 auto input_data = rg.generate_random_1d <ov::float16>(batch_num * ifm_num, -1 .0f , 1 .0f );
24482487 set_values (input_mem, input_data);
24492488
2450- auto weigths_data = rg.generate_random_1d <uint8_t >(ofm_num * ifm_num / 2 , 0 , 10 );
2489+ auto weigths_data = rg.generate_random_1d <uint8_t >(ofm_num * ifm_num / 2 , 0 , 5 );
24512490 set_values (weights_mem, weigths_data);
24522491
24532492 auto scale_data = rg.generate_random_1d <ov::float16>(ofm_num * ifm_num / scales_group_size, -1 .0f , 1 .0f );
@@ -2470,7 +2509,7 @@ class fully_connected_gpu_tests: public ::testing::Test {
24702509 " " ,
24712510 " scale" ,
24722511 dcomp_zp_name,
2473- data_types::f16 ,
2512+ is_output_fp16? data_types::f16 : data_types:: f32 ,
24742513 3 ,
24752514 2 );
24762515
@@ -2496,9 +2535,6 @@ class fully_connected_gpu_tests: public ::testing::Test {
24962535 network.set_input_data (" input" , input_mem);
24972536
24982537 auto outputs = network.execute ();
2499- // for (size_t i = 0; i < 100; i++) {
2500- // outputs = network.execute();
2501- // }
25022538 OPENVINO_ASSERT (outputs.size () == 1 );
25032539 OPENVINO_ASSERT (outputs.begin ()->first == " fc_prim" );
25042540
@@ -2559,14 +2595,26 @@ class fully_connected_gpu_tests: public ::testing::Test {
25592595 ASSERT_EQ (outputs.size (), size_t (1 ));
25602596 ASSERT_EQ (outputs.begin ()->first , " fc_prim" );
25612597
2562- auto output_mem = outputs.begin ()->second .get_memory ();
2563- cldnn::mem_lock<ov::float16> output_ptr (output_mem, get_test_stream ());
2598+ if (is_output_fp16) {
2599+ auto output_mem = outputs.begin ()->second .get_memory ();
2600+ cldnn::mem_lock<ov::float16> output_ptr (output_mem, get_test_stream ());
25642601
2565- auto ref_output_mem = get_ref_results ();
2566- cldnn::mem_lock<ov::float16> output_ptr_ref (ref_output_mem, get_test_stream ());
2602+ auto ref_output_mem = get_ref_results ();
2603+ cldnn::mem_lock<ov::float16> output_ptr_ref (ref_output_mem, get_test_stream ());
25672604
2568- for (size_t i = 0 ; i < output_ptr_ref.size (); i++) {
2569- EXPECT_NEAR (output_ptr_ref[i], output_ptr[i], 9.0 ) << " i = " << i;
2605+ for (size_t i = 0 ; i < output_ptr_ref.size (); i++) {
2606+ EXPECT_NEAR (output_ptr_ref[i], output_ptr[i], 9.0 ) << " i = " << i;
2607+ }
2608+ } else {
2609+ auto output_mem = outputs.begin ()->second .get_memory ();
2610+ cldnn::mem_lock<float > output_ptr (output_mem, get_test_stream ());
2611+
2612+ auto ref_output_mem = get_ref_results ();
2613+ cldnn::mem_lock<float > output_ptr_ref (ref_output_mem, get_test_stream ());
2614+
2615+ for (size_t i = 0 ; i < output_ptr_ref.size (); i++) {
2616+ EXPECT_NEAR (output_ptr_ref[i], output_ptr[i], 9.0 ) << " i = " << i;
2617+ }
25702618 }
25712619 }
25722620
@@ -5053,6 +5101,27 @@ TEST_F(fully_connected_gpu_tests, gemv_compressed_int4_dynamic_batch) {
50535101 this ->test_compressed_int4_scale_dynamic_batch_gemv (false , 128 , false );
50545102}
50555103
5104+ // Tests for fp32 output (the helpers default to fp16 output)
5105+ TEST_F (fully_connected_gpu_tests, gemv_compressed_int4_scale_dynamic_b1g32_f32) {
5106+ this ->test_compressed_int4_scale_gemv (false , true , 1 , 32 , false , false , false ); // NOTE(review): presumably (is_caching_test, is_dynamic, batch_num, scales_group_size, is_uint4, is_wei_dyn, is_output_fp16), so the trailing false would request f32 output - confirm against the helper signature
5107+ }
5108+
5109+ TEST_F (fully_connected_gpu_tests, gemv_compressed_int4_scale_b1g32_f32) {
5110+ this ->test_compressed_int4_scale_gemv (false , false , 1 , 32 , false , false , false ); // NOTE(review): presumably (is_caching_test, is_dynamic, batch_num, scales_group_size, is_uint4, is_wei_dyn, is_output_fp16), so the trailing false would request f32 output - confirm against the helper signature
5111+ }
5112+
5113+ TEST_F (fully_connected_gpu_tests, gemv_compressed_int4_scale_relu_b1g128_f32) {
5114+ this ->test_compressed_int4_scale_activation_gemv (false , false , 1 , 128 , false , false ); // is_caching_test=false, is_dynamic=false, batch_num=1, scales_group_size=128, is_wei_dyn=false, is_output_fp16=false (fc primitive is created with data_types::f32)
5115+ }
5116+
5117+ TEST_F (fully_connected_gpu_tests, gemv_compressed_int4_scale_large_n_b1_f32) {
5118+ this ->test_compressed_int4_scale_large_n_gemv (false , false , 1 , false , false ); // is_caching_test=false, is_dynamic=false, batch_num=1, is_dyn_quan=false, is_output_fp16=false (fc primitive is created with data_types::f32)
5119+ }
5120+
5121+ TEST_F (fully_connected_gpu_tests, gemv_compressed_int4_scale_large_n_dynamic_b1_f32) {
5122+ this ->test_compressed_int4_scale_large_n_gemv (false , true , 1 , false , false ); // is_caching_test=false, is_dynamic=true, batch_num=1, is_dyn_quan=false, is_output_fp16=false (fc primitive is created with data_types::f32)
5123+ }
5124+
50565125// Test weight zp for INT8 ASYM
50575126TEST_F (fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_128_large_input_1025) {
50585127 this ->test_comp_weight_scale_zp (true , 1025 , 1792 , 4608 , 128 , 128 , 1 , WzpMode::AsymmetricScalar, WeightMode::Bit8, TargetDevice::SkipDgpu);
0 commit comments