Skip to content

Commit 0bd527d

Browse files
authored
[NPU] Treat the boolean type as u8 and remove unnecessary type conversions (#30690)
### Details:
- *Treat the boolean type as u8.*
- *If ZE_GRAPH_ARGUMENT_PRECISION_BOOLEAN is reported by the driver, the plugin fails to convert it to an element type.*
- *There are too many type conversions to keep updated whenever a new type is supported; the unnecessary conversions are removed.*

### Tickets:
- *E#167996*

---------

Signed-off-by: Bogdan Pereanu <[email protected]>
1 parent f34496e commit 0bd527d

File tree

4 files changed

+67
-180
lines changed

4 files changed

+67
-180
lines changed

src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ void check_level_zero_attributes_match(const IODescriptor& ioDescriptor, const A
4444
" vs. ",
4545
zeDescriptorName,
4646
". The I/O order may have been altered, which could lead to an erroneous behavior.");
47-
OPENVINO_ASSERT(zeroUtils::getZePrecision(ioDescriptor.precision) == zeDescriptor.info.devicePrecision,
47+
OPENVINO_ASSERT(ioDescriptor.precision == zeroUtils::toOVElementType(zeDescriptor.info.devicePrecision),
4848
"Precision mismatch for input/output named " + ioDescriptor.nameFromCompiler);
4949

5050
const std::vector<size_t>& ovDimensions = ioDescriptor.shapeFromCompiler.get_max_shape();
@@ -723,10 +723,12 @@ void ZeroInferRequest::check_network_precision(const ov::element::Type_t precisi
723723
break;
724724
case ov::element::Type_t::f64:
725725
break;
726+
case ov::element::Type_t::boolean:
727+
break;
726728
default:
727-
OPENVINO_THROW(
728-
"Unsupported tensor precision: " + ov::element::Type(precision).get_type_name() +
729-
"! Supported precisions: FP32, FP16, BF16, FP8, NF4, U4, I4, U8, I8, U16, I16, U32, I32, U64, I64, FP64");
729+
OPENVINO_THROW("Unsupported tensor precision: " + ov::element::Type(precision).get_type_name() +
730+
"! Supported precisions: FP32, FP16, BF16, FP8, NF4, U4, I4, U8, I8, U16, I16, U32, I32, U64, "
731+
"I64, FP64, BOOLEAN");
730732
}
731733
}
732734

src/plugins/intel_npu/src/common/src/sync_infer_request.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,11 +179,24 @@ void SyncInferRequest::check_tensor(const ov::Output<const ov::Node>& port,
179179

180180
OPENVINO_ASSERT(tensor->is_continuous(), "The tensor is not continuous");
181181

182-
OPENVINO_ASSERT(port.get_element_type() == tensor->get_element_type(),
183-
"The tensor element type is not corresponding with output element type (",
184-
tensor->get_element_type(),
185-
" != ",
186-
port.get_element_type());
182+
if ((port.get_element_type() == ov::element::Type_t::boolean ||
183+
tensor->get_element_type() == ov::element::Type_t::boolean) &&
184+
port.get_element_type() != tensor->get_element_type()) {
185+
// Exception case for boolean treated as u8 in the NPU driver
186+
OPENVINO_ASSERT(
187+
port.get_element_type() == ov::element::Type_t::u8 || tensor->get_element_type() == ov::element::Type_t::u8,
188+
"The tensor element type is not corresponding with output element type (",
189+
tensor->get_element_type(),
190+
" != ",
191+
port.get_element_type());
192+
} else {
193+
OPENVINO_ASSERT(port.get_element_type() == tensor->get_element_type(),
194+
"The tensor element type is not corresponding with output element type (",
195+
tensor->get_element_type(),
196+
" != ",
197+
port.get_element_type());
198+
}
199+
187200
bool is_dynamic = port.get_partial_shape().is_dynamic();
188201
OPENVINO_ASSERT(is_dynamic || port.get_shape() == tensor->get_shape(),
189202
"The ",

src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp

Lines changed: 2 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "intel_npu/prefix.hpp"
1212
#include "intel_npu/utils/zero/zero_api.hpp"
1313
#include "intel_npu/utils/zero/zero_result.hpp"
14+
#include "intel_npu/utils/zero/zero_utils.hpp"
1415
#include "intel_npu/utils/zero/zero_wrappers.hpp"
1516
#include "openvino/core/dimension.hpp"
1617
#include "openvino/core/model.hpp"
@@ -35,61 +36,6 @@
3536

3637
#define UseCopyForNativeBinary(T) (T < ZE_GRAPH_EXT_VERSION_1_7)
3738

38-
namespace {
39-
40-
ov::element::Type_t toOVElementType(const ze_graph_argument_precision_t zeElementType) {
41-
switch (zeElementType) {
42-
case ZE_GRAPH_ARGUMENT_PRECISION_UNKNOWN:
43-
return ov::element::Type_t::dynamic;
44-
case ZE_GRAPH_ARGUMENT_PRECISION_DYNAMIC:
45-
return ov::element::Type_t::dynamic;
46-
case ZE_GRAPH_ARGUMENT_PRECISION_BOOLEAN:
47-
return ov::element::Type_t::boolean;
48-
case ZE_GRAPH_ARGUMENT_PRECISION_NF4:
49-
return ov::element::Type_t::nf4;
50-
case ZE_GRAPH_ARGUMENT_PRECISION_FP8_E4M3:
51-
return ov::element::Type_t::f8e4m3;
52-
case ZE_GRAPH_ARGUMENT_PRECISION_FP8_E5M2:
53-
return ov::element::Type_t::f8e5m2;
54-
case ZE_GRAPH_ARGUMENT_PRECISION_FP8_E8M0:
55-
return ov::element::Type_t::f8e8m0;
56-
case ZE_GRAPH_ARGUMENT_PRECISION_BF16:
57-
return ov::element::Type_t::bf16;
58-
case ZE_GRAPH_ARGUMENT_PRECISION_FP16:
59-
return ov::element::Type_t::f16;
60-
case ZE_GRAPH_ARGUMENT_PRECISION_FP32:
61-
return ov::element::Type_t::f32;
62-
case ZE_GRAPH_ARGUMENT_PRECISION_FP64:
63-
return ov::element::Type_t::f64;
64-
case ZE_GRAPH_ARGUMENT_PRECISION_INT4:
65-
return ov::element::Type_t::i4;
66-
case ZE_GRAPH_ARGUMENT_PRECISION_INT8:
67-
return ov::element::Type_t::i8;
68-
case ZE_GRAPH_ARGUMENT_PRECISION_INT16:
69-
return ov::element::Type_t::i16;
70-
case ZE_GRAPH_ARGUMENT_PRECISION_INT32:
71-
return ov::element::Type_t::i32;
72-
case ZE_GRAPH_ARGUMENT_PRECISION_INT64:
73-
return ov::element::Type_t::i64;
74-
case ZE_GRAPH_ARGUMENT_PRECISION_BIN:
75-
return ov::element::Type_t::u1;
76-
case ZE_GRAPH_ARGUMENT_PRECISION_UINT4:
77-
return ov::element::Type_t::u4;
78-
case ZE_GRAPH_ARGUMENT_PRECISION_UINT8:
79-
return ov::element::Type_t::u8;
80-
case ZE_GRAPH_ARGUMENT_PRECISION_UINT16:
81-
return ov::element::Type_t::u16;
82-
case ZE_GRAPH_ARGUMENT_PRECISION_UINT32:
83-
return ov::element::Type_t::u32;
84-
case ZE_GRAPH_ARGUMENT_PRECISION_UINT64:
85-
return ov::element::Type_t::u64;
86-
default:
87-
return ov::element::Type_t::dynamic;
88-
}
89-
}
90-
91-
} // namespace
92-
9339
namespace intel_npu {
9440

9541
ZeGraphExtWrappers::ZeGraphExtWrappers(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct)
@@ -402,7 +348,7 @@ ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const uint8_t& blobData, si
402348
*/
403349
static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
404350
const std::optional<ze_graph_argument_metadata_t>& metadata) {
405-
ov::element::Type_t precision = toOVElementType(arg.devicePrecision);
351+
ov::element::Type_t precision = zeroUtils::toOVElementType(arg.devicePrecision);
406352
ov::Shape shapeFromCompiler;
407353
ov::PartialShape shapeFromIRModel;
408354
std::unordered_set<std::string> outputTensorNames;

src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp

Lines changed: 41 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -63,129 +63,55 @@ static inline ze_command_queue_priority_t toZeQueuePriority(const ov::hint::Prio
6363
}
6464
}
6565

66-
static inline std::size_t precisionToSize(const ze_graph_argument_precision_t val) {
67-
switch (val) {
68-
case ZE_GRAPH_ARGUMENT_PRECISION_INT4:
69-
return 4;
70-
case ZE_GRAPH_ARGUMENT_PRECISION_UINT4:
71-
return 4;
72-
case ZE_GRAPH_ARGUMENT_PRECISION_INT8:
73-
return 8;
74-
case ZE_GRAPH_ARGUMENT_PRECISION_UINT8:
75-
return 8;
76-
case ZE_GRAPH_ARGUMENT_PRECISION_INT16:
77-
return 16;
78-
case ZE_GRAPH_ARGUMENT_PRECISION_UINT16:
79-
return 16;
80-
case ZE_GRAPH_ARGUMENT_PRECISION_INT32:
81-
return 32;
82-
case ZE_GRAPH_ARGUMENT_PRECISION_UINT32:
83-
return 32;
84-
case ZE_GRAPH_ARGUMENT_PRECISION_INT64:
85-
return 64;
86-
case ZE_GRAPH_ARGUMENT_PRECISION_UINT64:
87-
return 64;
66+
static inline ov::element::Type_t toOVElementType(const ze_graph_argument_precision_t zeElementType) {
67+
switch (zeElementType) {
68+
case ZE_GRAPH_ARGUMENT_PRECISION_UNKNOWN:
69+
return ov::element::Type_t::dynamic;
70+
case ZE_GRAPH_ARGUMENT_PRECISION_DYNAMIC:
71+
return ov::element::Type_t::dynamic;
72+
case ZE_GRAPH_ARGUMENT_PRECISION_BOOLEAN:
73+
return ov::element::Type_t::boolean;
8874
case ZE_GRAPH_ARGUMENT_PRECISION_NF4:
89-
return 4;
75+
return ov::element::Type_t::nf4;
76+
case ZE_GRAPH_ARGUMENT_PRECISION_FP8_E4M3:
77+
return ov::element::Type_t::f8e4m3;
78+
case ZE_GRAPH_ARGUMENT_PRECISION_FP8_E5M2:
79+
return ov::element::Type_t::f8e5m2;
80+
case ZE_GRAPH_ARGUMENT_PRECISION_FP8_E8M0:
81+
return ov::element::Type_t::f8e8m0;
9082
case ZE_GRAPH_ARGUMENT_PRECISION_BF16:
91-
return 16;
83+
return ov::element::Type_t::bf16;
9284
case ZE_GRAPH_ARGUMENT_PRECISION_FP16:
93-
return 16;
85+
return ov::element::Type_t::f16;
9486
case ZE_GRAPH_ARGUMENT_PRECISION_FP32:
95-
return 32;
87+
return ov::element::Type_t::f32;
9688
case ZE_GRAPH_ARGUMENT_PRECISION_FP64:
97-
return 64;
89+
return ov::element::Type_t::f64;
90+
case ZE_GRAPH_ARGUMENT_PRECISION_INT4:
91+
return ov::element::Type_t::i4;
92+
case ZE_GRAPH_ARGUMENT_PRECISION_INT8:
93+
return ov::element::Type_t::i8;
94+
case ZE_GRAPH_ARGUMENT_PRECISION_INT16:
95+
return ov::element::Type_t::i16;
96+
case ZE_GRAPH_ARGUMENT_PRECISION_INT32:
97+
return ov::element::Type_t::i32;
98+
case ZE_GRAPH_ARGUMENT_PRECISION_INT64:
99+
return ov::element::Type_t::i64;
98100
case ZE_GRAPH_ARGUMENT_PRECISION_BIN:
99-
return 1;
100-
default:
101-
OPENVINO_THROW("precisionToSize switch->default reached");
102-
}
103-
}
104-
105-
static inline ze_graph_argument_precision_t getZePrecision(const ov::element::Type_t precision) {
106-
switch (precision) {
107-
case ov::element::Type_t::i4:
108-
return ZE_GRAPH_ARGUMENT_PRECISION_INT4;
109-
case ov::element::Type_t::u4:
110-
return ZE_GRAPH_ARGUMENT_PRECISION_UINT4;
111-
case ov::element::Type_t::i8:
112-
return ZE_GRAPH_ARGUMENT_PRECISION_INT8;
113-
case ov::element::Type_t::u8:
114-
return ZE_GRAPH_ARGUMENT_PRECISION_UINT8;
115-
case ov::element::Type_t::i16:
116-
return ZE_GRAPH_ARGUMENT_PRECISION_INT16;
117-
case ov::element::Type_t::u16:
118-
return ZE_GRAPH_ARGUMENT_PRECISION_UINT16;
119-
case ov::element::Type_t::i32:
120-
return ZE_GRAPH_ARGUMENT_PRECISION_INT32;
121-
case ov::element::Type_t::u32:
122-
return ZE_GRAPH_ARGUMENT_PRECISION_UINT32;
123-
case ov::element::Type_t::i64:
124-
return ZE_GRAPH_ARGUMENT_PRECISION_INT64;
125-
case ov::element::Type_t::u64:
126-
return ZE_GRAPH_ARGUMENT_PRECISION_UINT64;
127-
case ov::element::Type_t::nf4:
128-
return ZE_GRAPH_ARGUMENT_PRECISION_NF4;
129-
case ov::element::Type_t::f8e4m3:
130-
return ZE_GRAPH_ARGUMENT_PRECISION_FP8_E4M3;
131-
case ov::element::Type_t::f8e5m2:
132-
return ZE_GRAPH_ARGUMENT_PRECISION_FP8_E5M2;
133-
case ov::element::Type_t::f8e8m0:
134-
return ZE_GRAPH_ARGUMENT_PRECISION_FP8_E8M0;
135-
case ov::element::Type_t::bf16:
136-
return ZE_GRAPH_ARGUMENT_PRECISION_BF16;
137-
case ov::element::Type_t::f16:
138-
return ZE_GRAPH_ARGUMENT_PRECISION_FP16;
139-
case ov::element::Type_t::f32:
140-
return ZE_GRAPH_ARGUMENT_PRECISION_FP32;
141-
case ov::element::Type_t::f64:
142-
return ZE_GRAPH_ARGUMENT_PRECISION_FP64;
143-
case ov::element::Type_t::u1:
144-
return ZE_GRAPH_ARGUMENT_PRECISION_BIN;
145-
default:
146-
return ZE_GRAPH_ARGUMENT_PRECISION_UNKNOWN;
147-
}
148-
}
149-
150-
static inline std::size_t layoutCount(const ze_graph_argument_layout_t val) {
151-
switch (val) {
152-
case ZE_GRAPH_ARGUMENT_LAYOUT_NCHW:
153-
return 4;
154-
case ZE_GRAPH_ARGUMENT_LAYOUT_NHWC:
155-
return 4;
156-
case ZE_GRAPH_ARGUMENT_LAYOUT_NCDHW:
157-
return 5;
158-
case ZE_GRAPH_ARGUMENT_LAYOUT_NDHWC:
159-
return 5;
160-
case ZE_GRAPH_ARGUMENT_LAYOUT_OIHW:
161-
return 4;
162-
case ZE_GRAPH_ARGUMENT_LAYOUT_C:
163-
return 1;
164-
case ZE_GRAPH_ARGUMENT_LAYOUT_CHW:
165-
return 3;
166-
case ZE_GRAPH_ARGUMENT_LAYOUT_HW:
167-
return 2;
168-
case ZE_GRAPH_ARGUMENT_LAYOUT_NC:
169-
return 2;
170-
case ZE_GRAPH_ARGUMENT_LAYOUT_CN:
171-
return 2;
172-
case ZE_GRAPH_ARGUMENT_LAYOUT_ANY:
173-
// When input has empty shape, val is ZE_GRAPH_ARGUMENT_LAYOUT_ANY
174-
// Add this to pass Single Layer Test on Windows
175-
return 0;
101+
return ov::element::Type_t::u1;
102+
case ZE_GRAPH_ARGUMENT_PRECISION_UINT4:
103+
return ov::element::Type_t::u4;
104+
case ZE_GRAPH_ARGUMENT_PRECISION_UINT8:
105+
return ov::element::Type_t::u8;
106+
case ZE_GRAPH_ARGUMENT_PRECISION_UINT16:
107+
return ov::element::Type_t::u16;
108+
case ZE_GRAPH_ARGUMENT_PRECISION_UINT32:
109+
return ov::element::Type_t::u32;
110+
case ZE_GRAPH_ARGUMENT_PRECISION_UINT64:
111+
return ov::element::Type_t::u64;
176112
default:
177-
OPENVINO_THROW("layoutCount switch->default reached");
178-
}
179-
}
180-
181-
static inline std::size_t getSizeIOBytes(const ze_graph_argument_properties_3_t& argument) {
182-
std::size_t num_elements = 1;
183-
for (std::size_t i = 0; i < layoutCount(argument.deviceLayout); ++i) {
184-
num_elements *= argument.dims[i];
113+
return ov::element::Type_t::dynamic;
185114
}
186-
const std::size_t size_in_bits = num_elements * precisionToSize(argument.devicePrecision);
187-
const std::size_t size_in_bytes = (size_in_bits + (CHAR_BIT - 1)) / CHAR_BIT;
188-
return size_in_bytes;
189115
}
190116

191117
static inline uint32_t findCommandQueueGroupOrdinal(

0 commit comments

Comments
 (0)