|
12 | 12 | #include "core/providers/openvino/onnx_ctx_model_helper.h" |
13 | 13 | #include "core/providers/openvino/ov_versions/capability.h" |
14 | 14 | #include "core/providers/openvino/qdq_transformations/qdq_stripping.h" |
| 15 | +#include "core/providers/openvino/exceptions.h" |
15 | 16 | #include "core/session/onnxruntime_session_options_config_keys.h" |
16 | 17 | #include "openvino/core/version.hpp" |
17 | 18 | #ifdef USE_OVEP_NPU_MEMORY |
@@ -94,124 +95,128 @@ common::Status OpenVINOExecutionProvider::Compile( |
94 | 95 | auto& logger = *GetLogger(); |
95 | 96 | Status status = Status::OK(); |
96 | 97 |
|
97 | | - bool is_epctx_model = false; |
98 | | - if (!fused_nodes.empty()) { |
99 | | - // Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext |
100 | | - const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get(); |
101 | | - session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string(); |
102 | | - session_context_.onnx_opset_version = |
103 | | - graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain); |
104 | | - |
105 | | - // OVIR wrapped in epctx should be treated as source but this code does not |
106 | | - // This corner case is not in use and will be addressed in a future commit |
107 | | - is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0); |
108 | | - } |
109 | | - |
110 | | - // The block below is executed during EP context model inference |
111 | | - auto& metadata = shared_context_->shared_weights.metadata; // Metadata object in memory |
112 | | - if (session_context_.so_share_ep_contexts && |
113 | | - is_epctx_model && |
114 | | - metadata.empty()) { |
115 | | - fs::path context_model_file_path = session_context_.so_context_file_path; |
116 | | - if (context_model_file_path.empty()) { |
117 | | - // If ep.context_file_path is not set the input model path is used |
118 | | - context_model_file_path = session_context_.onnx_model_path_name; |
| 98 | + try { |
| 99 | + bool is_epctx_model = false; |
| 100 | + if (!fused_nodes.empty()) { |
| 101 | + // Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext |
| 102 | + const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get(); |
| 103 | + session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string(); |
| 104 | + session_context_.onnx_opset_version = |
| 105 | + graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain); |
| 106 | + |
| 107 | + // OVIR wrapped in epctx should be treated as source but this code does not |
| 108 | + // This corner case is not in use and will be addressed in a future commit |
| 109 | + is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0); |
119 | 110 | } |
120 | 111 |
|
121 | | - // Metadata is always read from model location, this could be a source or epctx model |
122 | | - fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin"; |
123 | | - fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename; |
124 | | - std::ifstream file(metadata_file_path, std::ios::binary); |
125 | | - ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string()); |
126 | | - shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path); |
127 | | - file >> metadata; |
128 | | - } |
129 | | - |
130 | | - struct OpenVINOEPFunctionState { |
131 | | - AllocateFunc allocate_func = nullptr; |
132 | | - DestroyFunc destroy_func = nullptr; |
133 | | - AllocatorHandle allocator_handle = nullptr; |
134 | | - BackendManager& backend_manager; |
135 | | - }; |
136 | | - |
137 | | - for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) { |
138 | | - const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph; |
139 | | - const Node& fused_node = fused_node_graph.fused_node; |
140 | | - |
141 | | - NodeComputeInfo compute_info; |
142 | | - |
143 | | - // During backend creation, we check if user wants to use precompiled blob onnx model or the original model |
144 | | - // For precompiled blob, directly load the model instead of compiling the model |
145 | | - // For original model, check if the user wants to export a model with pre-compiled blob |
146 | | - |
147 | | - auto& backend_manager = backend_managers_.emplace_back(session_context_, |
148 | | - *shared_context_, |
149 | | - fused_node, |
150 | | - graph_body_viewer, |
151 | | - logger, |
152 | | - ep_ctx_handle_); |
153 | | - |
154 | | - compute_info.create_state_func = |
155 | | - [&backend_manager](ComputeContext* context, FunctionState* state) { |
156 | | - OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{ |
157 | | - .allocate_func = context->allocate_func, |
158 | | - .destroy_func = context->release_func, |
159 | | - .allocator_handle = context->allocator_handle, |
160 | | - .backend_manager = backend_manager}; |
161 | | - *state = static_cast<FunctionState>(p); |
162 | | - return 0; |
163 | | - }; |
164 | | - |
165 | | - compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) { |
166 | | - auto function_state = static_cast<OpenVINOEPFunctionState*>(state); |
167 | | - try { |
168 | | - function_state->backend_manager.Compute(context); |
169 | | - } catch (const std::exception& ex) { |
170 | | - return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what()); |
| 112 | + // The block below is executed during EP context model inference |
| 113 | + auto& metadata = shared_context_->shared_weights.metadata; // Metadata object in memory |
| 114 | + if (session_context_.so_share_ep_contexts && |
| 115 | + is_epctx_model && |
| 116 | + metadata.empty()) { |
| 117 | + fs::path context_model_file_path = session_context_.so_context_file_path; |
| 118 | + if (context_model_file_path.empty()) { |
| 119 | + // If ep.context_file_path is not set the input model path is used |
| 120 | + context_model_file_path = session_context_.onnx_model_path_name; |
171 | 121 | } |
172 | | - return Status::OK(); |
| 122 | + |
| 123 | + // Metadata is always read from model location, this could be a source or epctx model |
| 124 | + fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin"; |
| 125 | + fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename; |
| 126 | + std::ifstream file(metadata_file_path, std::ios::binary); |
| 127 | + ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string()); |
| 128 | + shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path); |
| 129 | + file >> metadata; |
| 130 | + } |
| 131 | + |
| 132 | + struct OpenVINOEPFunctionState { |
| 133 | + AllocateFunc allocate_func = nullptr; |
| 134 | + DestroyFunc destroy_func = nullptr; |
| 135 | + AllocatorHandle allocator_handle = nullptr; |
| 136 | + BackendManager& backend_manager; |
173 | 137 | }; |
174 | 138 |
|
175 | | - compute_info.release_state_func = |
176 | | - [](FunctionState state) { |
177 | | - if (state) { |
178 | | - OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state); |
179 | | - delete function_state; |
180 | | - } |
181 | | - }; |
| 139 | + for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) { |
| 140 | + const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph; |
| 141 | + const Node& fused_node = fused_node_graph.fused_node; |
| 142 | + |
| 143 | + NodeComputeInfo compute_info; |
| 144 | + |
| 145 | + // During backend creation, we check if user wants to use precompiled blob onnx model or the original model |
| 146 | + // For precompiled blob, directly load the model instead of compiling the model |
| 147 | + // For original model, check if the user wants to export a model with pre-compiled blob |
| 148 | + |
| 149 | + auto& backend_manager = backend_managers_.emplace_back(session_context_, |
| 150 | + *shared_context_, |
| 151 | + fused_node, |
| 152 | + graph_body_viewer, |
| 153 | + logger, |
| 154 | + ep_ctx_handle_); |
| 155 | + |
| 156 | + compute_info.create_state_func = |
| 157 | + [&backend_manager](ComputeContext* context, FunctionState* state) { |
| 158 | + OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{ |
| 159 | + .allocate_func = context->allocate_func, |
| 160 | + .destroy_func = context->release_func, |
| 161 | + .allocator_handle = context->allocator_handle, |
| 162 | + .backend_manager = backend_manager}; |
| 163 | + *state = static_cast<FunctionState>(p); |
| 164 | + return 0; |
| 165 | + }; |
| 166 | + |
| 167 | + compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) { |
| 168 | + auto function_state = static_cast<OpenVINOEPFunctionState*>(state); |
| 169 | + try { |
| 170 | + function_state->backend_manager.Compute(context); |
| 171 | + } catch (const std::exception& ex) { |
| 172 | + return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what()); |
| 173 | + } |
| 174 | + return Status::OK(); |
| 175 | + }; |
182 | 176 |
|
183 | | - node_compute_funcs.push_back(std::move(compute_info)); |
| 177 | + compute_info.release_state_func = |
| 178 | + [](FunctionState state) { |
| 179 | + if (state) { |
| 180 | + OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state); |
| 181 | + delete function_state; |
| 182 | + } |
| 183 | + }; |
184 | 184 |
|
185 | | - if (!status.IsOK()) { |
186 | | - break; |
| 185 | + node_compute_funcs.push_back(std::move(compute_info)); |
| 186 | + |
| 187 | + if (!status.IsOK()) { |
| 188 | + break; |
| 189 | + } |
187 | 190 | } |
188 | | - } |
189 | 191 |
|
190 | | - // The block below is executed during EP context model generation |
191 | | - if (session_context_.so_context_enable && |
192 | | - session_context_.so_share_ep_contexts && |
193 | | - !metadata.empty()) { |
194 | | - // For models after the first the metadata name comes from the shared context |
195 | | - fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath; |
196 | | - if (metadata_file_path.empty()) { |
197 | | - metadata_file_path = session_context_.so_context_file_path; |
198 | | - std::string name_append{"_metadata.bin"}; |
| 192 | + // The block below is executed during EP context model generation |
| 193 | + if (session_context_.so_context_enable && |
| 194 | + session_context_.so_share_ep_contexts && |
| 195 | + !metadata.empty()) { |
| 196 | + // For models after the first the metadata name comes from the shared context |
| 197 | + fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath; |
199 | 198 | if (metadata_file_path.empty()) { |
200 | | - metadata_file_path = session_context_.onnx_model_path_name; |
201 | | - name_append = "_ctx" + name_append; |
| 199 | + metadata_file_path = session_context_.so_context_file_path; |
| 200 | + std::string name_append{"_metadata.bin"}; |
| 201 | + if (metadata_file_path.empty()) { |
| 202 | + metadata_file_path = session_context_.onnx_model_path_name; |
| 203 | + name_append = "_ctx" + name_append; |
| 204 | + } |
| 205 | + auto metadata_filename = metadata_file_path.stem().string() + name_append; |
| 206 | + metadata_file_path.replace_filename(metadata_filename); |
| 207 | + shared_context_->shared_weights.metadata_filepath = metadata_file_path; |
202 | 208 | } |
203 | | - auto metadata_filename = metadata_file_path.stem().string() + name_append; |
204 | | - metadata_file_path.replace_filename(metadata_filename); |
205 | | - shared_context_->shared_weights.metadata_filepath = metadata_file_path; |
206 | | - } |
207 | 209 |
|
208 | | - // Metadata is generated only for shared contexts |
209 | | - // If saving metadata then save it to the provided path or use the original model path |
210 | | - // Multiple calls to Compile() will update the metadata and for the last call |
211 | | - // the resulting file will contain the aggregated content |
212 | | - std::ofstream file{metadata_file_path, std::ios::binary}; |
213 | | - ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path); |
214 | | - file << metadata; |
| 210 | + // Metadata is generated only for shared contexts |
| 211 | + // If saving metadata then save it to the provided path or use the original model path |
| 212 | + // Multiple calls to Compile() will update the metadata and for the last call |
| 213 | + // the resulting file will contain the aggregated content |
| 214 | + std::ofstream file{metadata_file_path, std::ios::binary}; |
| 215 | + ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path); |
| 216 | + file << metadata; |
| 217 | + } |
| 218 | + } catch (const ovep_exception& ex) { |
| 219 | + status = ex; |
215 | 220 | } |
216 | 221 |
|
217 | 222 | return status; |
|
0 commit comments