|
12 | 12 | #include "core/providers/openvino/onnx_ctx_model_helper.h"
|
13 | 13 | #include "core/providers/openvino/ov_versions/capability.h"
|
14 | 14 | #include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
|
| 15 | +#include "core/providers/openvino/exceptions.h" |
15 | 16 | #include "core/session/onnxruntime_session_options_config_keys.h"
|
16 | 17 | #include "openvino/core/version.hpp"
|
17 | 18 | #ifdef USE_OVEP_NPU_MEMORY
|
@@ -94,124 +95,128 @@ common::Status OpenVINOExecutionProvider::Compile(
|
94 | 95 | auto& logger = *GetLogger();
|
95 | 96 | Status status = Status::OK();
|
96 | 97 |
|
97 |
| - bool is_epctx_model = false; |
98 |
| - if (!fused_nodes.empty()) { |
99 |
| - // Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext |
100 |
| - const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get(); |
101 |
| - session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string(); |
102 |
| - session_context_.onnx_opset_version = |
103 |
| - graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain); |
104 |
| - |
105 |
| - // OVIR wrapped in epctx should be treated as source but this code does not |
106 |
| - // This corner case is not in use and will be addressed in a future commit |
107 |
| - is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0); |
108 |
| - } |
109 |
| - |
110 |
| - // The block below is executed during EP context model inference |
111 |
| - auto& metadata = shared_context_->shared_weights.metadata; // Metadata object in memory |
112 |
| - if (session_context_.so_share_ep_contexts && |
113 |
| - is_epctx_model && |
114 |
| - metadata.empty()) { |
115 |
| - fs::path context_model_file_path = session_context_.so_context_file_path; |
116 |
| - if (context_model_file_path.empty()) { |
117 |
| - // If ep.context_file_path is not set the input model path is used |
118 |
| - context_model_file_path = session_context_.onnx_model_path_name; |
| 98 | + try { |
| 99 | + bool is_epctx_model = false; |
| 100 | + if (!fused_nodes.empty()) { |
| 101 | + // Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext |
| 102 | + const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get(); |
| 103 | + session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string(); |
| 104 | + session_context_.onnx_opset_version = |
| 105 | + graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain); |
| 106 | + |
| 107 | + // OVIR wrapped in epctx should be treated as source but this code does not |
| 108 | + // This corner case is not in use and will be addressed in a future commit |
| 109 | + is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0); |
119 | 110 | }
|
120 | 111 |
|
121 |
| - // Metadata is always read from model location, this could be a source or epctx model |
122 |
| - fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin"; |
123 |
| - fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename; |
124 |
| - std::ifstream file(metadata_file_path, std::ios::binary); |
125 |
| - ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string()); |
126 |
| - shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path); |
127 |
| - file >> metadata; |
128 |
| - } |
129 |
| - |
130 |
| - struct OpenVINOEPFunctionState { |
131 |
| - AllocateFunc allocate_func = nullptr; |
132 |
| - DestroyFunc destroy_func = nullptr; |
133 |
| - AllocatorHandle allocator_handle = nullptr; |
134 |
| - BackendManager& backend_manager; |
135 |
| - }; |
136 |
| - |
137 |
| - for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) { |
138 |
| - const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph; |
139 |
| - const Node& fused_node = fused_node_graph.fused_node; |
140 |
| - |
141 |
| - NodeComputeInfo compute_info; |
142 |
| - |
143 |
| - // During backend creation, we check if user wants to use precompiled blob onnx model or the original model |
144 |
| - // For precompiled blob, directly load the model instead of compiling the model |
145 |
| - // For original model, check if the user wants to export a model with pre-compiled blob |
146 |
| - |
147 |
| - auto& backend_manager = backend_managers_.emplace_back(session_context_, |
148 |
| - *shared_context_, |
149 |
| - fused_node, |
150 |
| - graph_body_viewer, |
151 |
| - logger, |
152 |
| - ep_ctx_handle_); |
153 |
| - |
154 |
| - compute_info.create_state_func = |
155 |
| - [&backend_manager](ComputeContext* context, FunctionState* state) { |
156 |
| - OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{ |
157 |
| - .allocate_func = context->allocate_func, |
158 |
| - .destroy_func = context->release_func, |
159 |
| - .allocator_handle = context->allocator_handle, |
160 |
| - .backend_manager = backend_manager}; |
161 |
| - *state = static_cast<FunctionState>(p); |
162 |
| - return 0; |
163 |
| - }; |
164 |
| - |
165 |
| - compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) { |
166 |
| - auto function_state = static_cast<OpenVINOEPFunctionState*>(state); |
167 |
| - try { |
168 |
| - function_state->backend_manager.Compute(context); |
169 |
| - } catch (const std::exception& ex) { |
170 |
| - return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what()); |
| 112 | + // The block below is executed during EP context model inference |
| 113 | + auto& metadata = shared_context_->shared_weights.metadata; // Metadata object in memory |
| 114 | + if (session_context_.so_share_ep_contexts && |
| 115 | + is_epctx_model && |
| 116 | + metadata.empty()) { |
| 117 | + fs::path context_model_file_path = session_context_.so_context_file_path; |
| 118 | + if (context_model_file_path.empty()) { |
| 119 | + // If ep.context_file_path is not set the input model path is used |
| 120 | + context_model_file_path = session_context_.onnx_model_path_name; |
171 | 121 | }
|
172 |
| - return Status::OK(); |
| 122 | + |
| 123 | + // Metadata is always read from model location, this could be a source or epctx model |
| 124 | + fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin"; |
| 125 | + fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename; |
| 126 | + std::ifstream file(metadata_file_path, std::ios::binary); |
| 127 | + ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string()); |
| 128 | + shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path); |
| 129 | + file >> metadata; |
| 130 | + } |
| 131 | + |
| 132 | + struct OpenVINOEPFunctionState { |
| 133 | + AllocateFunc allocate_func = nullptr; |
| 134 | + DestroyFunc destroy_func = nullptr; |
| 135 | + AllocatorHandle allocator_handle = nullptr; |
| 136 | + BackendManager& backend_manager; |
173 | 137 | };
|
174 | 138 |
|
175 |
| - compute_info.release_state_func = |
176 |
| - [](FunctionState state) { |
177 |
| - if (state) { |
178 |
| - OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state); |
179 |
| - delete function_state; |
180 |
| - } |
181 |
| - }; |
| 139 | + for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) { |
| 140 | + const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph; |
| 141 | + const Node& fused_node = fused_node_graph.fused_node; |
| 142 | + |
| 143 | + NodeComputeInfo compute_info; |
| 144 | + |
| 145 | + // During backend creation, we check if user wants to use precompiled blob onnx model or the original model |
| 146 | + // For precompiled blob, directly load the model instead of compiling the model |
| 147 | + // For original model, check if the user wants to export a model with pre-compiled blob |
| 148 | + |
| 149 | + auto& backend_manager = backend_managers_.emplace_back(session_context_, |
| 150 | + *shared_context_, |
| 151 | + fused_node, |
| 152 | + graph_body_viewer, |
| 153 | + logger, |
| 154 | + ep_ctx_handle_); |
| 155 | + |
| 156 | + compute_info.create_state_func = |
| 157 | + [&backend_manager](ComputeContext* context, FunctionState* state) { |
| 158 | + OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{ |
| 159 | + .allocate_func = context->allocate_func, |
| 160 | + .destroy_func = context->release_func, |
| 161 | + .allocator_handle = context->allocator_handle, |
| 162 | + .backend_manager = backend_manager}; |
| 163 | + *state = static_cast<FunctionState>(p); |
| 164 | + return 0; |
| 165 | + }; |
| 166 | + |
| 167 | + compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) { |
| 168 | + auto function_state = static_cast<OpenVINOEPFunctionState*>(state); |
| 169 | + try { |
| 170 | + function_state->backend_manager.Compute(context); |
| 171 | + } catch (const std::exception& ex) { |
| 172 | + return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what()); |
| 173 | + } |
| 174 | + return Status::OK(); |
| 175 | + }; |
182 | 176 |
|
183 |
| - node_compute_funcs.push_back(std::move(compute_info)); |
| 177 | + compute_info.release_state_func = |
| 178 | + [](FunctionState state) { |
| 179 | + if (state) { |
| 180 | + OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state); |
| 181 | + delete function_state; |
| 182 | + } |
| 183 | + }; |
184 | 184 |
|
185 |
| - if (!status.IsOK()) { |
186 |
| - break; |
| 185 | + node_compute_funcs.push_back(std::move(compute_info)); |
| 186 | + |
| 187 | + if (!status.IsOK()) { |
| 188 | + break; |
| 189 | + } |
187 | 190 | }
|
188 |
| - } |
189 | 191 |
|
190 |
| - // The block below is executed during EP context model generation |
191 |
| - if (session_context_.so_context_enable && |
192 |
| - session_context_.so_share_ep_contexts && |
193 |
| - !metadata.empty()) { |
194 |
| - // For models after the first the metadata name comes from the shared context |
195 |
| - fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath; |
196 |
| - if (metadata_file_path.empty()) { |
197 |
| - metadata_file_path = session_context_.so_context_file_path; |
198 |
| - std::string name_append{"_metadata.bin"}; |
| 192 | + // The block below is executed during EP context model generation |
| 193 | + if (session_context_.so_context_enable && |
| 194 | + session_context_.so_share_ep_contexts && |
| 195 | + !metadata.empty()) { |
| 196 | + // For models after the first the metadata name comes from the shared context |
| 197 | + fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath; |
199 | 198 | if (metadata_file_path.empty()) {
|
200 |
| - metadata_file_path = session_context_.onnx_model_path_name; |
201 |
| - name_append = "_ctx" + name_append; |
| 199 | + metadata_file_path = session_context_.so_context_file_path; |
| 200 | + std::string name_append{"_metadata.bin"}; |
| 201 | + if (metadata_file_path.empty()) { |
| 202 | + metadata_file_path = session_context_.onnx_model_path_name; |
| 203 | + name_append = "_ctx" + name_append; |
| 204 | + } |
| 205 | + auto metadata_filename = metadata_file_path.stem().string() + name_append; |
| 206 | + metadata_file_path.replace_filename(metadata_filename); |
| 207 | + shared_context_->shared_weights.metadata_filepath = metadata_file_path; |
202 | 208 | }
|
203 |
| - auto metadata_filename = metadata_file_path.stem().string() + name_append; |
204 |
| - metadata_file_path.replace_filename(metadata_filename); |
205 |
| - shared_context_->shared_weights.metadata_filepath = metadata_file_path; |
206 |
| - } |
207 | 209 |
|
208 |
| - // Metadata is generated only for shared contexts |
209 |
| - // If saving metadata then save it to the provided path or use the original model path |
210 |
| - // Multiple calls to Compile() will update the metadata and for the last call |
211 |
| - // the resulting file will contain the aggregated content |
212 |
| - std::ofstream file{metadata_file_path, std::ios::binary}; |
213 |
| - ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path); |
214 |
| - file << metadata; |
| 210 | + // Metadata is generated only for shared contexts |
| 211 | + // If saving metadata then save it to the provided path or use the original model path |
| 212 | + // Multiple calls to Compile() will update the metadata and for the last call |
| 213 | + // the resulting file will contain the aggregated content |
| 214 | + std::ofstream file{metadata_file_path, std::ios::binary}; |
| 215 | + ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path); |
| 216 | + file << metadata; |
| 217 | + } |
| 218 | + } catch (const ovep_exception& ex) { |
| 219 | + status = ex; |
215 | 220 | }
|
216 | 221 |
|
217 | 222 | return status;
|
|
0 commit comments