Skip to content

Commit 93a6e38

Browse files
committed
Catch model import failure and report the appropriate error
1 parent e634654 commit 93a6e38

File tree

4 files changed

+558
-462
lines changed

4 files changed

+558
-462
lines changed

onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "core/providers/openvino/ov_interface.h"
2121
#include "core/providers/openvino/ov_versions/capability.h"
2222
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
23+
#include "core/providers/openvino/exceptions.h"
2324
#include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h"
2425

2526
namespace onnxruntime {
@@ -154,30 +155,31 @@ BackendManager::BackendManager(SessionContext& session_context,
154155
subgraph_context_,
155156
shared_context_,
156157
model_stream);
157-
} catch (const OnnxRuntimeException& ex) {
158-
std::string exception_str = ex.what();
159-
160-
if (session_context_.device_type.find("NPU") != std::string::npos &&
161-
exception_str.find("intel_npu") != std::string::npos) {
162-
// Handle NPU device related errors
163-
#ifndef NDEBUG
164-
ORT_THROW(exception_str + "\nModel needs to be recompiled\n");
165-
#else
166-
std::string error_message = "UNKNOWN NPU ERROR";
167-
std::string error_code = "code 0x0";
168-
std::regex error_message_pattern(R"(\bZE_\w*\b)");
169-
std::regex error_code_pattern("code 0x[0-9a-fA-F]+");
170-
std::smatch matches;
171-
if (std::regex_search(exception_str, matches, error_message_pattern)) {
172-
error_message = matches[0];
173-
}
174-
if (std::regex_search(exception_str, matches, error_code_pattern)) {
175-
error_code = matches[0];
158+
} catch (const ovep_exception& ex) {
159+
#ifndef OPENVINO_DISABLE_NPU_FALLBACK
160+
bool eligible_for_cpu_fallback = device_type.find("NPU") != std::string::npos &&
161+
!session_context_.so_disable_cpu_ep_fallback &&
162+
!subgraph_context_.is_ep_ctx_graph;
163+
if (eligible_for_cpu_fallback) {
164+
std::string exception_str = ex.what();
165+
LOGS_DEFAULT(VERBOSE) << exception_str;
166+
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
167+
<< "Falling back to OV CPU for execution";
168+
session_context_.device_type = "CPU";
169+
session_context_.precision = "FP32";
170+
try {
171+
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
172+
session_context_,
173+
subgraph_context_,
174+
shared_context_,
175+
model_stream);
176+
} catch (std::string const& msg) {
177+
ORT_THROW(msg);
176178
}
177-
throw std::runtime_error(error_message + ", " + error_code + "\nModel needs to be recompiled\n");
179+
} else
178180
#endif
179-
} else {
180-
ORT_THROW(exception_str);
181+
{
182+
throw ex;
181183
}
182184
}
183185
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// Copyright (C) Intel Corporation
2+
// Licensed under the MIT License
3+
4+
#pragma once
5+
6+
#include <charconv>
#include <cstdint>
#include <exception>
#include <regex>
#include <string>

#include "core/common/status.h"
11+
12+
namespace onnxruntime {
13+
namespace openvino_ep {
14+
15+
struct ovep_exception : public std::exception {
16+
enum class type {
17+
compile_model,
18+
import_model,
19+
query_prop,
20+
read_model,
21+
unknown,
22+
};
23+
24+
ovep_exception(const std::string& message,
25+
enum class type type) : message_{message},
26+
type_{type},
27+
error_code_{ze_result_code_from_string(message)},
28+
error_name_{ze_result_name_from_string(message)} {}
29+
30+
const char* what() const noexcept override {
31+
return message_.data();
32+
}
33+
34+
uint32_t get_code() const { return error_code_; }
35+
36+
operator common::Status() const {
37+
common::StatusCategory category_ort{common::ONNXRUNTIME};
38+
39+
if (type_ == type::unknown) {
40+
return {category_ort, common::FAIL, message_};
41+
}
42+
43+
// Newer drivers
44+
if ((type_ == type::import_model) &&
45+
(error_code_ == 0x7800000f /* ZE_RESULT_ERROR_INVALID_NATIVE_BINARY */)) {
46+
std::string message{error_name_ + ", code 0x" + std::to_string(error_code_) + "\nModel needs to be recompiled\n"};
47+
return {category_ort, common::INVALID_GRAPH, message};
48+
}
49+
50+
std::string error_message = "Unhandled exception type: " + std::to_string(static_cast<int>(type_));
51+
return {category_ort, common::FAIL, error_message};
52+
}
53+
54+
protected:
55+
std::string message_;
56+
type type_{type::unknown};
57+
uint32_t error_code_{0};
58+
std::string error_name_;
59+
60+
private:
61+
uint32_t ze_result_code_from_string(const std::string& ov_exception_string) {
62+
uint32_t error_code{0};
63+
std::regex error_code_pattern("code 0x([0-9a-fA-F]+)");
64+
std::smatch matches;
65+
if (std::regex_search(ov_exception_string, matches, error_code_pattern)) {
66+
std::from_chars(&(*matches[1].first), &(*matches[1].second), error_code, 16);
67+
}
68+
return error_code;
69+
}
70+
std::string ze_result_name_from_string(const std::string& ov_exception_string) {
71+
std::string error_message = "UNKNOWN NPU ERROR";
72+
std::regex error_message_pattern(R"(\bZE_\w*\b)");
73+
std::smatch matches;
74+
if (std::regex_search(ov_exception_string, matches, error_message_pattern)) {
75+
error_message = matches[0];
76+
}
77+
return error_message;
78+
}
79+
};
80+
81+
} // namespace openvino_ep
82+
} // namespace onnxruntime

onnxruntime/core/providers/openvino/openvino_execution_provider.cc

Lines changed: 111 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "core/providers/openvino/onnx_ctx_model_helper.h"
1313
#include "core/providers/openvino/ov_versions/capability.h"
1414
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
15+
#include "core/providers/openvino/exceptions.h"
1516
#include "core/session/onnxruntime_session_options_config_keys.h"
1617
#include "openvino/core/version.hpp"
1718
#ifdef USE_OVEP_NPU_MEMORY
@@ -94,124 +95,128 @@ common::Status OpenVINOExecutionProvider::Compile(
9495
auto& logger = *GetLogger();
9596
Status status = Status::OK();
9697

97-
bool is_epctx_model = false;
98-
if (!fused_nodes.empty()) {
99-
// Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
100-
const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
101-
session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
102-
session_context_.onnx_opset_version =
103-
graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);
104-
105-
// OVIR wrapped in epctx should be treated as source but this code does not
106-
// This corner case is not in use and will be addressed in a future commit
107-
is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
108-
}
109-
110-
// The block below is executed during EP context model inference
111-
auto& metadata = shared_context_->shared_weights.metadata; // Metadata object in memory
112-
if (session_context_.so_share_ep_contexts &&
113-
is_epctx_model &&
114-
metadata.empty()) {
115-
fs::path context_model_file_path = session_context_.so_context_file_path;
116-
if (context_model_file_path.empty()) {
117-
// If ep.context_file_path is not set the input model path is used
118-
context_model_file_path = session_context_.onnx_model_path_name;
98+
try {
99+
bool is_epctx_model = false;
100+
if (!fused_nodes.empty()) {
101+
// Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
102+
const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
103+
session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
104+
session_context_.onnx_opset_version =
105+
graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);
106+
107+
// OVIR wrapped in epctx should be treated as source but this code does not
108+
// This corner case is not in use and will be addressed in a future commit
109+
is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
119110
}
120111

121-
// Metadata is always read from model location, this could be a source or epctx model
122-
fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin";
123-
fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename;
124-
std::ifstream file(metadata_file_path, std::ios::binary);
125-
ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string());
126-
shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path);
127-
file >> metadata;
128-
}
129-
130-
struct OpenVINOEPFunctionState {
131-
AllocateFunc allocate_func = nullptr;
132-
DestroyFunc destroy_func = nullptr;
133-
AllocatorHandle allocator_handle = nullptr;
134-
BackendManager& backend_manager;
135-
};
136-
137-
for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) {
138-
const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
139-
const Node& fused_node = fused_node_graph.fused_node;
140-
141-
NodeComputeInfo compute_info;
142-
143-
// During backend creation, we check if user wants to use precompiled blob onnx model or the original model
144-
// For precompiled blob, directly load the model instead of compiling the model
145-
// For original model, check if the user wants to export a model with pre-compiled blob
146-
147-
auto& backend_manager = backend_managers_.emplace_back(session_context_,
148-
*shared_context_,
149-
fused_node,
150-
graph_body_viewer,
151-
logger,
152-
ep_ctx_handle_);
153-
154-
compute_info.create_state_func =
155-
[&backend_manager](ComputeContext* context, FunctionState* state) {
156-
OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{
157-
.allocate_func = context->allocate_func,
158-
.destroy_func = context->release_func,
159-
.allocator_handle = context->allocator_handle,
160-
.backend_manager = backend_manager};
161-
*state = static_cast<FunctionState>(p);
162-
return 0;
163-
};
164-
165-
compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
166-
auto function_state = static_cast<OpenVINOEPFunctionState*>(state);
167-
try {
168-
function_state->backend_manager.Compute(context);
169-
} catch (const std::exception& ex) {
170-
return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what());
112+
// The block below is executed during EP context model inference
113+
auto& metadata = shared_context_->shared_weights.metadata; // Metadata object in memory
114+
if (session_context_.so_share_ep_contexts &&
115+
is_epctx_model &&
116+
metadata.empty()) {
117+
fs::path context_model_file_path = session_context_.so_context_file_path;
118+
if (context_model_file_path.empty()) {
119+
// If ep.context_file_path is not set the input model path is used
120+
context_model_file_path = session_context_.onnx_model_path_name;
171121
}
172-
return Status::OK();
122+
123+
// Metadata is always read from model location, this could be a source or epctx model
124+
fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin";
125+
fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename;
126+
std::ifstream file(metadata_file_path, std::ios::binary);
127+
ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string());
128+
shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path);
129+
file >> metadata;
130+
}
131+
132+
struct OpenVINOEPFunctionState {
133+
AllocateFunc allocate_func = nullptr;
134+
DestroyFunc destroy_func = nullptr;
135+
AllocatorHandle allocator_handle = nullptr;
136+
BackendManager& backend_manager;
173137
};
174138

175-
compute_info.release_state_func =
176-
[](FunctionState state) {
177-
if (state) {
178-
OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state);
179-
delete function_state;
180-
}
181-
};
139+
for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) {
140+
const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
141+
const Node& fused_node = fused_node_graph.fused_node;
142+
143+
NodeComputeInfo compute_info;
144+
145+
// During backend creation, we check if user wants to use precompiled blob onnx model or the original model
146+
// For precompiled blob, directly load the model instead of compiling the model
147+
// For original model, check if the user wants to export a model with pre-compiled blob
148+
149+
auto& backend_manager = backend_managers_.emplace_back(session_context_,
150+
*shared_context_,
151+
fused_node,
152+
graph_body_viewer,
153+
logger,
154+
ep_ctx_handle_);
155+
156+
compute_info.create_state_func =
157+
[&backend_manager](ComputeContext* context, FunctionState* state) {
158+
OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{
159+
.allocate_func = context->allocate_func,
160+
.destroy_func = context->release_func,
161+
.allocator_handle = context->allocator_handle,
162+
.backend_manager = backend_manager};
163+
*state = static_cast<FunctionState>(p);
164+
return 0;
165+
};
166+
167+
compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
168+
auto function_state = static_cast<OpenVINOEPFunctionState*>(state);
169+
try {
170+
function_state->backend_manager.Compute(context);
171+
} catch (const std::exception& ex) {
172+
return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what());
173+
}
174+
return Status::OK();
175+
};
182176

183-
node_compute_funcs.push_back(std::move(compute_info));
177+
compute_info.release_state_func =
178+
[](FunctionState state) {
179+
if (state) {
180+
OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state);
181+
delete function_state;
182+
}
183+
};
184184

185-
if (!status.IsOK()) {
186-
break;
185+
node_compute_funcs.push_back(std::move(compute_info));
186+
187+
if (!status.IsOK()) {
188+
break;
189+
}
187190
}
188-
}
189191

190-
// The block below is executed during EP context model generation
191-
if (session_context_.so_context_enable &&
192-
session_context_.so_share_ep_contexts &&
193-
!metadata.empty()) {
194-
// For models after the first the metadata name comes from the shared context
195-
fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath;
196-
if (metadata_file_path.empty()) {
197-
metadata_file_path = session_context_.so_context_file_path;
198-
std::string name_append{"_metadata.bin"};
192+
// The block below is executed during EP context model generation
193+
if (session_context_.so_context_enable &&
194+
session_context_.so_share_ep_contexts &&
195+
!metadata.empty()) {
196+
// For models after the first the metadata name comes from the shared context
197+
fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath;
199198
if (metadata_file_path.empty()) {
200-
metadata_file_path = session_context_.onnx_model_path_name;
201-
name_append = "_ctx" + name_append;
199+
metadata_file_path = session_context_.so_context_file_path;
200+
std::string name_append{"_metadata.bin"};
201+
if (metadata_file_path.empty()) {
202+
metadata_file_path = session_context_.onnx_model_path_name;
203+
name_append = "_ctx" + name_append;
204+
}
205+
auto metadata_filename = metadata_file_path.stem().string() + name_append;
206+
metadata_file_path.replace_filename(metadata_filename);
207+
shared_context_->shared_weights.metadata_filepath = metadata_file_path;
202208
}
203-
auto metadata_filename = metadata_file_path.stem().string() + name_append;
204-
metadata_file_path.replace_filename(metadata_filename);
205-
shared_context_->shared_weights.metadata_filepath = metadata_file_path;
206-
}
207209

208-
// Metadata is generated only for shared contexts
209-
// If saving metadata then save it to the provided path or use the original model path
210-
// Multiple calls to Compile() will update the metadata and for the last call
211-
// the resulting file will contain the aggregated content
212-
std::ofstream file{metadata_file_path, std::ios::binary};
213-
ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path);
214-
file << metadata;
210+
// Metadata is generated only for shared contexts
211+
// If saving metadata then save it to the provided path or use the original model path
212+
// Multiple calls to Compile() will update the metadata and for the last call
213+
// the resulting file will contain the aggregated content
214+
std::ofstream file{metadata_file_path, std::ios::binary};
215+
ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path);
216+
file << metadata;
217+
}
218+
} catch (const ovep_exception& ex) {
219+
status = ex;
215220
}
216221

217222
return status;

0 commit comments

Comments
 (0)