1
+ #include < algorithm>
2
+ #include < numeric>
1
3
#include < unordered_map>
2
4
3
5
#include " build.cpp"
@@ -17,13 +19,9 @@ std::unordered_map<int, std::string> load_class_names(
17
19
}
18
20
19
21
while (std::getline (file, line)) {
20
- // Strip whitespace at the beginning and end
21
22
line = std::regex_replace (line, std::regex (" ^\\ s+|\\ s+$" ), " " );
22
-
23
- // Skip empty lines
24
23
if (line.empty ()) continue ;
25
24
26
- // Look for the format: number: 'name'
27
25
std::regex pattern (" (\\ d+):\\ s*'([^']+)'" );
28
26
std::smatch matches;
29
27
@@ -68,8 +66,32 @@ std::vector<int> get_input_shape_from_json(const std::string& json_path) {
68
66
throw std::runtime_error (" Could not determine input shape from JSON" );
69
67
}
70
68
69
// process_model_output: post-processes a raw model output vector.
// - output: the raw values produced by the model.
// - model_name: searched for the YOLO marker substring to pick the branch.
// Returns softmax<float>(output) when the marker is absent. When it is present
// the elements are summed: if the sum is within 0.01 of 1 the vector is
// returned unchanged, otherwise softmax<float>(output) is returned. Both YOLO
// branches print the chosen path to std::cout.
// NOTE(review): a sum near 1 does not by itself prove the values are
// probabilities (raw scores can add up to ~1 by coincidence) - confirm this
// heuristic is acceptable for the supported models.
+ std::vector<float > process_model_output (const std::vector<float >& output,
70
+ const std::string& model_name) {
71
+ bool is_yolo = (model_name.find (" yolo" ) != std::string::npos);
72
+
73
+ if (!is_yolo) {
74
+ // For non-YOLO models use the standard softmax
75
+ return softmax<float >(output);
76
+ }
77
+
78
+ // For YOLO models, inspect the output data first
79
+ float sum_val = std::accumulate (output.begin (), output.end (), 0 .0f );
80
+
81
+ // If the sum is close to 1, the probabilities are already normalized
82
+ if (std::abs (sum_val - 1 .0f ) < 0 .01f ) {
83
+ std::cout << " YOLO output already normalized, using as-is" << std::endl;
84
+ return output;
85
+ }
86
+
87
+ // Otherwise apply softmax
88
+ std::cout << " Applying softmax to YOLO output" << std::endl;
89
+ return softmax<float >(output);
90
+ }
91
+
71
92
it_lab_ai::Tensor prepare_image (const cv::Mat& image,
72
- const std::vector<int >& input_shape) {
93
+ const std::vector<int >& input_shape,
94
+ const std::string& model_name = " " ) {
73
95
if (input_shape.size () != 4 ) {
74
96
throw std::runtime_error (" Input shape must have 4 dimensions" );
75
97
}
@@ -79,55 +101,70 @@ it_lab_ai::Tensor prepare_image(const cv::Mat& image,
79
101
int height = input_shape[2 ];
80
102
int width = input_shape[3 ];
81
103
82
- if (height == 28 && width == 28 && channels == 1 ) {
83
- cv::Mat processed_image;
84
-
85
- if (image.channels () == 3 ) {
86
- cv::cvtColor (image, processed_image, cv::COLOR_BGR2GRAY);
87
- } else {
88
- processed_image = image.clone ();
89
- }
90
-
91
- cv::resize (processed_image, processed_image, cv::Size (28 , 28 ));
92
-
93
- cv::Mat float_image;
94
- processed_image.convertTo (float_image, CV_32FC1);
95
- float_image /= 255.0 ;
104
+ cv::Mat processed_image;
105
+ cv::Size target_size (width, height);
96
106
97
- std::vector< float > data;
98
- data. reserve (batch_size * channels * height * width );
107
+ bool is_yolo_model =
108
+ (model_name. find ( " yolo " ) != std::string::npos || model_name. find ( " Google " ) );
99
109
100
- for (int i = 0 ; i < 28 ; ++i) {
101
- for (int j = 0 ; j < 28 ; ++j) {
102
- data.push_back (float_image.at <float >(j, i));
110
+ if (image.rows == height && image.cols == width) {
111
+ processed_image = image.clone ();
112
+ std::cout << " Image already at target size - no resize needed" << std::endl;
113
+ } else {
114
+ if (is_yolo_model) {
115
+ // For YOLO: resize while preserving the aspect ratio
116
+ double scale = std::min (static_cast <double >(width) / image.cols ,
117
+ static_cast <double >(height) / image.rows );
118
+ int new_width = static_cast <int >(image.cols * scale);
119
+ int new_height = static_cast <int >(image.rows * scale);
120
+
121
+ cv::Mat resized_image;
122
+ cv::resize (image, resized_image, cv::Size (new_width, new_height), 0 , 0 ,
123
+ cv::INTER_LINEAR);
124
+
125
+ processed_image = cv::Mat::zeros (height, width, image.type ());
126
+ int x_offset = (width - new_width) / 2 ;
127
+ int y_offset = (height - new_height) / 2 ;
128
+ resized_image.copyTo (
129
+ processed_image (cv::Rect (x_offset, y_offset, new_width, new_height)));
130
+
131
+ std::cout << " YOLO resize with padding applied" << std::endl;
132
+ } else {
133
+ int interpolation = cv::INTER_LINEAR;
134
+ if (image.rows < height || image.cols < width) {
135
+ interpolation = cv::INTER_CUBIC;
136
+ } else if (image.rows > height * 2 || image.cols > width * 2 ) {
137
+ interpolation = cv::INTER_AREA;
103
138
}
139
+ cv::resize (image, processed_image, target_size, 0 , 0 , interpolation);
140
+ std::cout << " Standard resize applied" << std::endl;
104
141
}
105
-
106
- it_lab_ai::Shape shape (
107
- {static_cast <size_t >(batch_size), static_cast <size_t >(channels),
108
- static_cast <size_t >(height), static_cast <size_t >(width)});
109
-
110
- return it_lab_ai::make_tensor (data, shape);
111
142
}
112
143
113
- cv::Mat resized;
114
- cv::resize (image, resized, cv::Size (width, height));
115
-
116
144
cv::Mat float_image;
117
- resized.convertTo (float_image, CV_32FC3);
118
- float_image /= 255.0 ;
145
+ processed_image.convertTo (float_image, CV_32FC3);
119
146
120
- if (channels == 3 ) {
121
- std::vector<cv::Mat> image_channels;
122
- cv::split (float_image, image_channels);
147
+ if (is_yolo_model) {
148
+ // For YOLO: simple 0-1 normalization
149
+ float_image /= 255.0 ;
150
+ std::cout << " YOLO normalization: 0-1 range" << std::endl;
151
+ } else {
152
+ // ImageNet normalization for other models
153
+ float_image /= 255.0 ;
123
154
124
- image_channels[ 0 ] = (image_channels[ 0 ] - 0.485 ) / 0.229 ;
125
- image_channels[ 1 ] = (image_channels[ 1 ] - 0.456 ) / 0.224 ;
126
- image_channels[ 2 ] = (image_channels[ 2 ] - 0.406 ) / 0.225 ;
155
+ if (channels == 3 ) {
156
+ std::vector<cv::Mat> image_channels ;
157
+ cv::split (float_image, image_channels) ;
127
158
128
- cv::merge (image_channels, float_image);
129
- } else if (channels == 1 ) {
130
- cv::cvtColor (float_image, float_image, cv::COLOR_BGR2GRAY);
159
+ image_channels[0 ] = (image_channels[0 ] - 0.485 ) / 0.229 ;
160
+ image_channels[1 ] = (image_channels[1 ] - 0.456 ) / 0.224 ;
161
+ image_channels[2 ] = (image_channels[2 ] - 0.406 ) / 0.225 ;
162
+
163
+ cv::merge (image_channels, float_image);
164
+ std::cout << " ImageNet normalization applied" << std::endl;
165
+ } else if (channels == 1 ) {
166
+ cv::cvtColor (float_image, float_image, cv::COLOR_BGR2GRAY);
167
+ }
131
168
}
132
169
133
170
std::vector<float > data;
@@ -136,6 +173,10 @@ it_lab_ai::Tensor prepare_image(const cv::Mat& image,
136
173
std::vector<cv::Mat> processed_channels;
137
174
cv::split (float_image, processed_channels);
138
175
176
+ if (!is_yolo_model && channels == 3 ) {
177
+ std::swap (processed_channels[0 ], processed_channels[2 ]);
178
+ }
179
+
139
180
for (int c = 0 ; c < channels; ++c) {
140
181
for (int h = 0 ; h < height; ++h) {
141
182
for (int w = 0 ; w < width; ++w) {
@@ -168,7 +209,7 @@ int main(int argc, char* argv[]) {
168
209
std::vector<int > input_shape;
169
210
try {
170
211
input_shape = get_input_shape_from_json (json_path);
171
- std::cout << " Input shape from JSON : [" ;
212
+ std::cout << " Input shape: [" ;
172
213
for (size_t i = 0 ; i < input_shape.size (); ++i) {
173
214
std::cout << input_shape[i];
174
215
if (i < input_shape.size () - 1 ) std::cout << " , " ;
@@ -179,26 +220,14 @@ int main(int argc, char* argv[]) {
179
220
return 1 ;
180
221
}
181
222
182
- std::string image_folder;
183
- if (input_shape[1 ] == 1 && input_shape[2 ] == 28 && input_shape[3 ] == 28 ) {
184
- image_folder = IMAGE28_PATH;
185
- std::cout << " Using MNIST image folder: " << image_folder << std::endl;
186
- } else if (input_shape[2 ] == 224 && input_shape[3 ] == 224 ) {
187
- image_folder = IMAGE224_PATH;
188
- std::cout << " Using 224x224 image folder: " << image_folder << std::endl;
189
- } else if (input_shape[2 ] == 256 && input_shape[3 ] == 256 ) {
190
- image_folder = IMAGE256_PATH;
191
- std::cout << " Using 256x256 image folder: " << image_folder << std::endl;
192
- } else {
193
- image_folder = IMAGE28_PATH;
194
- std::cout << " Using default image folder: " << image_folder << std::endl;
195
- }
223
+ std::string image_folder = IMAGENET_PATH;
224
+ std::cout << " Using image folder: " << image_folder << std::endl;
196
225
197
226
std::vector<std::string> image_paths;
198
-
199
227
for (const auto & entry : fs::directory_iterator (image_folder)) {
200
228
if (entry.path ().extension () == " .png" ||
201
- entry.path ().extension () == " .jpg" ) {
229
+ entry.path ().extension () == " .jpg" ||
230
+ entry.path ().extension () == " .jpeg" ) {
202
231
image_paths.push_back (entry.path ().string ());
203
232
}
204
233
}
@@ -211,7 +240,6 @@ int main(int argc, char* argv[]) {
211
240
class_names = load_class_names (IMAGENET_LABELS);
212
241
} catch (const std::exception& e) {
213
242
std::cerr << " Warning: " << e.what () << std::endl;
214
- // Create an empty dictionary - only the class numbers will be printed
215
243
}
216
244
217
245
for (const auto & image_path : image_paths) {
@@ -222,8 +250,11 @@ int main(int argc, char* argv[]) {
222
250
}
223
251
224
252
try {
225
- std::cout << " Processing image: " << image_path << std::endl;
226
- it_lab_ai::Tensor input = prepare_image (image, input_shape);
253
+ std::cout << " \n Processing image: " << image_path << std::endl;
254
+ std::cout << " Original size: " << image.cols << " x" << image.rows
255
+ << " , channels: " << image.channels () << std::endl;
256
+
257
+ it_lab_ai::Tensor input = prepare_image (image, input_shape, model_name);
227
258
228
259
if (model_name == " alexnet_mnist" ) {
229
260
it_lab_ai::Shape sh1 ({1 , 5 , 5 , 3 });
@@ -245,28 +276,19 @@ int main(int argc, char* argv[]) {
245
276
246
277
build_graph (input, output, json_path, true , parallel);
247
278
248
- std::vector<float > tmp_output = softmax<float >(*output.as <float >());
249
-
250
- // Find the top-1 class
251
- int max_class = 0 ;
252
- float max_prob = tmp_output[0 ];
253
- for (int i = 1 ; i < tmp_output.size (); i++) {
254
- if (tmp_output[i] > max_prob) {
255
- max_prob = tmp_output[i];
256
- max_class = i;
257
- }
258
- }
279
+ // Use the improved output processing
280
+ std::vector<float > tmp_output =
281
+ process_model_output (*output.as <float >(), model_name);
259
282
260
- // Print the top-5 classes with their names
261
- std::cout << " Top 5 predictions:" << std::endl;
283
+ // Find the top-5 classes
262
284
int top_n = std::min (5 , static_cast <int >(tmp_output.size ()));
263
-
264
285
std::vector<int > indices (tmp_output.size ());
265
286
std::iota (indices.begin (), indices.end (), 0 );
266
287
std::partial_sort (
267
288
indices.begin (), indices.begin () + top_n, indices.end (),
268
289
[&](int a, int b) { return tmp_output[a] > tmp_output[b]; });
269
290
291
+ std::cout << " Top " << top_n << " predictions:" << std::endl;
270
292
for (int i = 0 ; i < top_n; i++) {
271
293
int idx = indices[i];
272
294
std::cout << " " << (i + 1 ) << " . Class " << idx << " : "
@@ -278,20 +300,22 @@ int main(int argc, char* argv[]) {
278
300
std::cout << std::endl;
279
301
}
280
302
281
- // Print the final result
303
+ // Final result
304
+ int max_class = indices[0 ];
305
+ float max_prob = tmp_output[max_class];
282
306
std::cout << " Image: " << fs::path (image_path).filename ().string ()
283
307
<< " -> Predicted class: " << max_class;
284
308
if (class_names.find (max_class) != class_names.end ()) {
285
309
std::cout << " (" << class_names[max_class] << " )" ;
286
310
}
287
- std::cout << " (probability: " << max_prob << " )" << std::endl;
288
- std::cout << " ----------------------------------------" << std::endl;
289
- }
290
- }
291
- catch (const std::exception& e) {
292
- std::cerr << " Error processing image " << image_path << " : " << e.what ()
293
- << std::endl;
311
+ std::cout << " (probability: " << std::fixed << std::setprecision (6 )
312
+ << max_prob << " )" << std::endl;
294
313
}
314
+ std::cout << " ----------------------------------------" << std::endl;
315
+ } catch (const std::exception& e) {
316
+ std::cerr << " Error processing image " << image_path << " : " << e.what ()
317
+ << std::endl;
295
318
}
319
+ }
296
320
return 0 ;
297
321
}
0 commit comments