1
- #include < algorithm>
1
+ #include < algorithm>
2
2
#include < numeric>
3
3
#include < unordered_map>
4
4
@@ -62,29 +62,28 @@ std::vector<int> get_input_shape_from_json(const std::string& json_path) {
62
62
}
63
63
}
64
64
}
65
-
66
- throw std::runtime_error (" Could not determine input shape from JSON" );
65
+ return {28 };
67
66
}
68
67
69
68
std::vector<float > process_model_output (const std::vector<float >& output,
70
69
const std::string& model_name) {
71
70
bool is_yolo = (model_name.find (" yolo" ) != std::string::npos);
72
71
73
72
if (!is_yolo) {
74
- // Äëÿ íå -YOLO ìîäåëåé èñïîëüçóåì ñòàíäàðòíûé softmax
73
+ // Для не -YOLO моделей используем стандартный softmax
75
74
return softmax<float >(output);
76
75
}
77
76
78
- // Äëÿ YOLO ìîäåëåé àíàëèçèðóåì âûõîäíûå äàííûå
77
+ // Для YOLO моделей анализируем выходные данные
79
78
float sum_val = std::accumulate (output.begin (), output.end (), 0 .0f );
80
79
81
- // Åñëè ñóììà áëèçêà ê 1, âåðîÿòíîñòè óæå íîðìàëèçîâàíû
80
+ // Если сумма близка к 1, вероятности уже нормализованы
82
81
if (std::abs (sum_val - 1 .0f ) < 0 .01f ) {
83
82
std::cout << " YOLO output already normalized, using as-is" << std::endl;
84
83
return output;
85
84
}
86
85
87
- // Èíà÷å ïðèìåíÿåì softmax
86
+ // Иначе применяем softmax
88
87
std::cout << " Applying softmax to YOLO output" << std::endl;
89
88
return softmax<float >(output);
90
89
}
@@ -104,15 +103,15 @@ it_lab_ai::Tensor prepare_image(const cv::Mat& image,
104
103
cv::Mat processed_image;
105
104
cv::Size target_size (width, height);
106
105
107
- bool is_yolo_model =
108
- (model_name. find ( " yolo " ) != std::string::npos || model_name.find (" Google" ));
106
+ bool is_yolo_model = (model_name. find ( " yolo " ) != std::string::npos ||
107
+ model_name.find (" Google" ));
109
108
110
109
if (image.rows == height && image.cols == width) {
111
110
processed_image = image.clone ();
112
111
std::cout << " Image already at target size - no resize needed" << std::endl;
113
112
} else {
114
113
if (is_yolo_model) {
115
- // Äëÿ YOLO: ðåñàéç ñ ñîõðàíåíèåì ñîîòíîøåíèÿ ñòîðîí
114
+ // Для YOLO: ресайз с сохранением соотношения сторон
116
115
double scale = std::min (static_cast <double >(width) / image.cols ,
117
116
static_cast <double >(height) / image.rows );
118
117
int new_width = static_cast <int >(image.cols * scale);
@@ -145,11 +144,11 @@ it_lab_ai::Tensor prepare_image(const cv::Mat& image,
145
144
processed_image.convertTo (float_image, CV_32FC3);
146
145
147
146
if (is_yolo_model) {
148
- // Äëÿ YOLO: ïðîñòàÿ íîðìàëèçàöèÿ 0-1
147
+ // Для YOLO: простая нормализация 0-1
149
148
float_image /= 255.0 ;
150
149
std::cout << " YOLO normalization: 0-1 range" << std::endl;
151
150
} else {
152
- // ImageNet íîðìàëèçàöèÿ äëÿ äðóãèõ ìîäåëåé
151
+ // ImageNet нормализация для других моделей
153
152
float_image /= 255.0 ;
154
153
155
154
if (channels == 3 ) {
@@ -192,6 +191,23 @@ it_lab_ai::Tensor prepare_image(const cv::Mat& image,
192
191
return it_lab_ai::make_tensor (data, shape);
193
192
}
194
193
194
+ it_lab_ai::Tensor prepare_mnist_image (const cv::Mat& image) {
195
+ cv::Mat gray_image;
196
+ cv::cvtColor (image, gray_image, cv::COLOR_BGR2GRAY);
197
+ std::vector<cv::Mat> channels;
198
+ cv::split (image, channels);
199
+
200
+ std::vector<float > res (28 * 28 );
201
+ for (int i = 0 ; i < 28 ; ++i) {
202
+ for (int j = 0 ; j < 28 ; ++j) {
203
+ res[i * 28 + j] = channels[0 ].at <uchar>(j, i);
204
+ }
205
+ }
206
+
207
+ Shape sh ({1 , 1 , 28 , 28 });
208
+ return it_lab_ai::make_tensor (res, sh);
209
+ }
210
+
195
211
int main (int argc, char * argv[]) {
196
212
std::string model_name = " alexnet_mnist" ;
197
213
bool parallel = false ;
@@ -207,20 +223,15 @@ int main(int argc, char* argv[]) {
207
223
std::string json_path = model_paths[model_name];
208
224
209
225
std::vector<int > input_shape;
210
- try {
211
- input_shape = get_input_shape_from_json (json_path);
212
- std::cout << " Input shape: [" ;
213
- for (size_t i = 0 ; i < input_shape.size (); ++i) {
214
- std::cout << input_shape[i];
215
- if (i < input_shape.size () - 1 ) std::cout << " , " ;
216
- }
217
- std::cout << " ]" << std::endl;
218
- } catch (const std::exception& e) {
219
- std::cerr << " Error reading input shape: " << e.what () << std::endl;
220
- return 1 ;
221
- }
226
+ input_shape = get_input_shape_from_json (json_path);
222
227
223
- std::string image_folder = IMAGENET_PATH;
228
+ std::string image_folder;
229
+ if (model_name == " alexnet_mnist" ) {
230
+ image_folder = IMAGE28_PATH;
231
+ }
232
+ else {
233
+ image_folder = IMAGENET_PATH;
234
+ }
224
235
std::cout << " Using image folder: " << image_folder << std::endl;
225
236
226
237
std::vector<std::string> image_paths;
@@ -254,33 +265,59 @@ int main(int argc, char* argv[]) {
254
265
std::cout << " Original size: " << image.cols << " x" << image.rows
255
266
<< " , channels: " << image.channels () << std::endl;
256
267
257
- it_lab_ai::Tensor input = prepare_image (image, input_shape, model_name);
258
-
259
268
if (model_name == " alexnet_mnist" ) {
269
+ // Специальная обработка для MNIST
270
+ it_lab_ai::Tensor input = prepare_mnist_image (image);
271
+
272
+ // Создаем выходной тензор (заглушка - форма не важна для
273
+ // build_graph_linear)
260
274
it_lab_ai::Shape sh1 ({1 , 5 , 5 , 3 });
261
275
std::vector<float > vec (75 , 3 );
262
276
it_lab_ai::Tensor output = it_lab_ai::make_tensor (vec, sh1);
263
277
264
- build_graph_linear (input, output, json_path, true , parallel);
278
+ build_graph_linear (input, output, true , parallel);
265
279
280
+ // Получаем реальные выходы (10 классов для MNIST)
266
281
std::vector<float > tmp_output = softmax<float >(*output.as <float >());
267
- for (size_t i = 0 ; i < tmp_output.size (); i++) {
268
- if (tmp_output[i] >= 1e-6 ) {
269
- std::cout << " Image: " << image_path << " -> Class: " << i
270
- << std::endl;
271
- }
282
+
283
+ // Выводим топ-3 предсказания для MNIST
284
+ int top_n = std::min (3 , static_cast <int >(tmp_output.size ()));
285
+ std::vector<int > indices (tmp_output.size ());
286
+ std::iota (indices.begin (), indices.end (), 0 );
287
+ std::partial_sort (
288
+ indices.begin (), indices.begin () + top_n, indices.end (),
289
+ [&](int a, int b) { return tmp_output[a] > tmp_output[b]; });
290
+
291
+ std::cout << " Top " << top_n << " predictions for MNIST:" << std::endl;
292
+ for (int i = 0 ; i < top_n; i++) {
293
+ int idx = indices[i];
294
+ std::cout << " " << (i + 1 ) << " . Class " << idx << " : "
295
+ << std::fixed << std::setprecision (6 )
296
+ << tmp_output[idx] * 100 << " %" << std::endl;
272
297
}
298
+
299
+ // Итоговый результат
300
+ int max_class = indices[0 ];
301
+ float max_prob = tmp_output[max_class];
302
+ std::cout << " Image: " << fs::path (image_path).filename ().string ()
303
+ << " -> Predicted digit: " << max_class
304
+ << " (probability: " << std::fixed << std::setprecision (6 )
305
+ << max_prob * 100 << " %)" << std::endl;
306
+
273
307
} else {
308
+ // Обычная обработка для других моделей
309
+ it_lab_ai::Tensor input = prepare_image (image, input_shape, model_name);
310
+
274
311
size_t output_classes = 1000 ;
275
312
it_lab_ai::Tensor output ({1 , output_classes}, it_lab_ai::Type::kFloat );
276
313
277
314
build_graph (input, output, json_path, true , parallel);
278
315
279
- // Èñïîëüçóåì óëó÷øåííóþ îáðàáîòêó âûõîäîâ
316
+ // Используем улучшенную обработку выходов
280
317
std::vector<float > tmp_output =
281
318
process_model_output (*output.as <float >(), model_name);
282
319
283
- // Íàõîäèì òîï -5 êëàññîâ
320
+ // Находим топ -5 классов
284
321
int top_n = std::min (5 , static_cast <int >(tmp_output.size ()));
285
322
std::vector<int > indices (tmp_output.size ());
286
323
std::iota (indices.begin (), indices.end (), 0 );
@@ -300,7 +337,7 @@ int main(int argc, char* argv[]) {
300
337
std::cout << std::endl;
301
338
}
302
339
303
- // Èòîãîâûé ðåçóëüòàò
340
+ // Итоговый результат
304
341
int max_class = indices[0 ];
305
342
float max_prob = tmp_output[max_class];
306
343
std::cout << " Image: " << fs::path (image_path).filename ().string ()
0 commit comments