Skip to content

Commit 22dec95

Browse files
committed
yolo&google done
1 parent 3d74386 commit 22dec95

File tree

3 files changed

+115
-92
lines changed

3 files changed

+115
-92
lines changed

app/Graph/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,7 @@ file(DOWNLOAD
7676
)
7777

7878
add_definitions(-DIMAGE28_PATH="${CMAKE_SOURCE_DIR}/docs/input/28/")
79-
add_definitions(-DIMAGE224_PATH="${CMAKE_SOURCE_DIR}/docs/input/224/")
80-
add_definitions(-DIMAGE256_PATH="${CMAKE_SOURCE_DIR}/docs/input/256/")
79+
add_definitions(-DIMAGENET_PATH="${CMAKE_SOURCE_DIR}/docs/input/Imagenet_test/")
8180
add_definitions(-DMODEL_PATH_H5="${CMAKE_SOURCE_DIR}/docs/jsons/model_data_alexnet_1.json")
8281
add_definitions(-DMODEL_PATH_GOOGLENET_ONNX="${CMAKE_SOURCE_DIR}/docs/jsons/googlenet_onnx_model.json")
8382
add_definitions(-DMODEL_PATH_DENSENET_ONNX="${CMAKE_SOURCE_DIR}/docs/jsons/densenet121_Opset16_onnx_model.json")

app/Graph/build.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,11 +1258,11 @@ void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output,
12581258
// Отмечаем, что этот вход подключен
12591259
concat_connected_inputs[target_name].insert(source_name);
12601260

1261-
if (comments) {
1261+
/*if (comments) {
12621262
std::cout << "Concat connection: " << source_name << " -> "
12631263
<< target_name << " (index: " << input_index << ")"
12641264
<< std::endl;
1265-
}
1265+
}*/
12661266

12671267
// Проверяем, все ли входы подключены
12681268
if (concat_connected_inputs[target_name].size() ==
@@ -1274,7 +1274,7 @@ void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output,
12741274
if (concat_layer) {
12751275
concat_layer->setInputOrder(concat_orders[target_name]);
12761276

1277-
if (comments) {
1277+
/*if (comments) {
12781278
std::cout
12791279
<< "=== ALL INPUTS CONNECTED TO CONCAT: " << target_name
12801280
<< " ===" << std::endl;
@@ -1292,7 +1292,7 @@ void build_graph(it_lab_ai::Tensor& input, it_lab_ai::Tensor& output,
12921292
std::cout << ", ";
12931293
}
12941294
std::cout << std::endl;
1295-
}
1295+
}*/
12961296
}
12971297
}
12981298
}

app/Graph/graph_build.cpp

Lines changed: 110 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#include <algorithm>
2+
#include <numeric>
13
#include <unordered_map>
24

35
#include "build.cpp"
@@ -17,13 +19,9 @@ std::unordered_map<int, std::string> load_class_names(
1719
}
1820

1921
while (std::getline(file, line)) {
20-
// Убираем пробелы в начале и конце
2122
line = std::regex_replace(line, std::regex("^\\s+|\\s+$"), "");
22-
23-
// Пропускаем пустые строки
2423
if (line.empty()) continue;
2524

26-
// Ищем формат: число: 'название'
2725
std::regex pattern("(\\d+):\\s*'([^']+)'");
2826
std::smatch matches;
2927

@@ -68,8 +66,32 @@ std::vector<int> get_input_shape_from_json(const std::string& json_path) {
6866
throw std::runtime_error("Could not determine input shape from JSON");
6967
}
7068

69+
std::vector<float> process_model_output(const std::vector<float>& output,
70+
const std::string& model_name) {
71+
bool is_yolo = (model_name.find("yolo") != std::string::npos);
72+
73+
if (!is_yolo) {
74+
// Для не-YOLO моделей используем стандартный softmax
75+
return softmax<float>(output);
76+
}
77+
78+
// Äëÿ YOLO ìîäåëåé àíàëèçèðóåì âûõîäíûå äàííûå
79+
float sum_val = std::accumulate(output.begin(), output.end(), 0.0f);
80+
81+
// Если сумма близка к 1, вероятности уже нормализованы
82+
if (std::abs(sum_val - 1.0f) < 0.01f) {
83+
std::cout << "YOLO output already normalized, using as-is" << std::endl;
84+
return output;
85+
}
86+
87+
// Èíà÷å ïðèìåíÿåì softmax
88+
std::cout << "Applying softmax to YOLO output" << std::endl;
89+
return softmax<float>(output);
90+
}
91+
7192
it_lab_ai::Tensor prepare_image(const cv::Mat& image,
72-
const std::vector<int>& input_shape) {
93+
const std::vector<int>& input_shape,
94+
const std::string& model_name = "") {
7395
if (input_shape.size() != 4) {
7496
throw std::runtime_error("Input shape must have 4 dimensions");
7597
}
@@ -79,55 +101,70 @@ it_lab_ai::Tensor prepare_image(const cv::Mat& image,
79101
int height = input_shape[2];
80102
int width = input_shape[3];
81103

82-
if (height == 28 && width == 28 && channels == 1) {
83-
cv::Mat processed_image;
84-
85-
if (image.channels() == 3) {
86-
cv::cvtColor(image, processed_image, cv::COLOR_BGR2GRAY);
87-
} else {
88-
processed_image = image.clone();
89-
}
90-
91-
cv::resize(processed_image, processed_image, cv::Size(28, 28));
92-
93-
cv::Mat float_image;
94-
processed_image.convertTo(float_image, CV_32FC1);
95-
float_image /= 255.0;
104+
cv::Mat processed_image;
105+
cv::Size target_size(width, height);
96106

97-
std::vector<float> data;
98-
data.reserve(batch_size * channels * height * width);
107+
bool is_yolo_model =
108+
(model_name.find("yolo") != std::string::npos || model_name.find("Google"));
99109

100-
for (int i = 0; i < 28; ++i) {
101-
for (int j = 0; j < 28; ++j) {
102-
data.push_back(float_image.at<float>(j, i));
110+
if (image.rows == height && image.cols == width) {
111+
processed_image = image.clone();
112+
std::cout << "Image already at target size - no resize needed" << std::endl;
113+
} else {
114+
if (is_yolo_model) {
115+
// Для YOLO: ресайз с сохранением соотношения сторон
116+
double scale = std::min(static_cast<double>(width) / image.cols,
117+
static_cast<double>(height) / image.rows);
118+
int new_width = static_cast<int>(image.cols * scale);
119+
int new_height = static_cast<int>(image.rows * scale);
120+
121+
cv::Mat resized_image;
122+
cv::resize(image, resized_image, cv::Size(new_width, new_height), 0, 0,
123+
cv::INTER_LINEAR);
124+
125+
processed_image = cv::Mat::zeros(height, width, image.type());
126+
int x_offset = (width - new_width) / 2;
127+
int y_offset = (height - new_height) / 2;
128+
resized_image.copyTo(
129+
processed_image(cv::Rect(x_offset, y_offset, new_width, new_height)));
130+
131+
std::cout << "YOLO resize with padding applied" << std::endl;
132+
} else {
133+
int interpolation = cv::INTER_LINEAR;
134+
if (image.rows < height || image.cols < width) {
135+
interpolation = cv::INTER_CUBIC;
136+
} else if (image.rows > height * 2 || image.cols > width * 2) {
137+
interpolation = cv::INTER_AREA;
103138
}
139+
cv::resize(image, processed_image, target_size, 0, 0, interpolation);
140+
std::cout << "Standard resize applied" << std::endl;
104141
}
105-
106-
it_lab_ai::Shape shape(
107-
{static_cast<size_t>(batch_size), static_cast<size_t>(channels),
108-
static_cast<size_t>(height), static_cast<size_t>(width)});
109-
110-
return it_lab_ai::make_tensor(data, shape);
111142
}
112143

113-
cv::Mat resized;
114-
cv::resize(image, resized, cv::Size(width, height));
115-
116144
cv::Mat float_image;
117-
resized.convertTo(float_image, CV_32FC3);
118-
float_image /= 255.0;
145+
processed_image.convertTo(float_image, CV_32FC3);
119146

120-
if (channels == 3) {
121-
std::vector<cv::Mat> image_channels;
122-
cv::split(float_image, image_channels);
147+
if (is_yolo_model) {
148+
// Для YOLO: простая нормализация 0-1
149+
float_image /= 255.0;
150+
std::cout << "YOLO normalization: 0-1 range" << std::endl;
151+
} else {
152+
// ImageNet нормализация для других моделей
153+
float_image /= 255.0;
123154

124-
image_channels[0] = (image_channels[0] - 0.485) / 0.229;
125-
image_channels[1] = (image_channels[1] - 0.456) / 0.224;
126-
image_channels[2] = (image_channels[2] - 0.406) / 0.225;
155+
if (channels == 3) {
156+
std::vector<cv::Mat> image_channels;
157+
cv::split(float_image, image_channels);
127158

128-
cv::merge(image_channels, float_image);
129-
} else if (channels == 1) {
130-
cv::cvtColor(float_image, float_image, cv::COLOR_BGR2GRAY);
159+
image_channels[0] = (image_channels[0] - 0.485) / 0.229;
160+
image_channels[1] = (image_channels[1] - 0.456) / 0.224;
161+
image_channels[2] = (image_channels[2] - 0.406) / 0.225;
162+
163+
cv::merge(image_channels, float_image);
164+
std::cout << "ImageNet normalization applied" << std::endl;
165+
} else if (channels == 1) {
166+
cv::cvtColor(float_image, float_image, cv::COLOR_BGR2GRAY);
167+
}
131168
}
132169

133170
std::vector<float> data;
@@ -136,6 +173,10 @@ it_lab_ai::Tensor prepare_image(const cv::Mat& image,
136173
std::vector<cv::Mat> processed_channels;
137174
cv::split(float_image, processed_channels);
138175

176+
if (!is_yolo_model && channels == 3) {
177+
std::swap(processed_channels[0], processed_channels[2]);
178+
}
179+
139180
for (int c = 0; c < channels; ++c) {
140181
for (int h = 0; h < height; ++h) {
141182
for (int w = 0; w < width; ++w) {
@@ -168,7 +209,7 @@ int main(int argc, char* argv[]) {
168209
std::vector<int> input_shape;
169210
try {
170211
input_shape = get_input_shape_from_json(json_path);
171-
std::cout << "Input shape from JSON: [";
212+
std::cout << "Input shape: [";
172213
for (size_t i = 0; i < input_shape.size(); ++i) {
173214
std::cout << input_shape[i];
174215
if (i < input_shape.size() - 1) std::cout << ", ";
@@ -179,26 +220,14 @@ int main(int argc, char* argv[]) {
179220
return 1;
180221
}
181222

182-
std::string image_folder;
183-
if (input_shape[1] == 1 && input_shape[2] == 28 && input_shape[3] == 28) {
184-
image_folder = IMAGE28_PATH;
185-
std::cout << "Using MNIST image folder: " << image_folder << std::endl;
186-
} else if (input_shape[2] == 224 && input_shape[3] == 224) {
187-
image_folder = IMAGE224_PATH;
188-
std::cout << "Using 224x224 image folder: " << image_folder << std::endl;
189-
} else if (input_shape[2] == 256 && input_shape[3] == 256) {
190-
image_folder = IMAGE256_PATH;
191-
std::cout << "Using 256x256 image folder: " << image_folder << std::endl;
192-
} else {
193-
image_folder = IMAGE28_PATH;
194-
std::cout << "Using default image folder: " << image_folder << std::endl;
195-
}
223+
std::string image_folder = IMAGENET_PATH;
224+
std::cout << "Using image folder: " << image_folder << std::endl;
196225

197226
std::vector<std::string> image_paths;
198-
199227
for (const auto& entry : fs::directory_iterator(image_folder)) {
200228
if (entry.path().extension() == ".png" ||
201-
entry.path().extension() == ".jpg") {
229+
entry.path().extension() == ".jpg" ||
230+
entry.path().extension() == ".jpeg") {
202231
image_paths.push_back(entry.path().string());
203232
}
204233
}
@@ -211,7 +240,6 @@ int main(int argc, char* argv[]) {
211240
class_names = load_class_names(IMAGENET_LABELS);
212241
} catch (const std::exception& e) {
213242
std::cerr << "Warning: " << e.what() << std::endl;
214-
// Создаем пустой словарь - будут выводиться только номера
215243
}
216244

217245
for (const auto& image_path : image_paths) {
@@ -222,8 +250,11 @@ int main(int argc, char* argv[]) {
222250
}
223251

224252
try {
225-
std::cout << "Processing image: " << image_path << std::endl;
226-
it_lab_ai::Tensor input = prepare_image(image, input_shape);
253+
std::cout << "\nProcessing image: " << image_path << std::endl;
254+
std::cout << "Original size: " << image.cols << "x" << image.rows
255+
<< ", channels: " << image.channels() << std::endl;
256+
257+
it_lab_ai::Tensor input = prepare_image(image, input_shape, model_name);
227258

228259
if (model_name == "alexnet_mnist") {
229260
it_lab_ai::Shape sh1({1, 5, 5, 3});
@@ -245,28 +276,19 @@ int main(int argc, char* argv[]) {
245276

246277
build_graph(input, output, json_path, true, parallel);
247278

248-
std::vector<float> tmp_output = softmax<float>(*output.as<float>());
249-
250-
// Находим топ-1 класс
251-
int max_class = 0;
252-
float max_prob = tmp_output[0];
253-
for (int i = 1; i < tmp_output.size(); i++) {
254-
if (tmp_output[i] > max_prob) {
255-
max_prob = tmp_output[i];
256-
max_class = i;
257-
}
258-
}
279+
// Используем улучшенную обработку выходов
280+
std::vector<float> tmp_output =
281+
process_model_output(*output.as<float>(), model_name);
259282

260-
// Вывод топ-5 классов с названиями
261-
std::cout << "Top 5 predictions:" << std::endl;
283+
// Находим топ-5 классов
262284
int top_n = std::min(5, static_cast<int>(tmp_output.size()));
263-
264285
std::vector<int> indices(tmp_output.size());
265286
std::iota(indices.begin(), indices.end(), 0);
266287
std::partial_sort(
267288
indices.begin(), indices.begin() + top_n, indices.end(),
268289
[&](int a, int b) { return tmp_output[a] > tmp_output[b]; });
269290

291+
std::cout << "Top " << top_n << " predictions:" << std::endl;
270292
for (int i = 0; i < top_n; i++) {
271293
int idx = indices[i];
272294
std::cout << " " << (i + 1) << ". Class " << idx << ": "
@@ -278,20 +300,22 @@ int main(int argc, char* argv[]) {
278300
std::cout << std::endl;
279301
}
280302

281-
// Вывод итогового результата
303+
// Итоговый результат
304+
int max_class = indices[0];
305+
float max_prob = tmp_output[max_class];
282306
std::cout << "Image: " << fs::path(image_path).filename().string()
283307
<< " -> Predicted class: " << max_class;
284308
if (class_names.find(max_class) != class_names.end()) {
285309
std::cout << " (" << class_names[max_class] << ")";
286310
}
287-
std::cout << " (probability: " << max_prob << ")" << std::endl;
288-
std::cout << "----------------------------------------" << std::endl;
289-
}
290-
}
291-
catch (const std::exception& e) {
292-
std::cerr << "Error processing image " << image_path << ": " << e.what()
293-
<< std::endl;
311+
std::cout << " (probability: " << std::fixed << std::setprecision(6)
312+
<< max_prob << ")" << std::endl;
294313
}
314+
std::cout << "----------------------------------------" << std::endl;
315+
} catch (const std::exception& e) {
316+
std::cerr << "Error processing image " << image_path << ": " << e.what()
317+
<< std::endl;
295318
}
319+
}
296320
return 0;
297321
}

0 commit comments

Comments
 (0)