
Commit bcec666

build yolo and start inference -> 250~
1 parent ae55576 commit bcec666

11 files changed: +627 −286 lines

app/Graph/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@ file(DOWNLOAD
 )
 
 file(DOWNLOAD
-  "https://storage.googleapis.com/kagglesdsdata/datasets/1513816/2500032/test_224/10008.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=databundle-worker-v2%40kaggle-161607.iam.gserviceaccount.com%2F20250911%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250911T144346Z&X-Goog-Expires=345600&X-Goog-SignedHeaders=host&X-Goog-Signature=07e6d4f5d018e5858d046fc697dbcc726a423cb5c9eff15a6ff973a13060bf9300b1395cb641966de90d11ebf59d7b8650c8c68121bf7e447de375526ab4586b0906db71d5623bee96a9d4e289d15165e3c2b08e04928328f8540b03cb77585082e2acc9be5c61ebc51a08c8b010ba0b6f1192344b6828d3b935dde195ecdca77476483abe7784df5f569b7bd1e4e29b1c670b9f35b76a7e4a9dc6b2b0705654753e81a91a579c0c071338aa215917f29f9ee84c9bef9c805254c917347b2c3a9a31501c1238be23296009d6617f74c1070294f6b56ac0314ea7162a6adddfb9306c5333bd879a24796511261084dd2d2dbbc515c7917ebd91aac735359d1789"
+  "https://storage.googleapis.com/kagglesdsdata/datasets/1513816/2500032/test_224/10008.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=databundle-worker-v2%40kaggle-161607.iam.gserviceaccount.com%2F20250916%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20250916T192850Z&X-Goog-Expires=345600&X-Goog-SignedHeaders=host&X-Goog-Signature=90e54a1e36e1b1be1cda07bcd00eb4cdcf504358bf9ce4eccdf0dc6af6adb19ab9fa82689878a3b26cea4e4295501fdba76e8e5dff3ee0aefe8220abd67ced9667d6f4538a7617bbe4e762a6f97907cab112949353f50276d1911c71dab11ce56370694756a2db16f08c8f819c2dbc8e6c11b131f08481962abfad3347a3ff94469310eb22db163b9036b81ce5efc720b2e175e9bb84beb87e849c2158830697328daa344f03f852ab7dad15c3bc13743f8f185dcfffc9898b7ee449800a188b1809d62f9caeb7343a94c24e7b0cae50abb93cd99a2ee679706eccd5cc093c5f4a9d0f096dcbe76be2c891f75541e11d28f47931cb8bef2dc2fea40ce1ffb391"
   "${CMAKE_SOURCE_DIR}/docs/input/224/test1.png"
   SHOW_PROGRESS
   STATUS status_code

app/Graph/build.cpp

Lines changed: 278 additions & 178 deletions
Large diffs are not rendered by default.

include/layers/ConvLayer.hpp

Lines changed: 85 additions & 68 deletions
@@ -134,22 +134,36 @@ template <typename ValueType>
 void Conv4D(const Tensor& input, const Tensor& kernel_, const Tensor& bias_,
             Tensor& output, size_t stride_, size_t pads_, size_t dilations_) {
   size_t batch_size = input.get_shape()[0];
+  size_t in_channels = input.get_shape()[1];
   size_t in_height = input.get_shape()[2];
   size_t in_width = input.get_shape()[3];
-  size_t in_channels = input.get_shape()[1];
 
-  size_t kernel_height = kernel_.get_shape()[0];
-  size_t kernel_width = kernel_.get_shape()[1];
-  size_t kernel_in_channels = kernel_.get_shape()[2];
-  size_t kernel_out_channels = kernel_.get_shape()[3];
+  size_t out_channels = kernel_.get_shape()[0];        // O
+  size_t kernel_in_channels = kernel_.get_shape()[1];  // I
+  size_t kernel_height = kernel_.get_shape()[2];       // H
+  size_t kernel_width = kernel_.get_shape()[3];        // W
+
+  /*if (in_channels != kernel_in_channels) {
+    throw std::runtime_error(
+        "Input channels don't match kernel input channels");
+  }*/
+
+  // Compute the output dimensions
+  size_t out_height =
+      (in_height + 2 * pads_ - dilations_ * (kernel_height - 1) - 1) / stride_ +
+      1;
+  size_t out_width =
+      (in_width + 2 * pads_ - dilations_ * (kernel_width - 1) - 1) / stride_ +
+      1;
+
+  // Pad input
+  std::vector<std::vector<std::vector<std::vector<ValueType>>>> padded_input(
+      batch_size,
+      std::vector<std::vector<std::vector<ValueType>>>(
+          in_height + 2 * pads_,
+          std::vector<std::vector<ValueType>>(
+              in_width + 2 * pads_, std::vector<ValueType>(in_channels, 0))));
 
-  std::vector<std::vector<std::vector<std::vector<ValueType>>>> padded_input =
-      std::vector<std::vector<std::vector<std::vector<ValueType>>>>(
-          batch_size, std::vector<std::vector<std::vector<ValueType>>>(
-                          in_height + 2 * pads_,
-                          std::vector<std::vector<ValueType>>(
-                              in_width + 2 * pads_,
-                              std::vector<ValueType>(in_channels, 0))));
   for (size_t b = 0; b < batch_size; ++b) {
     for (size_t h = 0; h < in_height; ++h) {
       for (size_t w = 0; w < in_width; ++w) {
@@ -160,84 +174,87 @@ void Conv4D(const Tensor& input, const Tensor& kernel_, const Tensor& bias_,
       }
     }
   }
-  std::vector<std::vector<std::vector<std::vector<ValueType>>>> dil_kernel =
-      std::vector<std::vector<std::vector<std::vector<ValueType>>>>(
-          kernel_height * dilations_ + 1 - dilations_,
-          std::vector<std::vector<std::vector<ValueType>>>(
-              kernel_width * dilations_ + 1 - dilations_,
-              std::vector<std::vector<ValueType>>(
-                  kernel_in_channels,
-                  std::vector<ValueType>(kernel_out_channels, 0))));
-  for (size_t b = 0; b < kernel_out_channels; ++b) {
-    for (size_t h = 0; h < kernel_height; ++h) {
-      for (size_t w = 0; w < kernel_width; ++w) {
-        for (size_t c = 0; c < kernel_in_channels; ++c) {
-          dil_kernel[h * dilations_][w * dilations_][c][b] =
-              kernel_.get<ValueType>({h, w, c, b});
+
+  // Dilate kernel
+  size_t dilated_kernel_height = (kernel_height - 1) * dilations_ + 1;
+  size_t dilated_kernel_width = (kernel_width - 1) * dilations_ + 1;
+
+  std::vector<std::vector<std::vector<std::vector<ValueType>>>> dil_kernel(
+      out_channels,
+      std::vector<std::vector<std::vector<ValueType>>>(
+          in_channels, std::vector<std::vector<ValueType>>(
+                           dilated_kernel_height,
+                           std::vector<ValueType>(dilated_kernel_width, 0))));
+
+  for (size_t oc = 0; oc < out_channels; ++oc) {
+    for (size_t ic = 0; ic < in_channels; ++ic) {
+      for (size_t kh = 0; kh < kernel_height; ++kh) {
+        for (size_t kw = 0; kw < kernel_width; ++kw) {
+          dil_kernel[oc][ic][kh * dilations_][kw * dilations_] =
+              kernel_.get<ValueType>({oc, ic, kh, kw});
         }
       }
     }
   }
 
-  size_t crat = 0;
-  if ((in_height + 2 * pads_ - dilations_ * (kernel_height - 1)) % stride_ != 0)
-    crat = 1;
-
-  size_t out_height =
-      (in_height + 2 * pads_ - dilations_ * (kernel_height - 1)) / stride_ +
-      crat;
-
-  crat = 0;
-  if ((in_width + 2 * pads_ - dilations_ * (kernel_width - 1)) % stride_ != 0)
-    crat = 1;
-
-  size_t out_width =
-      (in_width + 2 * pads_ - dilations_ * (kernel_width - 1)) / stride_ + crat;
-
+  // Perform the convolution
   std::vector<std::vector<std::vector<std::vector<ValueType>>>> output_tensor(
-      batch_size, std::vector<std::vector<std::vector<ValueType>>>(
-                      kernel_out_channels,
-                      std::vector<std::vector<ValueType>>(
-                          out_height, std::vector<ValueType>(out_width, 0))));
+      batch_size,
+      std::vector<std::vector<std::vector<ValueType>>>(
+          out_channels, std::vector<std::vector<ValueType>>(
+                            out_height, std::vector<ValueType>(out_width, 0))));
+
   for (size_t b = 0; b < batch_size; ++b) {
-    for (size_t c = 0; c < kernel_out_channels; ++c) {
-      for (size_t i = 0; i < out_height; i += stride_) {
-        for (size_t j = 0; j < out_width; j += stride_) {
+    for (size_t oc = 0; oc < out_channels; ++oc) {
+      for (size_t oh = 0; oh < out_height; ++oh) {
+        for (size_t ow = 0; ow < out_width; ++ow) {
           ValueType value = 0;
+          size_t h_start = oh * stride_;
+          size_t w_start = ow * stride_;
+
           for (size_t ic = 0; ic < in_channels; ++ic) {
-            for (size_t h = 0; h < kernel_height * dilations_ + 1 - dilations_;
-                 ++h) {
-              for (size_t w = 0; w < kernel_width * dilations_ + 1 - dilations_;
-                   ++w) {
-                value +=
-                    padded_input[b][i + h][j + w][ic] * dil_kernel[h][w][ic][c];
+            for (size_t kh = 0; kh < dilated_kernel_height; ++kh) {
+              for (size_t kw = 0; kw < dilated_kernel_width; ++kw) {
+                size_t h_index = h_start + kh;
+                size_t w_index = w_start + kw;
+
+                if (h_index < padded_input[b].size() &&
+                    w_index < padded_input[b][h_index].size()) {
+                  value += padded_input[b][h_index][w_index][ic] *
+                           dil_kernel[oc][ic][kh][kw];
+                }
               }
             }
           }
-          if (!bias_.empty()) {
-            output_tensor[b][c][i][j] = value + (*bias_.as<ValueType>())[c];
-          } else {
-            output_tensor[b][c][i][j] = value;
+
+          // Add bias
+          if (!bias_.empty() && oc < bias_.get_shape()[0]) {
+            value += bias_.get<ValueType>({oc});
           }
+
+          output_tensor[b][oc][oh][ow] = value;
         }
       }
     }
   }
 
-  Shape sh({batch_size, kernel_out_channels, out_height, out_width});
-  std::vector<ValueType> one_d_vector(batch_size * out_height * out_width *
-                                      kernel_out_channels);
-  size_t index_1d = 0;
-  for (size_t i = 0; i < batch_size; ++i) {
-    for (size_t l = 0; l < kernel_out_channels; ++l) {
-      for (size_t j = 0; j < out_height; ++j) {
-        for (size_t k = 0; k < out_width; ++k) {
-          one_d_vector[index_1d++] = output_tensor[i][l][j][k];
+  // Flatten into a 1-D tensor
+  Shape output_shape({batch_size, out_channels, out_height, out_width});
+  std::vector<ValueType> flat_output(batch_size * out_channels * out_height *
+                                     out_width);
+
+  size_t index = 0;
+  for (size_t b = 0; b < batch_size; ++b) {
+    for (size_t oc = 0; oc < out_channels; ++oc) {
+      for (size_t h = 0; h < out_height; ++h) {
+        for (size_t w = 0; w < out_width; ++w) {
+          flat_output[index++] = output_tensor[b][oc][h][w];
         }
       }
     }
   }
-  output = make_tensor<ValueType>(one_d_vector, sh);
+
+  output = make_tensor<ValueType>(flat_output, output_shape);
 }
 
 // NCHW -> NCHW only
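The rewritten out_height/out_width expressions follow the standard convolution output-size formula, out = floor((in + 2*pad - dilation*(kernel - 1) - 1) / stride) + 1, replacing the old crat-based rounding. A minimal standalone sketch of that formula (independent of the Tensor class; the sample numbers are illustrative only):

#include <cstddef>
#include <iostream>

// Output extent of a convolution along one axis, matching the expression
// used in Conv4D above (symmetric padding, single stride/dilation value).
std::size_t conv_out_dim(std::size_t in, std::size_t kernel, std::size_t pad,
                         std::size_t stride, std::size_t dilation) {
  return (in + 2 * pad - dilation * (kernel - 1) - 1) / stride + 1;
}

int main() {
  // 224x224 input, 3x3 kernel, pad 1, stride 2, dilation 1:
  // (224 + 2 - 2 - 1) / 2 + 1 = 112
  std::cout << conv_out_dim(224, 3, 1, 2, 1) << "\n";  // prints 112
  return 0;
}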

include/layers/FCLayer.hpp

Lines changed: 2 additions & 2 deletions
@@ -103,10 +103,10 @@ FCLayerImpl<ValueType>::FCLayerImpl(const std::vector<ValueType>& input_weights,
   if (input_weights.empty()) {
     throw std::invalid_argument("Empty weights for FCLayer");
   }
-  if (input_weights_shape.dims() != 2 ||
+  /*if (input_weights_shape.dims() != 2 ||
       input_weights_shape[0] != input_bias.size()) {
     throw std::invalid_argument("Invalid weights shape");
-  }
+  }*/
   this->inputShape_[0] = input_weights_shape[1];
   this->outputShape_[0] = input_bias.size();
   if (this->inputShape_[0] == 0 || this->outputShape_[0] == 0) {

include/layers/FlattenLayer.hpp

Lines changed: 2 additions & 0 deletions
@@ -11,9 +11,11 @@ std::vector<size_t> reorder(std::vector<size_t> order_vec,
 class FlattenLayer : public Layer {
  private:
   std::vector<size_t> order_;
+  int axis_ = 1;
 
  public:
   FlattenLayer() : order_({0, 1, 2, 3}) {}
+  explicit FlattenLayer(int axis) : axis_(axis) {}  // no default argument, so FlattenLayer() stays unambiguous
   FlattenLayer(const std::vector<size_t>& order) : order_(order) {}
   static std::string get_name() { return "Flatten layer"; }
   void run(const std::vector<Tensor>& input,
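The new axis field is not used yet in this hunk, but it suggests ONNX-style Flatten semantics: an N-D input is reshaped into a 2-D matrix split at axis. A sketch of that rule (a hypothetical helper, not part of this commit):

#include <cstddef>
#include <functional>
#include <numeric>
#include <utility>
#include <vector>

// ONNX-style Flatten: dimensions before `axis` multiply into the first
// output dimension, dimensions from `axis` onward into the second.
std::pair<std::size_t, std::size_t> flatten_dims(
    const std::vector<std::size_t>& shape, int axis) {
  std::size_t outer =
      std::accumulate(shape.begin(), shape.begin() + axis, std::size_t{1},
                      std::multiplies<std::size_t>());
  std::size_t inner =
      std::accumulate(shape.begin() + axis, shape.end(), std::size_t{1},
                      std::multiplies<std::size_t>());
  return {outer, inner};
}
// e.g. flatten_dims({2, 3, 4, 5}, 1) yields {2, 60}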

include/layers/PoolingLayer.hpp

Lines changed: 19 additions & 0 deletions
@@ -129,6 +129,25 @@ PoolingLayerImpl<ValueType>::PoolingLayerImpl(
       pads_(pads),
       dilations_(dilations),
       ceil_mode_(ceil_mode) {
+
+  if (pooling_shape[0] == 0 && pooling_shape[1] == 0) {
+    // Global pooling: use the input spatial dimensions as the kernel
+    poolingShape_ = Shape({
+        input_shape[input_shape.dims() - 2],  // height
+        input_shape[input_shape.dims() - 1]   // width
+    });
+    strides_ = Shape({1, 1});      // stride = 1 for global pooling
+    pads_ = Shape({0, 0, 0, 0});   // no padding
+    dilations_ = Shape({1, 1});    // no dilation
+
+    // Override the output shape for global pooling
+    this->outputShape_ = input_shape;
+    // all spatial dimensions become 1
+    for (size_t i = 2; i < input_shape.dims(); ++i) {
+      this->outputShape_[i] = 1;
+    }
+    return;  // exit early, skipping the regular shape calculation
+  }
   if (input_shape.dims() > 4) {
     throw std::invalid_argument("Input dimensions is bigger than 4");
   }
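With an all-zero pooling_shape the constructor now reinterprets the layer as global pooling: the kernel spans the whole spatial extent, so every channel collapses to one value. A standalone sketch of the equivalent global average pooling on row-major NCHW data (illustrative only; the project's actual reduce op may be average or max):

#include <cstddef>
#include <vector>

// Global average pooling over an NCHW tensor stored row-major:
// each HxW plane collapses to a single value, giving an N x C x 1 x 1 result.
std::vector<float> global_avg_pool(const std::vector<float>& x, std::size_t n,
                                   std::size_t c, std::size_t h,
                                   std::size_t w) {
  std::vector<float> out(n * c, 0.0f);
  for (std::size_t i = 0; i < n * c; ++i) {
    const float* plane = x.data() + i * h * w;  // one channel of one sample
    float sum = 0.0f;
    for (std::size_t j = 0; j < h * w; ++j) sum += plane[j];
    out[i] = sum / static_cast<float>(h * w);
  }
  return out;
}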

src/layers/ConvLayer.cpp

Lines changed: 12 additions & 0 deletions
@@ -4,6 +4,18 @@ namespace it_lab_ai {
 
 void ConvolutionalLayer::run(const std::vector<Tensor>& input,
                              std::vector<Tensor>& output) {
+  // Debug information, printed before the validity checks
+  std::cout << "=== CONVOLUTION LAYER DEBUG ===" << std::endl;
+  std::cout << "Number of inputs: " << input.size() << std::endl;
+
+  for (size_t i = 0; i < input.size(); ++i) {
+    std::cout << "Input " << i << " shape: [";
+    for (size_t d = 0; d < input[i].get_shape().dims(); ++d) {
+      std::cout << input[i].get_shape()[d];
+      if (d < input[i].get_shape().dims() - 1) std::cout << ", ";
+    }
+    std::cout << "]" << std::endl;
+  }
   if (input.size() != 1) {
     throw std::runtime_error("ConvolutionalLayer: Input tensors not 1");
   }
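Assuming <iostream> is already included in ConvLayer.cpp, a hypothetical NCHW input of shape {1, 3, 224, 224} would make this block print:

=== CONVOLUTION LAYER DEBUG ===
Number of inputs: 1
Input 0 shape: [1, 3, 224, 224]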

src/layers/FCLayer.cpp

Lines changed: 10 additions & 8 deletions
@@ -13,23 +13,25 @@ void FCLayer::run(const std::vector<Tensor>& input,
   if (bias_.get_type() != weights_.get_type()) {
     throw std::invalid_argument("Bias and weights data type aren't same");
   }
+
+  // Derive batch_size and output_size
+  size_t batch_size = input[0].get_shape()[0];
+  size_t output_size =
+      bias_.get_shape()[0];  // the bias length gives output_size
+
   switch (input[0].get_type()) {
     case Type::kInt: {
       FCLayerImpl<int> used_impl(*weights_.as<int>(), weights_.get_shape(),
                                  *bias_.as<int>());
-      output[0] =
-          make_tensor(used_impl.run(*input[0].as<int>()),
-                      {(*input[0].as<int>()).size() / weights_.get_shape()[1] *
-                       weights_.get_shape()[0]});
+      output[0] = make_tensor(used_impl.run(*input[0].as<int>()),
+                              {batch_size, output_size});
       break;
     }
     case Type::kFloat: {
       FCLayerImpl<float> used_impl(*weights_.as<float>(), weights_.get_shape(),
                                    *bias_.as<float>());
-      output[0] =
-          make_tensor(used_impl.run(*input[0].as<float>()),
-                      {(*input[0].as<float>()).size() /
-                       weights_.get_shape()[1] * weights_.get_shape()[0]});
+      output[0] = make_tensor(used_impl.run(*input[0].as<float>()),
+                              {batch_size, output_size});
       break;
     }
     default: {
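For reference, the shape arithmetic behind the new {batch_size, output_size} result: with weights stored as {out, in} (consistent with inputShape_[0] = input_weights_shape[1] in FCLayerImpl), each batch row maps through y[b] = W·x[b] + bias. A self-contained sketch under that assumed layout:

#include <cstddef>
#include <vector>

// Shape bookkeeping behind the new FCLayer output: a batch of flattened
// inputs times a {out, in} weight matrix yields a {batch, out} result.
std::vector<float> fc_forward(const std::vector<float>& x,        // batch*in
                              const std::vector<float>& weights,  // out*in
                              const std::vector<float>& bias,     // out
                              std::size_t batch, std::size_t in,
                              std::size_t out) {
  std::vector<float> y(batch * out, 0.0f);
  for (std::size_t b = 0; b < batch; ++b)
    for (std::size_t o = 0; o < out; ++o) {
      float acc = bias[o];
      for (std::size_t i = 0; i < in; ++i)
        acc += weights[o * in + i] * x[b * in + i];
      y[b * out + o] = acc;  // element (b, o) of the {batch, out} output
    }
  return y;
}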
