@@ -134,22 +134,38 @@ template <typename ValueType>
void Conv4D(const Tensor& input, const Tensor& kernel_, const Tensor& bias_,
            Tensor& output, size_t stride_, size_t pads_, size_t dilations_) {
  size_t batch_size = input.get_shape()[0];
+  size_t in_channels = input.get_shape()[1];
  size_t in_height = input.get_shape()[2];
  size_t in_width = input.get_shape()[3];
-  size_t in_channels = input.get_shape()[1];

-  size_t kernel_height = kernel_.get_shape()[0];
-  size_t kernel_width = kernel_.get_shape()[1];
-  size_t kernel_in_channels = kernel_.get_shape()[2];
-  size_t kernel_out_channels = kernel_.get_shape()[3];
+  size_t out_channels = kernel_.get_shape()[0];        // O
+  size_t kernel_in_channels = kernel_.get_shape()[1];  // I
+  size_t kernel_height = kernel_.get_shape()[2];       // H
+  size_t kernel_width = kernel_.get_shape()[3];        // W
+
+  /* if (in_channels != kernel_in_channels) {
+    throw std::runtime_error(
+        "Input channels don't match kernel input channels");
+  }*/
+
+  // Compute the output spatial dimensions
+  size_t out_height =
+      (in_height + 2 * pads_ - dilations_ * (kernel_height - 1) - 1) / stride_ +
+      1;
+  size_t out_width =
+      (in_width + 2 * pads_ - dilations_ * (kernel_width - 1) - 1) / stride_ +
+      1;
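+  // Integer division floors here, so any partial window at the border is dropped.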
+
+  // Pad input
+  std::vector<std::vector<std::vector<std::vector<ValueType>>>> padded_input(
+      batch_size,
+      std::vector<std::vector<std::vector<ValueType>>>(
+          in_height + 2 * pads_,
+          std::vector<std::vector<ValueType>>(
+              in_width + 2 * pads_, std::vector<ValueType>(in_channels, 0))));
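+  // Note: padded_input is indexed [b][h][w][c] (NHWC), unlike the NCHW output below.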

-  std::vector<std::vector<std::vector<std::vector<ValueType>>>> padded_input =
-      std::vector<std::vector<std::vector<std::vector<ValueType>>>>(
-          batch_size, std::vector<std::vector<std::vector<ValueType>>>(
-                          in_height + 2 * pads_,
-                          std::vector<std::vector<ValueType>>(
-                              in_width + 2 * pads_,
-                              std::vector<ValueType>(in_channels, 0))));
  for (size_t b = 0; b < batch_size; ++b) {
    for (size_t h = 0; h < in_height; ++h) {
      for (size_t w = 0; w < in_width; ++w) {
@@ -160,84 +174,92 @@ void Conv4D(const Tensor& input, const Tensor& kernel_, const Tensor& bias_,
      }
    }
  }
-  std::vector<std::vector<std::vector<std::vector<ValueType>>>> dil_kernel =
-      std::vector<std::vector<std::vector<std::vector<ValueType>>>>(
-          kernel_height * dilations_ + 1 - dilations_,
-          std::vector<std::vector<std::vector<ValueType>>>(
-              kernel_width * dilations_ + 1 - dilations_,
-              std::vector<std::vector<ValueType>>(
-                  kernel_in_channels,
-                  std::vector<ValueType>(kernel_out_channels, 0))));
-  for (size_t b = 0; b < kernel_out_channels; ++b) {
-    for (size_t h = 0; h < kernel_height; ++h) {
-      for (size_t w = 0; w < kernel_width; ++w) {
-        for (size_t c = 0; c < kernel_in_channels; ++c) {
-          dil_kernel[h * dilations_][w * dilations_][c][b] =
-              kernel_.get<ValueType>({h, w, c, b});
+
+  // Dilate kernel
+  size_t dilated_kernel_height = (kernel_height - 1) * dilations_ + 1;
+  size_t dilated_kernel_width = (kernel_width - 1) * dilations_ + 1;
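+  // A dilated k-tap filter spans (k - 1) * dilation + 1 pixels, with zeros in the gaps.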
+
+  std::vector<std::vector<std::vector<std::vector<ValueType>>>> dil_kernel(
+      out_channels,
+      std::vector<std::vector<std::vector<ValueType>>>(
+          in_channels, std::vector<std::vector<ValueType>>(
+                           dilated_kernel_height,
+                           std::vector<ValueType>(dilated_kernel_width, 0))));
+
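+  // Scatter each original tap onto the dilated grid; the gaps stay zero.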
+  for (size_t oc = 0; oc < out_channels; ++oc) {
+    for (size_t ic = 0; ic < in_channels; ++ic) {
+      for (size_t kh = 0; kh < kernel_height; ++kh) {
+        for (size_t kw = 0; kw < kernel_width; ++kw) {
+          dil_kernel[oc][ic][kh * dilations_][kw * dilations_] =
+              kernel_.get<ValueType>({oc, ic, kh, kw});
        }
      }
    }
  }

-  size_t crat = 0;
-  if ((in_height + 2 * pads_ - dilations_ * (kernel_height - 1)) % stride_ != 0)
-    crat = 1;
-
-  size_t out_height =
-      (in_height + 2 * pads_ - dilations_ * (kernel_height - 1)) / stride_ +
-      crat;
-
-  crat = 0;
-  if ((in_width + 2 * pads_ - dilations_ * (kernel_width - 1)) % stride_ != 0)
-    crat = 1;
-
-  size_t out_width =
-      (in_width + 2 * pads_ - dilations_ * (kernel_width - 1)) / stride_ + crat;
-
+  // Perform the convolution
  std::vector<std::vector<std::vector<std::vector<ValueType>>>> output_tensor(
-      batch_size, std::vector<std::vector<std::vector<ValueType>>>(
-                      kernel_out_channels,
-                      std::vector<std::vector<ValueType>>(
-                          out_height, std::vector<ValueType>(out_width, 0))));
+      batch_size,
+      std::vector<std::vector<std::vector<ValueType>>>(
+          out_channels, std::vector<std::vector<ValueType>>(
+                            out_height, std::vector<ValueType>(out_width, 0))));
+
  for (size_t b = 0; b < batch_size; ++b) {
-    for (size_t c = 0; c < kernel_out_channels; ++c) {
-      for (size_t i = 0; i < out_height; i += stride_) {
-        for (size_t j = 0; j < out_width; j += stride_) {
+    for (size_t oc = 0; oc < out_channels; ++oc) {
+      for (size_t oh = 0; oh < out_height; ++oh) {
+        for (size_t ow = 0; ow < out_width; ++ow) {
          ValueType value = 0;
+          size_t h_start = oh * stride_;
+          size_t w_start = ow * stride_;
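+          // (h_start, w_start): top-left corner of this output pixel's receptive field.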
+
          for (size_t ic = 0; ic < in_channels; ++ic) {
-            for (size_t h = 0; h < kernel_height * dilations_ + 1 - dilations_;
-                 ++h) {
-              for (size_t w = 0; w < kernel_width * dilations_ + 1 - dilations_;
-                   ++w) {
-                value +=
-                    padded_input[b][i + h][j + w][ic] * dil_kernel[h][w][ic][c];
+            for (size_t kh = 0; kh < dilated_kernel_height; ++kh) {
+              for (size_t kw = 0; kw < dilated_kernel_width; ++kw) {
+                size_t h_index = h_start + kh;
+                size_t w_index = w_start + kw;
+
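+                // Defensive bounds check; with the floor-based output size these indices stay in range.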
+                if (h_index < padded_input[b].size() &&
+                    w_index < padded_input[b][h_index].size()) {
+                  value += padded_input[b][h_index][w_index][ic] *
+                           dil_kernel[oc][ic][kh][kw];
+                }
              }
            }
          }
-          if (!bias_.empty()) {
-            output_tensor[b][c][i][j] = value + (*bias_.as<ValueType>())[c];
-          } else {
-            output_tensor[b][c][i][j] = value;
+
+          // Add the bias term
+          if (!bias_.empty() && oc < bias_.get_shape()[0]) {
+            value += bias_.get<ValueType>({oc});
          }
+
+          output_tensor[b][oc][oh][ow] = value;
        }
      }
    }
  }

-  Shape sh({batch_size, kernel_out_channels, out_height, out_width});
-  std::vector<ValueType> one_d_vector(batch_size * out_height * out_width *
-                                      kernel_out_channels);
-  size_t index_1d = 0;
-  for (size_t i = 0; i < batch_size; ++i) {
-    for (size_t l = 0; l < kernel_out_channels; ++l) {
-      for (size_t j = 0; j < out_height; ++j) {
-        for (size_t k = 0; k < out_width; ++k) {
-          one_d_vector[index_1d++] = output_tensor[i][l][j][k];
+  // Flatten into a 1D tensor
+  Shape output_shape({batch_size, out_channels, out_height, out_width});
+  std::vector<ValueType> flat_output(batch_size * out_channels * out_height *
+                                     out_width);
+
+  size_t index = 0;
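+  // Row-major NCHW walk: index == ((b * out_channels + oc) * out_height + h) * out_width + w.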
+  for (size_t b = 0; b < batch_size; ++b) {
+    for (size_t oc = 0; oc < out_channels; ++oc) {
+      for (size_t h = 0; h < out_height; ++h) {
+        for (size_t w = 0; w < out_width; ++w) {
+          flat_output[index++] = output_tensor[b][oc][h][w];
        }
      }
    }
  }
-  output = make_tensor<ValueType>(one_d_vector, sh);
+
+  output = make_tensor<ValueType>(flat_output, output_shape);
}

// NCHW -> NCHW only