@@ -63,22 +63,6 @@ void ConvolutionalLayer::run(const Tensor& input, Tensor& output) {
63
63
size_t kernel_in_channels = kernel_.get_shape ()[2 ];
64
64
size_t kernel_out_channels = kernel_.get_shape ()[3 ];
65
65
66
- size_t out_height =
67
- (in_height + 2 * pads_ - (dilations_ * (kernel_height - 1 ) + 1 )) /
68
- stride_ +
69
- 1 ;
70
- size_t out_width =
71
- (in_width + 2 * pads_ - (dilations_ * (kernel_width - 1 ) + 1 )) /
72
- stride_ +
73
- 1 ;
74
-
75
- std::vector<std::vector<std::vector<std::vector<int >>>> output_tensor (
76
- batch_size,
77
- std::vector<std::vector<std::vector<int >>>(
78
- out_height,
79
- std::vector<std::vector<int >>(
80
- out_width, std::vector<int >(kernel_out_channels, 0 ))));
81
-
82
66
std::vector<int > t = *input.as <int >();
83
67
std::vector<std::vector<std::vector<std::vector<int >>>> input_tensor (
84
68
batch_size,
@@ -111,6 +95,8 @@ void ConvolutionalLayer::run(const Tensor& input, Tensor& output) {
111
95
kernel[n_index][h_index][w_index][c_index] = t1[index];
112
96
}
113
97
98
+ pads_ = (kernel_height * (1 + 2 * dilations_) - 1 ) / 2 ;
99
+
114
100
std::vector<std::vector<std::vector<std::vector<int >>>> padded_input =
115
101
input_tensor;
116
102
if (pads_ > 0 ) {
@@ -134,38 +120,93 @@ void ConvolutionalLayer::run(const Tensor& input, Tensor& output) {
134
120
}
135
121
}
136
122
123
+ std::vector<std::vector<std::vector<std::vector<int >>>> dil_kernel =
124
+ kernel;
125
+ if (dilations_ > 0 ) {
126
+ dil_kernel = std::vector<std::vector<std::vector<std::vector<int >>>>(
127
+ kernel_height * (1 + 2 * dilations_),
128
+ std::vector<std::vector<std::vector<int >>>(
129
+ kernel_width * (1 + 2 * dilations_),
130
+ std::vector<std::vector<int >>(
131
+ kernel_in_channels,
132
+ std::vector<int >(kernel_out_channels, 0 ))));
133
+
134
+ for (size_t b = 0 ; b < kernel_out_channels; ++b) {
135
+ for (size_t h = 0 ; h < kernel_height; ++h) {
136
+ for (size_t w = 0 ; w < kernel_width; ++w) {
137
+ for (size_t c = 0 ; c < kernel_in_channels; ++c) {
138
+ dil_kernel[(h * (1 + 2 * dilations_)) + dilations_]
139
+ [(w * (1 + 2 * dilations_)) + dilations_][c][b] =
140
+ kernel[h][w][c][b];
141
+ }
142
+ }
143
+ }
144
+ }
145
+ }
146
+
147
+ size_t crat = 0 ;
148
+ if ((in_height + 2 * pads_ -
149
+ ((kernel_height * (1 + 2 * dilations_)) - 1 )) %
150
+ stride_ !=
151
+ 0 )
152
+ crat = 1 ;
153
+
154
+ size_t out_height = (in_height + 2 * pads_ -
155
+ ((kernel_height * (1 + 2 * dilations_)) - 1 )) /
156
+ stride_ +
157
+ crat;
158
+
159
+ crat = 0 ;
160
+
161
+ if ((in_width + 2 * pads_ -
162
+ ((kernel_width * (1 + 2 * dilations_)) - 1 )) %
163
+ stride_ !=
164
+ 0 )
165
+ crat = 1 ;
166
+
167
+ size_t out_width = (in_width + 2 * pads_ -
168
+ ((kernel_width * (1 + 2 * dilations_)) - 1 )) /
169
+ stride_ +
170
+ crat;
171
+
172
+ std::vector<std::vector<std::vector<std::vector<int >>>> output_tensor (
173
+ batch_size,
174
+ std::vector<std::vector<std::vector<int >>>(
175
+ out_height,
176
+ std::vector<std::vector<int >>(
177
+ out_width, std::vector<int >(kernel_out_channels, 0 ))));
178
+ size_t one_size = (kernel_height * (1 + 2 * dilations_) - 1 ) / 2 ;
179
+
137
180
for (size_t b = 0 ; b < batch_size; ++b) {
138
- for (size_t oc = 0 ; oc < kernel_out_channels; ++oc ) {
139
- for (size_t i = 0 ; i < out_height; ++i ) {
140
- for (size_t j = 0 ; j < out_width; ++j ) {
181
+ for (size_t c = 0 ; c < kernel_out_channels; ++c ) {
182
+ for (size_t i = 0 ; i < out_height; i += stride_ ) {
183
+ for (size_t j = 0 ; j < out_width; j += stride_ ) {
141
184
int value = 0 ;
142
- for (size_t kh = 0 ; kh < kernel_height; ++kh) {
143
- for (size_t kw = 0 ; kw < kernel_width; ++kw) {
144
- for (size_t ic = 0 ; ic < in_channels; ++ic) {
145
- size_t vert_start = i * stride_ + kh * dilations_;
146
- size_t horiz_start = j * stride_ + kw * dilations_;
147
-
148
- if (vert_start < padded_input[0 ].size () &&
149
- horiz_start < padded_input[0 ][0 ].size ()) {
150
- value += padded_input[b][vert_start][horiz_start][ic] *
151
- kernel[kh][kw][ic][oc];
152
- }
185
+ for (size_t ic = 0 ; ic < in_channels; ++ic) {
186
+ for (int h = (-1 * static_cast <int >(one_size));
187
+ h <= static_cast <int >(one_size); ++h) {
188
+ for (int w = (-1 * static_cast <int >(one_size));
189
+ w <= static_cast <int >(one_size); ++w) {
190
+ value += padded_input[b][i + one_size + h]
191
+ [j + one_size + w][ic] *
192
+ dil_kernel[one_size + h][one_size + w][ic][c];
153
193
}
154
194
}
155
195
}
156
- output_tensor[b][i][j][oc ] = value;
196
+ output_tensor[b][i][j][c ] = value;
157
197
}
158
198
}
159
199
}
160
200
}
161
- Shape sh ({batch_size, out_height, out_width, kernel_out_channels});
201
+
202
+ Shape sh ({batch_size, kernel_out_channels, out_height, out_width});
162
203
std::vector<int > one_d_vector (batch_size * out_height * out_width *
163
204
kernel_out_channels);
164
205
size_t index_1d = 0 ;
165
206
for (size_t i = 0 ; i < batch_size; ++i) {
166
- for (size_t j = 0 ; j < out_height ; ++j ) {
167
- for (size_t k = 0 ; k < out_width ; ++k ) {
168
- for (size_t l = 0 ; l < kernel_out_channels ; ++l ) {
207
+ for (size_t l = 0 ; l < kernel_out_channels ; ++l ) {
208
+ for (size_t j = 0 ; j < out_height ; ++j ) {
209
+ for (size_t k = 0 ; k < out_width ; ++k ) {
169
210
one_d_vector[index_1d++] = output_tensor[i][j][k][l];
170
211
}
171
212
}
@@ -234,28 +275,12 @@ void ConvolutionalLayer::run(const Tensor& input, Tensor& output) {
234
275
size_t kernel_in_channels = kernel_.get_shape ()[2 ];
235
276
size_t kernel_out_channels = kernel_.get_shape ()[3 ];
236
277
237
- size_t out_height =
238
- (in_height + 2 * pads_ - (dilations_ * (kernel_height - 1 ) + 1 )) /
239
- stride_ +
240
- 1 ;
241
- size_t out_width =
242
- (in_width + 2 * pads_ - (dilations_ * (kernel_width - 1 ) + 1 )) /
243
- stride_ +
244
- 1 ;
245
-
246
- std::vector<std::vector<std::vector<std::vector<float >>>> output_tensor (
247
- batch_size,
248
- std::vector<std::vector<std::vector<float >>>(
249
- out_height,
250
- std::vector<std::vector<float >>(
251
- out_width, std::vector<float >(kernel_out_channels, 0 ))));
252
-
253
278
std::vector<float > t = *input.as <float >();
254
279
std::vector<std::vector<std::vector<std::vector<float >>>> input_tensor (
255
- batch_size, std::vector<std::vector<std::vector< float >>>(
256
- in_height, std::vector<std::vector<float >>(
257
- in_width , std::vector<float >(
258
- in_channels, 1.0 ))));
280
+ batch_size,
281
+ std::vector<std::vector<std::vector< float > >>(
282
+ in_height , std::vector<std::vector< float > >(
283
+ in_width, std::vector< float >( in_channels, 1 ))));
259
284
for (size_t index = 0 ; index < t.size (); ++index) {
260
285
size_t n_index = index / (in_height * in_width * in_channels);
261
286
size_t h_index = (index / (in_width * in_channels)) % in_height;
@@ -268,10 +293,9 @@ void ConvolutionalLayer::run(const Tensor& input, Tensor& output) {
268
293
std::vector<std::vector<std::vector<std::vector<float >>>> kernel (
269
294
kernel_height,
270
295
std::vector<std::vector<std::vector<float >>>(
271
- kernel_width,
272
- std::vector<std::vector<float >>(
273
- kernel_in_channels,
274
- std::vector<float >(kernel_out_channels, 1.0 ))));
296
+ kernel_width, std::vector<std::vector<float >>(
297
+ kernel_in_channels,
298
+ std::vector<float >(kernel_out_channels, 1 ))));
275
299
for (size_t index = 0 ; index < t1.size (); ++index) {
276
300
size_t n_index =
277
301
index / (kernel_width * kernel_in_channels * kernel_out_channels);
@@ -283,6 +307,8 @@ void ConvolutionalLayer::run(const Tensor& input, Tensor& output) {
283
307
kernel[n_index][h_index][w_index][c_index] = t1[index];
284
308
}
285
309
310
+ pads_ = (kernel_height * (1 + 2 * dilations_) - 1 ) / 2 ;
311
+
286
312
std::vector<std::vector<std::vector<std::vector<float >>>> padded_input =
287
313
input_tensor;
288
314
if (pads_ > 0 ) {
@@ -306,38 +332,94 @@ void ConvolutionalLayer::run(const Tensor& input, Tensor& output) {
306
332
}
307
333
}
308
334
335
+ std::vector<std::vector<std::vector<std::vector<float >>>> dil_kernel =
336
+ kernel;
337
+ if (dilations_ > 0 ) {
338
+ dil_kernel =
339
+ std::vector<std::vector<std::vector<std::vector<float >>>>(
340
+ kernel_height * (1 + 2 * dilations_),
341
+ std::vector<std::vector<std::vector<float >>>(
342
+ kernel_width * (1 + 2 * dilations_),
343
+ std::vector<std::vector<float >>(
344
+ kernel_in_channels,
345
+ std::vector<float >(kernel_out_channels, 0 ))));
346
+
347
+ for (size_t b = 0 ; b < kernel_out_channels; ++b) {
348
+ for (size_t h = 0 ; h < kernel_height; ++h) {
349
+ for (size_t w = 0 ; w < kernel_width; ++w) {
350
+ for (size_t c = 0 ; c < kernel_in_channels; ++c) {
351
+ dil_kernel[(h * (1 + 2 * dilations_)) + dilations_]
352
+ [(w * (1 + 2 * dilations_)) + dilations_][c][b] =
353
+ kernel[h][w][c][b];
354
+ }
355
+ }
356
+ }
357
+ }
358
+ }
359
+
360
+ size_t crat = 0 ;
361
+ if ((in_height + 2 * pads_ -
362
+ ((kernel_height * (1 + 2 * dilations_)) - 1 )) %
363
+ stride_ !=
364
+ 0 )
365
+ crat = 1 ;
366
+
367
+ size_t out_height = (in_height + 2 * pads_ -
368
+ ((kernel_height * (1 + 2 * dilations_)) - 1 )) /
369
+ stride_ +
370
+ crat;
371
+
372
+ crat = 0 ;
373
+
374
+ if ((in_width + 2 * pads_ -
375
+ ((kernel_width * (1 + 2 * dilations_)) - 1 )) %
376
+ stride_ !=
377
+ 0 )
378
+ crat = 1 ;
379
+
380
+ size_t out_width = (in_width + 2 * pads_ -
381
+ ((kernel_width * (1 + 2 * dilations_)) - 1 )) /
382
+ stride_ +
383
+ crat;
384
+
385
+ std::vector<std::vector<std::vector<std::vector<float >>>> output_tensor (
386
+ batch_size,
387
+ std::vector<std::vector<std::vector<float >>>(
388
+ out_height,
389
+ std::vector<std::vector<float >>(
390
+ out_width, std::vector<float >(kernel_out_channels, 0 ))));
391
+ size_t one_size = (kernel_height * (1 + 2 * dilations_) - 1 ) / 2 ;
392
+
309
393
for (size_t b = 0 ; b < batch_size; ++b) {
310
- for (size_t oc = 0 ; oc < kernel_out_channels; ++oc) {
311
- for (size_t i = 0 ; i < out_height; ++i) {
312
- for (size_t j = 0 ; j < out_width; ++j) {
313
- float value = 0.0 ;
314
- for (size_t kh = 0 ; kh < kernel_height; ++kh) {
315
- for (size_t kw = 0 ; kw < kernel_width; ++kw) {
316
- for (size_t ic = 0 ; ic < in_channels; ++ic) {
317
- size_t vert_start = i * stride_ + kh * dilations_;
318
- size_t horiz_start = j * stride_ + kw * dilations_;
319
-
320
- if (vert_start < padded_input[0 ].size () &&
321
- horiz_start < padded_input[0 ][0 ].size ()) {
322
- value += padded_input[b][vert_start][horiz_start][ic] *
323
- kernel[kh][kw][ic][oc];
324
- }
394
+ for (size_t c = 0 ; c < kernel_out_channels; ++c) {
395
+ for (size_t i = 0 ; i < out_height; i += stride_) {
396
+ for (size_t j = 0 ; j < out_width; j += stride_) {
397
+ float value = 0 ;
398
+ for (size_t ic = 0 ; ic < in_channels; ++ic) {
399
+ for (int h = (-1 * static_cast <int >(one_size));
400
+ h <= static_cast <int >(one_size); ++h) {
401
+ for (int w = (-1 * static_cast <int >(one_size));
402
+ w <= static_cast <int >(one_size); ++w) {
403
+ value += padded_input[b][i + one_size + h]
404
+ [j + one_size + w][ic] *
405
+ dil_kernel[one_size + h][one_size + w][ic][c];
325
406
}
326
407
}
327
408
}
328
- output_tensor[b][i][j][oc ] = value;
409
+ output_tensor[b][i][j][c ] = value;
329
410
}
330
411
}
331
412
}
332
413
}
333
- Shape sh ({batch_size, out_height, out_width, kernel_out_channels});
414
+
415
+ Shape sh ({batch_size, kernel_out_channels, out_height, out_width});
334
416
std::vector<float > one_d_vector (batch_size * out_height * out_width *
335
417
kernel_out_channels);
336
418
size_t index_1d = 0 ;
337
419
for (size_t i = 0 ; i < batch_size; ++i) {
338
- for (size_t j = 0 ; j < out_height ; ++j ) {
339
- for (size_t k = 0 ; k < out_width ; ++k ) {
340
- for (size_t l = 0 ; l < kernel_out_channels ; ++l ) {
420
+ for (size_t l = 0 ; l < kernel_out_channels ; ++l ) {
421
+ for (size_t j = 0 ; j < out_height ; ++j ) {
422
+ for (size_t k = 0 ; k < out_width ; ++k ) {
341
423
one_d_vector[index_1d++] = output_tensor[i][j][k][l];
342
424
}
343
425
}
0 commit comments