@@ -56,14 +56,9 @@ void CpuDeviceInterface::initialize(
   timeBase_ = timeBase;
   outputDims_ = outputDims;
 
-  // TODO: rationalize comment below with new stuff.
-  // By default, we want to use swscale for color conversion because it is
-  // faster. However, it has width requirements, so we may need to fall back
-  // to filtergraph. We also need to respect what was requested from the
-  // options; we respect the options unconditionally, so it's possible for
-  // swscale's width requirements to be violated. We don't expose the ability to
-  // choose color conversion library publicly; we only use this ability
-  // internally.
+  // We want to use swscale for color conversion if possible because it is
+  // faster than filtergraph. The following are the conditions we need to meet
+  // to use it.
 
   // We can only use swscale when we have a single resize transform. Note that
   // this means swscale will not support the case of having several,
@@ -76,12 +71,14 @@ void CpuDeviceInterface::initialize(
   // https://stackoverflow.com/questions/74351955/turn-off-sw-scale-conversion-to-planar-yuv-32-byte-alignment-requirements
   bool isWidthSwScaleCompatible = (outputDims_.width % 32) == 0;
 
+  // Note that we do not expose this capability in the public API, only through
+  // the core API.
   bool userRequestedSwScale = videoStreamOptions_.colorConversionLibrary ==
       ColorConversionLibrary::SWSCALE;
 
   // Note that we treat the transform limitation differently from the width
   // limitation. That is, we consider the transforms being compatible with
-  // sws_scale as a hard requirement. If the transforms are not compatiable,
+  // swscale as a hard requirement. If the transforms are not compatible,
   // then we will end up not applying the transforms, and that is wrong.
   //
   // The width requirement, however, is a soft requirement. Even if we don't
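
Taken together, the comments above describe a selection rule along these lines. This is a paraphrased sketch, not code from this diff (and it ignores an explicit request for filtergraph): `areTransformsSwScaleCompatible` is an assumed name for the transform check, and the `else` branch assumes a `FILTERGRAPH` enum value.

```cpp
// Sketch of the rule the comments describe: the transform check is a hard
// gate, while an explicit user request may override only the width check.
if (areTransformsSwScaleCompatible &&
    (userRequestedSwScale || isWidthSwScaleCompatible)) {
  colorConversionLibrary_ = ColorConversionLibrary::SWSCALE;
} else {
  colorConversionLibrary_ = ColorConversionLibrary::FILTERGRAPH;
}
```
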
@@ -94,7 +91,7 @@ void CpuDeviceInterface::initialize(
     colorConversionLibrary_ = ColorConversionLibrary::SWSCALE;
 
   // We established above that if the transforms are swscale compatible and
-  // non-empty, then they must have only one transforms, and that transform is
+  // non-empty, then they must have only one transform, and that transform is
   // ResizeTransform.
   if (!transforms.empty()) {
     auto resize = dynamic_cast<ResizeTransform*>(transforms[0].get());
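
One detail worth noting from the hunk above: `dynamic_cast` returns `nullptr` when `transforms[0]` is not actually a `ResizeTransform`, so the invariant stated in the comment can be enforced at runtime. A hedged sketch of such a check (the message text is illustrative, not from this diff):

```cpp
auto resize = dynamic_cast<ResizeTransform*>(transforms[0].get());
TORCH_CHECK(resize != nullptr, "Single transform must be a ResizeTransform");
```
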
@@ -207,7 +204,7 @@ void CpuDeviceInterface::convertAVFrameToFrameOutput(
         std::make_unique<FilterGraph>(filtersContext, videoStreamOptions_);
     prevFiltersContext_ = std::move(filtersContext);
   }
-  outputTensor = toTensor(filterGraphContext_->convert(avFrame));
+  outputTensor = rgbAVFrameToTensor(filterGraphContext_->convert(avFrame));
 
   // Similarly to above, if this check fails it means the frame wasn't
   // reshaped to its expected dimensions by filtergraph.
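
The fragment above is the tail of a caching pattern: a new `FilterGraph` is built only when the filters context changes. A hypothetical reconstruction of the enclosing logic, assuming `FiltersContext` supports `!=` comparison:

```cpp
// Rebuild the filtergraph only when the desired filters differ from the
// ones used to build the cached instance; remember the context afterwards.
if (!filterGraphContext_ || prevFiltersContext_ != filtersContext) {
  filterGraphContext_ =
      std::make_unique<FilterGraph>(filtersContext, videoStreamOptions_);
  prevFiltersContext_ = std::move(filtersContext);
}
outputTensor = rgbAVFrameToTensor(filterGraphContext_->convert(avFrame));
```
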
@@ -256,21 +253,6 @@ int CpuDeviceInterface::convertAVFrameToTensorUsingSwScale(
   return resultHeight;
 }
 
-torch::Tensor CpuDeviceInterface::toTensor(const UniqueAVFrame& avFrame) {
-  TORCH_CHECK_EQ(avFrame->format, AV_PIX_FMT_RGB24);
-
-  int height = avFrame->height;
-  int width = avFrame->width;
-  std::vector<int64_t> shape = {height, width, 3};
-  std::vector<int64_t> strides = {avFrame->linesize[0], 3, 1};
-  AVFrame* avFrameClone = av_frame_clone(avFrame.get());
-  auto deleter = [avFrameClone](void*) {
-    UniqueAVFrame avFrameToDelete(avFrameClone);
-  };
-  return torch::from_blob(
-      avFrameClone->data[0], shape, strides, deleter, {torch::kUInt8});
-}
-
 void CpuDeviceInterface::createSwsContext(
     const SwsFrameContext& swsFrameContext,
     const enum AVColorSpace colorspace) {
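
The removed `toTensor` helper (presumably superseded by the `rgbAVFrameToTensor` call in the earlier hunk) used a zero-copy interop pattern worth keeping in mind: wrap the AVFrame's pixel data with `torch::from_blob` and tie the frame's lifetime to the tensor through the deleter. A self-contained sketch of that pattern, using plain FFmpeg types instead of the repo's `UniqueAVFrame` alias:

```cpp
#include <torch/torch.h>

extern "C" {
#include <libavutil/frame.h>
}

// Wrap an RGB24 AVFrame in an HWC uint8 tensor without copying pixels.
// av_frame_clone() adds a reference to the underlying buffers; the deleter
// drops that reference once the tensor is destroyed.
torch::Tensor rgbFrameToTensor(const AVFrame* avFrame) {
  TORCH_CHECK(avFrame->format == AV_PIX_FMT_RGB24, "expected RGB24 input");
  std::vector<int64_t> shape = {avFrame->height, avFrame->width, 3};
  // linesize[0] is the row stride in bytes and may include padding.
  std::vector<int64_t> strides = {avFrame->linesize[0], 3, 1};
  AVFrame* clone = av_frame_clone(avFrame);
  auto deleter = [clone](void*) {
    AVFrame* toFree = clone;
    av_frame_free(&toFree);
  };
  return torch::from_blob(
      clone->data[0], shape, strides, deleter, torch::kUInt8);
}
```
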