-
Notifications
You must be signed in to change notification settings - Fork 64
VideoEncoder first pass, round trip test #866
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
f4fd9fd
8578060
040d4b2
dc7319b
dfde1cc
bc102f0
c021019
529ae61
1cca109
adbf151
08b5851
b96bd56
68b9de6
49ab01f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -511,4 +511,283 @@ void AudioEncoder::flushBuffers() { | |
|
||
encodeFrame(autoAVPacket, UniqueAVFrame(nullptr)); | ||
} | ||
|
||
namespace { | ||
|
||
// Validates the tensor handed to the video encoder and returns a contiguous
// version of it.
//
// Raises (through TORCH_CHECK) unless `frames`:
//   - has dtype uint8,
//   - has exactly 4 dimensions, and
//   - has size 3 along dimension 1.
torch::Tensor validateFrames(const torch::Tensor& frames) {
  const auto dtype = frames.dtype();
  TORCH_CHECK(
      dtype == torch::kUInt8, "frames must have uint8 dtype, got ", dtype);

  const auto numDims = frames.dim();
  TORCH_CHECK(
      numDims == 4,
      "frames must have 4 dimensions (N, C, H, W), got ",
      numDims);

  // Only read after the rank check above, so dimension 1 is known to exist.
  const auto numChannels = frames.sizes()[1];
  TORCH_CHECK(
      numChannels == 3,
      "frame must have 3 channels (R, G, B), got ",
      numChannels);

  // TODO-VideoEncoder: Investigate if non-contiguous frames can be accepted
  return frames.contiguous();
}
NicolasHug marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
} // namespace | ||
|
||
// Flushes and closes the output I/O context attached to the format context,
// if there is one, then clears the pointer.
VideoEncoder::~VideoEncoder() {
  if (!avFormatContext_ || !avFormatContext_->pb) {
    // Nothing was opened, so there is nothing to release.
    return;
  }

  auto& ioContext = avFormatContext_->pb;
  avio_flush(ioContext);
  avio_close(ioContext);
  ioContext = nullptr;
}
|
||
// Builds an encoder that will write `frames` to the file `fileName`.
//
// Parameters:
//   frames             - validated by validateFrames(); a contiguous copy is
//                        stored in frames_.
//   frameRate          - frames per second; must be strictly positive because
//                        it becomes the denominator of the codec time base.
//   fileName           - destination path; FFmpeg derives the container
//                        format from it.
//   videoStreamOptions - forwarded to initializeEncoder().
//
// Raises (through TORCH_CHECK) when the frame rate is not positive, when the
// output format context cannot be allocated, when the destination cannot be
// opened for writing, or when initializeEncoder() fails.
VideoEncoder::VideoEncoder(
    const torch::Tensor& frames,
    int frameRate,
    std::string_view fileName,
    const VideoStreamOptions& videoStreamOptions)
    : frames_(validateFrames(frames)), inFrameRate_(frameRate) {
  TORCH_CHECK(frameRate > 0, "frame_rate=", frameRate, " must be > 0.");
  setFFmpegLogLevel();

  // FFmpeg takes NUL-terminated C strings, which std::string_view::data()
  // does not guarantee. Keep an owned, terminated copy for the C API calls.
  const std::string fileNameStr(fileName);

  // Allocate output format context
  AVFormatContext* avFormatContext = nullptr;
  int status = avformat_alloc_output_context2(
      &avFormatContext, nullptr, nullptr, fileNameStr.c_str());

  TORCH_CHECK(
      avFormatContext != nullptr,
      "Couldn't allocate AVFormatContext. ",
      "The destination file is ",
      fileName,
      ", check the desired extension? ",
      getFFMPEGErrorStringFromErrorCode(status));
  avFormatContext_.reset(avFormatContext);

  status =
      avio_open(&avFormatContext_->pb, fileNameStr.c_str(), AVIO_FLAG_WRITE);
  TORCH_CHECK(
      status >= 0,
      "avio_open failed. The destination file is ",
      fileName,
      ", make sure it's a valid path? ",
      getFFMPEGErrorStringFromErrorCode(status));
  // TODO-VideoEncoder: Add tests for above fileName related checks

  // ~VideoEncoder() does not run when the constructor throws, so the I/O
  // context opened above has to be closed here before the error propagates.
  try {
    initializeEncoder(videoStreamOptions);
  } catch (...) {
    avio_close(avFormatContext_->pb);
    avFormatContext_->pb = nullptr;
    throw;
  }
}
|
||
void VideoEncoder::initializeEncoder( | ||
const VideoStreamOptions& videoStreamOptions) { | ||
const AVCodec* avCodec = | ||
avcodec_find_encoder(avFormatContext_->oformat->video_codec); | ||
TORCH_CHECK(avCodec != nullptr, "Video codec not found"); | ||
|
||
AVCodecContext* avCodecContext = avcodec_alloc_context3(avCodec); | ||
TORCH_CHECK(avCodecContext != nullptr, "Couldn't allocate codec context."); | ||
avCodecContext_.reset(avCodecContext); | ||
|
||
// Set encoding options | ||
// TODO-VideoEncoder: Allow bitrate to be set | ||
std::optional<int> desiredBitRate = videoStreamOptions.bitRate; | ||
if (desiredBitRate.has_value()) { | ||
TORCH_CHECK( | ||
*desiredBitRate >= 0, "bit_rate=", *desiredBitRate, " must be >= 0."); | ||
} | ||
avCodecContext_->bit_rate = desiredBitRate.value_or(0); | ||
|
||
// Store dimension order and input pixel format | ||
// TODO-VideoEncoder: Remove assumption that tensor in NCHW format | ||
auto sizes = frames_.sizes(); | ||
inPixelFormat_ = AV_PIX_FMT_GBRP; | ||
inHeight_ = sizes[2]; | ||
inWidth_ = sizes[3]; | ||
|
||
// Use specified dimensions or input dimensions | ||
// TODO-VideoEncoder: Allow height and width to be set | ||
outWidth_ = videoStreamOptions.width.value_or(inWidth_); | ||
outHeight_ = videoStreamOptions.height.value_or(inHeight_); | ||
|
||
// Use YUV420P as default output format | ||
NicolasHug marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// TODO-VideoEncoder: Enable other pixel formats | ||
outPixelFormat_ = AV_PIX_FMT_YUV420P; | ||
|
||
// Configure codec parameters | ||
avCodecContext_->codec_id = avCodec->id; | ||
avCodecContext_->width = outWidth_; | ||
avCodecContext_->height = outHeight_; | ||
avCodecContext_->pix_fmt = outPixelFormat_; | ||
// TODO-VideoEncoder: Verify that frame_rate and time_base are correct | ||
avCodecContext_->time_base = {1, inFrameRate_}; | ||
Dan-Flores marked this conversation as resolved.
Show resolved
Hide resolved
|
||
avCodecContext_->framerate = {inFrameRate_, 1}; | ||
|
||
// TODO-VideoEncoder: Allow GOP size and max B-frames to be set | ||
if (videoStreamOptions.gopSize.has_value()) { | ||
avCodecContext_->gop_size = *videoStreamOptions.gopSize; | ||
} else { | ||
avCodecContext_->gop_size = 12; // Default GOP size | ||
} | ||
|
||
if (videoStreamOptions.maxBFrames.has_value()) { | ||
avCodecContext_->max_b_frames = *videoStreamOptions.maxBFrames; | ||
} else { | ||
avCodecContext_->max_b_frames = 0; // No max B-frames to reduce compression | ||
} | ||
|
||
int status = avcodec_open2(avCodecContext_.get(), avCodec, nullptr); | ||
TORCH_CHECK( | ||
status == AVSUCCESS, | ||
"avcodec_open2 failed: ", | ||
getFFMPEGErrorStringFromErrorCode(status)); | ||
|
||
AVStream* avStream = avformat_new_stream(avFormatContext_.get(), nullptr); | ||
TORCH_CHECK(avStream != nullptr, "Couldn't create new stream."); | ||
|
||
// Set the stream time base to encode correct frame timestamps | ||
avStream->time_base = avCodecContext_->time_base; | ||
status = avcodec_parameters_from_context( | ||
avStream->codecpar, avCodecContext_.get()); | ||
TORCH_CHECK( | ||
status == AVSUCCESS, | ||
"avcodec_parameters_from_context failed: ", | ||
getFFMPEGErrorStringFromErrorCode(status)); | ||
streamIndex_ = avStream->index; | ||
} | ||
|
||
NicolasHug marked this conversation as resolved.
Show resolved
Hide resolved
|
||
void VideoEncoder::encode() { | ||
// To be on the safe side we enforce that encode() can only be called once | ||
TORCH_CHECK(!encodeWasCalled_, "Cannot call encode() twice."); | ||
encodeWasCalled_ = true; | ||
|
||
int status = avformat_write_header(avFormatContext_.get(), nullptr); | ||
TORCH_CHECK( | ||
status == AVSUCCESS, | ||
"Error in avformat_write_header: ", | ||
getFFMPEGErrorStringFromErrorCode(status)); | ||
|
||
AutoAVPacket autoAVPacket; | ||
int numFrames = frames_.sizes()[0]; | ||
for (int i = 0; i < numFrames; ++i) { | ||
torch::Tensor currFrame = frames_[i]; | ||
UniqueAVFrame avFrame = convertTensorToAVFrame(currFrame, i); | ||
encodeFrame(autoAVPacket, avFrame); | ||
} | ||
|
||
flushBuffers(); | ||
|
||
status = av_write_trailer(avFormatContext_.get()); | ||
TORCH_CHECK( | ||
status == AVSUCCESS, | ||
"Error in av_write_trailer: ", | ||
getFFMPEGErrorStringFromErrorCode(status)); | ||
} | ||
|
||
// Converts one frame tensor into a newly allocated AVFrame in the output
// pixel format and size, using a lazily created, cached swscale context.
//
// Parameters:
//   frame      - a single frame; its data is read as three consecutive planes
//                of inHeight_ * inWidth_ bytes each.
//   frameIndex - stored as the pts of the returned frame.
//
// Raises (through TORCH_CHECK) when a context, frame or buffer allocation
// fails, or when sws_scale does not produce outHeight_ rows.
UniqueAVFrame VideoEncoder::convertTensorToAVFrame(
    const torch::Tensor& frame,
    int frameIndex) {
  // Initialize and cache scaling context if it does not exist
  if (swsContext_ == nullptr) {
    auto* scaler = sws_getContext(
        inWidth_,
        inHeight_,
        inPixelFormat_,
        outWidth_,
        outHeight_,
        outPixelFormat_,
        SWS_BILINEAR,
        nullptr,
        nullptr,
        nullptr);
    swsContext_.reset(scaler);
    TORCH_CHECK(swsContext_ != nullptr, "Failed to create scaling context");
  }

  // Destination frame: owns its own buffer in the output format.
  UniqueAVFrame dstFrame(av_frame_alloc());
  TORCH_CHECK(dstFrame != nullptr, "Failed to allocate AVFrame");
  dstFrame->format = outPixelFormat_;
  dstFrame->width = outWidth_;
  dstFrame->height = outHeight_;
  dstFrame->pts = frameIndex;

  int status = av_frame_get_buffer(dstFrame.get(), 0);
  TORCH_CHECK(status >= 0, "Failed to allocate frame buffer");

  // Source frame: a temporary descriptor whose plane pointers are aimed at
  // the tensor's memory below (no buffer is allocated for it here).
  UniqueAVFrame srcFrame(av_frame_alloc());
  TORCH_CHECK(srcFrame != nullptr, "Failed to allocate input AVFrame");
  srcFrame->format = inPixelFormat_;
  srcFrame->width = inWidth_;
  srcFrame->height = inHeight_;

  // TODO-VideoEncoder: Reorder tensor if in NHWC format
  // TODO-VideoEncoder: Determine if FFmpeg supports RGB input format directly
  uint8_t* planes = static_cast<uint8_t*>(frame.data_ptr());
  int planeBytes = inHeight_ * inWidth_;

  // Reorder RGB -> GBR for AV_PIX_FMT_GBRP format: the tensor stores planes
  // as R, G, B while the frame wants G, B, R.
  srcFrame->data[0] = planes + planeBytes;
  srcFrame->data[1] = planes + (2 * planeBytes);
  srcFrame->data[2] = planes;

  // Every plane is one byte per pixel, so each row is inWidth_ bytes wide.
  for (int plane = 0; plane < 3; ++plane) {
    srcFrame->linesize[plane] = inWidth_;
  }

  status = sws_scale(
      swsContext_.get(),
      srcFrame->data,
      srcFrame->linesize,
      0,
      srcFrame->height,
      dstFrame->data,
      dstFrame->linesize);
  TORCH_CHECK(status == outHeight_, "sws_scale failed");
  return dstFrame;
}
|
||
// Sends one frame to the encoder and writes every packet the encoder makes
// available as a result to the output.
//
// Parameters:
//   autoAVPacket - packet storage reused for each received packet.
//   avFrame      - the frame to encode; flushBuffers() passes a null frame to
//                  signal end of input.
//
// Returns when the encoder reports EAGAIN, or after flushing the muxer once
// the encoder reports EOF. Raises (through TORCH_CHECK) on any other failure
// to send, receive or write.
void VideoEncoder::encodeFrame(
    AutoAVPacket& autoAVPacket,
    const UniqueAVFrame& avFrame) {
  int status = avcodec_send_frame(avCodecContext_.get(), avFrame.get());
  TORCH_CHECK(
      status == AVSUCCESS,
      "Error while sending frame: ",
      getFFMPEGErrorStringFromErrorCode(status));

  for (;;) {
    ReferenceAVPacket packet(autoAVPacket);
    status = avcodec_receive_packet(avCodecContext_.get(), packet.get());

    if (status == AVERROR(EAGAIN)) {
      // No packet available for now.
      return;
    }

    if (status == AVERROR_EOF) {
      // Flush remaining buffered packets
      status = av_interleaved_write_frame(avFormatContext_.get(), nullptr);
      TORCH_CHECK(
          status == AVSUCCESS,
          "Failed to flush packet: ",
          getFFMPEGErrorStringFromErrorCode(status));
      return;
    }

    TORCH_CHECK(
        status >= 0,
        "Error receiving packet: ",
        getFFMPEGErrorStringFromErrorCode(status));

    packet->stream_index = streamIndex_;

    status = av_interleaved_write_frame(avFormatContext_.get(), packet.get());
    TORCH_CHECK(
        status == AVSUCCESS,
        "Error in av_interleaved_write_frame: ",
        getFFMPEGErrorStringFromErrorCode(status));
  }
}
|
||
void VideoEncoder::flushBuffers() { | ||
AutoAVPacket autoAVPacket; | ||
// Send null frame to signal end of input | ||
encodeFrame(autoAVPacket, UniqueAVFrame(nullptr)); | ||
} | ||
|
||
} // namespace facebook::torchcodec |
Uh oh!
There was an error while loading. Please reload this page.