meta-pytorch
diff --git a/‎README.md‎
Lines changed: 23 additions & 12 deletions b/‎README.md‎
Lines changed: 23 additions & 12 deletions
diff --git a/‎src/torchcodec/_core/Encoder.cpp‎
Lines changed: 279 additions & 0 deletions b/‎src/torchcodec/_core/Encoder.cpp‎
Lines changed: 279 additions & 0 deletions
@@ -131,6 +131,7 @@ The following table indicates the compatibility between versions of
 | `torchcodec`       | `torch`            | Python              |
 | ------------------ | ------------------ | ------------------- |
 | `main` / `nightly` | `main` / `nightly` | `>=3.10`, `<=3.13`   |
+| `0.7`              | `2.8`              | `>=3.9`, `<=3.13`   |
 | `0.6`              | `2.8`              | `>=3.9`, `<=3.13`   |
 | `0.5`              | `2.7`              | `>=3.9`, `<=3.13`   |
 | `0.4`              | `2.7`              | `>=3.9`, `<=3.13`   |
@@ -145,12 +146,7 @@ First, make sure you have a GPU that has NVDEC hardware that can decode the
 format you want. Refer to Nvidia's GPU support matrix for more details
 [here](https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new).
 
-1. Install Pytorch corresponding to your CUDA Toolkit using the
-   [official instructions](https://pytorch.org/get-started/locally/). You'll
-   need the `libnpp` and `libnvrtc` CUDA libraries, which are usually part of
-   the CUDA Toolkit.
-
-2. Install or compile FFmpeg with NVDEC support.
+1. Install FFmpeg with NVDEC support.
    TorchCodec with CUDA should work with FFmpeg versions in [4, 7].
 
    If FFmpeg is not already installed, or you need a more recent version, an
@@ -162,10 +158,6 @@ format you want. Refer to Nvidia's GPU support matrix for more details
    conda install "ffmpeg<8" -c conda-forge
    ```
 
-   If you are building FFmpeg from source you can follow Nvidia's guide to
-   configuring and installing FFmpeg with NVDEC support
-   [here](https://docs.nvidia.com/video-technologies/video-codec-sdk/12.0/ffmpeg-with-nvidia-gpu/index.html).
-
    After installing FFmpeg make sure it has NVDEC support when you list the supported
    decoders:
 
@@ -181,8 +173,18 @@ format you want. Refer to Nvidia's GPU support matrix for more details
    ffmpeg -hwaccel cuda -hwaccel_output_format cuda -i test/resources/nasa_13013.mp4 -f null -
    ```
 
-3. Install TorchCodec by passing in an `--index-url` parameter that corresponds
-   to your CUDA Toolkit version, example:
+#### Linux
+
+2. Install PyTorch corresponding to your CUDA Toolkit using the
+   [official instructions](https://pytorch.org/get-started/locally/). You'll
+   need the `libnpp` and `libnvrtc` CUDA libraries, which are usually part of
+   the CUDA Toolkit.
+
+
+3. Install TorchCodec
+
+   Pass in an `--index-url` parameter that corresponds to your CUDA Toolkit
+   version, for example:
 
    ```bash
    # This corresponds to CUDA Toolkit version 12.6. It should be the same one
@@ -193,6 +195,15 @@ format you want. Refer to Nvidia's GPU support matrix for more details
    Note that without passing in the `--index-url` parameter, `pip` installs
    the CPU-only version of TorchCodec.
 
+#### Windows
+
+2. On Windows (experimental support), you'll need to rely on `conda` to install
+   both PyTorch and TorchCodec:
+
+   ```bash
+   conda install -c conda-forge "torchcodec=*=*cuda*"
+   ```
+
 ## Benchmark Results
 
 The following was generated by running [our benchmark script](./benchmarks/decoders/generate_readme_data.py) on a lightly loaded 22-core machine with an Nvidia A100 with
 
@@ -511,4 +511,283 @@ void AudioEncoder::flushBuffers() {
 
   encodeFrame(autoAVPacket, UniqueAVFrame(nullptr));
 }
+
+namespace {
+
+// Checks that `frames` has uint8 dtype, exactly 4 dimensions, and a size of 3
+// along dimension 1, raising through TORCH_CHECK otherwise. Returns the
+// result of frames.contiguous().
+torch::Tensor validateFrames(const torch::Tensor& frames) {
+  const auto dtype = frames.dtype();
+  TORCH_CHECK(
+      dtype == torch::kUInt8, "frames must have uint8 dtype, got ", dtype);
+
+  const auto numDims = frames.dim();
+  TORCH_CHECK(
+      numDims == 4,
+      "frames must have 4 dimensions (N, C, H, W), got ",
+      numDims);
+
+  // Only read dimension 1 once we know the tensor has 4 dimensions.
+  const auto numChannels = frames.sizes()[1];
+  TORCH_CHECK(
+      numChannels == 3,
+      "frame must have 3 channels (R, G, B), got ",
+      numChannels);
+
+  // TODO-VideoEncoder: Investigate if non-contiguous frames can be accepted
+  return frames.contiguous();
+}
+
+} // namespace
+
+// Flushes and closes the output I/O context if one is attached to the format
+// context, then clears the pointer.
+VideoEncoder::~VideoEncoder() {
+  if (!avFormatContext_ || !avFormatContext_->pb) {
+    // Nothing was opened, so there is nothing to release here.
+    return;
+  }
+  avio_flush(avFormatContext_->pb);
+  avio_close(avFormatContext_->pb);
+  avFormatContext_->pb = nullptr;
+}
+
+// Validates the inputs, allocates an output format context for `fileName`,
+// opens the destination for writing and initializes the video encoder.
+//
+// Parameters:
+//   frames: validated (and made contiguous) by validateFrames().
+//   frameRate: used to derive the codec time base and frame rate; must be > 0.
+//   fileName: destination path; also passed to FFmpeg to select the output
+//     format.
+//   videoStreamOptions: forwarded to initializeEncoder().
+//
+// Raises (through TORCH_CHECK) if any input is invalid, if the format context
+// cannot be allocated, or if the destination cannot be opened.
+VideoEncoder::VideoEncoder(
+    const torch::Tensor& frames,
+    int frameRate,
+    std::string_view fileName,
+    const VideoStreamOptions& videoStreamOptions)
+    : frames_(validateFrames(frames)), inFrameRate_(frameRate) {
+  // Reject invalid frame rates up front: the value is used as the time base
+  // denominator, and failing here avoids creating an empty output file.
+  TORCH_CHECK(frameRate > 0, "frame_rate=", frameRate, " must be > 0.");
+
+  setFFmpegLogLevel();
+
+  // std::string_view::data() is not guaranteed to be null-terminated, but the
+  // FFmpeg C APIs below expect a C string. Copy into a std::string first.
+  const std::string fileNameStr(fileName);
+
+  // Allocate output format context
+  AVFormatContext* avFormatContext = nullptr;
+  int status = avformat_alloc_output_context2(
+      &avFormatContext, nullptr, nullptr, fileNameStr.c_str());
+
+  TORCH_CHECK(
+      avFormatContext != nullptr,
+      "Couldn't allocate AVFormatContext. ",
+      "The destination file is ",
+      fileName,
+      ", check the desired extension? ",
+      getFFMPEGErrorStringFromErrorCode(status));
+  avFormatContext_.reset(avFormatContext);
+
+  status =
+      avio_open(&avFormatContext_->pb, fileNameStr.c_str(), AVIO_FLAG_WRITE);
+  TORCH_CHECK(
+      status >= 0,
+      "avio_open failed. The destination file is ",
+      fileName,
+      ", make sure it's a valid path? ",
+      getFFMPEGErrorStringFromErrorCode(status));
+  // TODO-VideoEncoder: Add tests for above fileName related checks
+
+  // If initialization throws, ~VideoEncoder() will not run for this partially
+  // constructed object, so the I/O context opened above must be closed here.
+  try {
+    initializeEncoder(videoStreamOptions);
+  } catch (...) {
+    avio_close(avFormatContext_->pb);
+    avFormatContext_->pb = nullptr;
+    throw;
+  }
+}
+
+// Finds the output format's default video encoder, configures and opens the
+// codec context from `videoStreamOptions` and the input frames, then creates
+// the output stream and copies the codec parameters into it.
+//
+// Raises (through TORCH_CHECK) if the codec cannot be found, allocated or
+// opened, if the bit rate is negative, or if the stream cannot be created.
+void VideoEncoder::initializeEncoder(
+    const VideoStreamOptions& videoStreamOptions) {
+  const AVCodec* avCodec =
+      avcodec_find_encoder(avFormatContext_->oformat->video_codec);
+  TORCH_CHECK(avCodec != nullptr, "Video codec not found");
+
+  AVCodecContext* avCodecContext = avcodec_alloc_context3(avCodec);
+  TORCH_CHECK(avCodecContext != nullptr, "Couldn't allocate codec context.");
+  avCodecContext_.reset(avCodecContext);
+
+  // Set encoding options
+  // TODO-VideoEncoder: Allow bitrate to be set
+  std::optional<int> desiredBitRate = videoStreamOptions.bitRate;
+  if (desiredBitRate.has_value()) {
+    TORCH_CHECK(
+        *desiredBitRate >= 0, "bit_rate=", *desiredBitRate, " must be >= 0.");
+  }
+  avCodecContext_->bit_rate = desiredBitRate.value_or(0);
+
+  // Store dimension order and input pixel format
+  // TODO-VideoEncoder: Remove assumption that tensor in NCHW format
+  auto sizes = frames_.sizes();
+  inPixelFormat_ = AV_PIX_FMT_GBRP;
+  inHeight_ = sizes[2];
+  inWidth_ = sizes[3];
+
+  // Use specified dimensions or input dimensions
+  // TODO-VideoEncoder: Allow height and width to be set
+  outWidth_ = videoStreamOptions.width.value_or(inWidth_);
+  outHeight_ = videoStreamOptions.height.value_or(inHeight_);
+
+  // Use YUV420P as default output format
+  // TODO-VideoEncoder: Enable other pixel formats
+  outPixelFormat_ = AV_PIX_FMT_YUV420P;
+
+  // Configure codec parameters
+  avCodecContext_->codec_id = avCodec->id;
+  avCodecContext_->width = outWidth_;
+  avCodecContext_->height = outHeight_;
+  avCodecContext_->pix_fmt = outPixelFormat_;
+  // TODO-VideoEncoder: Verify that frame_rate and time_base are correct
+  avCodecContext_->time_base = {1, inFrameRate_};
+  avCodecContext_->framerate = {inFrameRate_, 1};
+
+  // TODO-VideoEncoder: Allow GOP size and max B-frames to be set
+  if (videoStreamOptions.gopSize.has_value()) {
+    avCodecContext_->gop_size = *videoStreamOptions.gopSize;
+  } else {
+    avCodecContext_->gop_size = 12; // Default GOP size
+  }
+
+  if (videoStreamOptions.maxBFrames.has_value()) {
+    avCodecContext_->max_b_frames = *videoStreamOptions.maxBFrames;
+  } else {
+    avCodecContext_->max_b_frames = 0; // Default: do not use B-frames
+  }
+
+  // Some containers want stream headers to be stored separately in the codec
+  // extradata rather than in the packets. This must be requested before the
+  // codec is opened.
+  if (avFormatContext_->oformat->flags & AVFMT_GLOBALHEADER) {
+    avCodecContext_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+  }
+
+  int status = avcodec_open2(avCodecContext_.get(), avCodec, nullptr);
+  TORCH_CHECK(
+      status == AVSUCCESS,
+      "avcodec_open2 failed: ",
+      getFFMPEGErrorStringFromErrorCode(status));
+
+  AVStream* avStream = avformat_new_stream(avFormatContext_.get(), nullptr);
+  TORCH_CHECK(avStream != nullptr, "Couldn't create new stream.");
+
+  // Set the stream time base to encode correct frame timestamps
+  avStream->time_base = avCodecContext_->time_base;
+  // Copy parameters only after avcodec_open2 so that anything filled in by the
+  // encoder while opening is reflected in the stream.
+  status = avcodec_parameters_from_context(
+      avStream->codecpar, avCodecContext_.get());
+  TORCH_CHECK(
+      status == AVSUCCESS,
+      "avcodec_parameters_from_context failed: ",
+      getFFMPEGErrorStringFromErrorCode(status));
+  streamIndex_ = avStream->index;
+}
+
+// Writes the container header, encodes every frame of frames_ in order,
+// flushes the encoder and writes the container trailer. May only be called
+// once per VideoEncoder; raises through TORCH_CHECK on a second call or on
+// any FFmpeg failure.
+void VideoEncoder::encode() {
+  // To be on the safe side we enforce that encode() can only be called once
+  TORCH_CHECK(!encodeWasCalled_, "Cannot call encode() twice.");
+  encodeWasCalled_ = true;
+
+  const int headerStatus = avformat_write_header(avFormatContext_.get(), nullptr);
+  TORCH_CHECK(
+      headerStatus == AVSUCCESS,
+      "Error in avformat_write_header: ",
+      getFFMPEGErrorStringFromErrorCode(headerStatus));
+
+  // The same packet wrapper is reused for every frame.
+  AutoAVPacket autoAVPacket;
+  const int numFrames = frames_.sizes()[0];
+  int frameIndex = 0;
+  while (frameIndex < numFrames) {
+    torch::Tensor currFrame = frames_[frameIndex];
+    // The frame index is also used as the frame's presentation timestamp.
+    UniqueAVFrame avFrame = convertTensorToAVFrame(currFrame, frameIndex);
+    encodeFrame(autoAVPacket, avFrame);
+    ++frameIndex;
+  }
+
+  flushBuffers();
+
+  const int trailerStatus = av_write_trailer(avFormatContext_.get());
+  TORCH_CHECK(
+      trailerStatus == AVSUCCESS,
+      "Error in av_write_trailer: ",
+      getFFMPEGErrorStringFromErrorCode(trailerStatus));
+}
+
+// Converts one frame tensor into an AVFrame in the output pixel format and
+// output dimensions, with its pts set to `frameIndex`. The tensor memory is
+// wrapped (not copied) as a planar input frame and converted with sws_scale.
+// Raises through TORCH_CHECK if an allocation or the conversion fails.
+UniqueAVFrame VideoEncoder::convertTensorToAVFrame(
+    const torch::Tensor& frame,
+    int frameIndex) {
+  // The scaling context is created on first use and cached afterwards.
+  if (!swsContext_) {
+    swsContext_.reset(sws_getContext(
+        inWidth_,
+        inHeight_,
+        inPixelFormat_,
+        outWidth_,
+        outHeight_,
+        outPixelFormat_,
+        SWS_BILINEAR,
+        nullptr,
+        nullptr,
+        nullptr));
+    TORCH_CHECK(swsContext_ != nullptr, "Failed to create scaling context");
+  }
+
+  // Destination frame: owns its own buffer, in the output format.
+  UniqueAVFrame avFrame(av_frame_alloc());
+  TORCH_CHECK(avFrame != nullptr, "Failed to allocate AVFrame");
+  avFrame->format = outPixelFormat_;
+  avFrame->width = outWidth_;
+  avFrame->height = outHeight_;
+  avFrame->pts = frameIndex;
+
+  int status = av_frame_get_buffer(avFrame.get(), 0);
+  TORCH_CHECK(status >= 0, "Failed to allocate frame buffer");
+
+  // Source frame: a temporary view over the tensor data in the input format.
+  UniqueAVFrame inputFrame(av_frame_alloc());
+  TORCH_CHECK(inputFrame != nullptr, "Failed to allocate input AVFrame");
+  inputFrame->format = inPixelFormat_;
+  inputFrame->width = inWidth_;
+  inputFrame->height = inHeight_;
+
+  uint8_t* tensorData = static_cast<uint8_t*>(frame.data_ptr());
+
+  // TODO-VideoEncoder: Reorder tensor if in NHWC format
+  int channelSize = inHeight_ * inWidth_;
+  // Reorder RGB -> GBR for AV_PIX_FMT_GBRP format: plane p of the input frame
+  // reads tensor channel kChannelForPlane[p].
+  // TODO-VideoEncoder: Determine if FFmpeg supports planar RGB input format
+  constexpr int kChannelForPlane[3] = {1, 2, 0};
+  for (int plane = 0; plane < 3; ++plane) {
+    inputFrame->data[plane] =
+        tensorData + (kChannelForPlane[plane] * channelSize);
+    inputFrame->linesize[plane] = inWidth_;
+  }
+
+  status = sws_scale(
+      swsContext_.get(),
+      inputFrame->data,
+      inputFrame->linesize,
+      0,
+      inputFrame->height,
+      avFrame->data,
+      avFrame->linesize);
+  TORCH_CHECK(status == outHeight_, "sws_scale failed");
+  return avFrame;
+}
+
+// Sends `avFrame` to the encoder and writes every packet the encoder makes
+// available to the output. Returns when the encoder reports EAGAIN, or when
+// it reports EOF, in which case the muxer's buffered packets are flushed
+// first. Raises through TORCH_CHECK on any FFmpeg failure.
+void VideoEncoder::encodeFrame(
+    AutoAVPacket& autoAVPacket,
+    const UniqueAVFrame& avFrame) {
+  auto status = avcodec_send_frame(avCodecContext_.get(), avFrame.get());
+  TORCH_CHECK(
+      status == AVSUCCESS,
+      "Error while sending frame: ",
+      getFFMPEGErrorStringFromErrorCode(status));
+
+  for (;;) {
+    ReferenceAVPacket packet(autoAVPacket);
+    status = avcodec_receive_packet(avCodecContext_.get(), packet.get());
+
+    if (status == AVERROR(EAGAIN)) {
+      // No packet available for now.
+      return;
+    }
+
+    if (status == AVERROR_EOF) {
+      // Flush remaining buffered packets
+      status = av_interleaved_write_frame(avFormatContext_.get(), nullptr);
+      TORCH_CHECK(
+          status == AVSUCCESS,
+          "Failed to flush packet: ",
+          getFFMPEGErrorStringFromErrorCode(status));
+      return;
+    }
+
+    TORCH_CHECK(
+        status >= 0,
+        "Error receiving packet: ",
+        getFFMPEGErrorStringFromErrorCode(status));
+
+    packet->stream_index = streamIndex_;
+
+    status = av_interleaved_write_frame(avFormatContext_.get(), packet.get());
+    TORCH_CHECK(
+        status == AVSUCCESS,
+        "Error in av_interleaved_write_frame: ",
+        getFFMPEGErrorStringFromErrorCode(status));
+  }
+}
+
+// Signals end of input to the encoder by sending it a null frame through
+// encodeFrame().
+void VideoEncoder::flushBuffers() {
+  AutoAVPacket flushPacket;
+  // A null frame tells the encoder that no more input is coming.
+  const UniqueAVFrame endOfInput(nullptr);
+  encodeFrame(flushPacket, endOfInput);
+}
+
 } // namespace facebook::torchcodec