meta-pytorch
diff --git a/‎docs/source/api_ref_encoders.rst‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/api_ref_encoders.rst‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/conf.py‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/conf.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/index.rst‎
Lines changed: 8 additions & 0 deletions b/‎docs/source/index.rst‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎examples/encoding/video_encoding.py‎
Lines changed: 282 additions & 0 deletions b/‎examples/encoding/video_encoding.py‎
Lines changed: 282 additions & 0 deletions
diff --git a/‎src/torchcodec/_core/Encoder.cpp‎
Lines changed: 9 additions & 5 deletions b/‎src/torchcodec/_core/Encoder.cpp‎
Lines changed: 9 additions & 5 deletions
diff --git a/‎src/torchcodec/_core/Encoder.h‎
Lines changed: 3 additions & 3 deletions b/‎src/torchcodec/_core/Encoder.h‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎src/torchcodec/_core/SingleStreamDecoder.cpp‎
Lines changed: 1 addition & 1 deletion b/‎src/torchcodec/_core/SingleStreamDecoder.cpp‎
Lines changed: 1 addition & 1 deletion
@@ -16,3 +16,4 @@ For an audio decoder tutorial, see: :ref:`sphx_glr_generated_examples_encoding_a
     :template: class.rst
 
     AudioEncoder
+    VideoEncoder
@@ -87,6 +87,7 @@ def __call__(self, filename):
             assert "examples/encoding" in self.src_dir
             order = [
                 "audio_encoding.py",
+                "video_encoding.py",
             ]
 
         try:
 
@@ -98,6 +98,14 @@ Encoding
 
         How encode audio samples
 
+     .. grid-item-card:: :octicon:`file-code;1em`
+        Video Encoding
+        :img-top: _static/img/card-background.svg
+        :link: generated_examples/encoding/video_encoding.html
+        :link-type: url
+
+        How to encode video frames
+
 .. toctree::
    :maxdepth: 1
    :caption: TorchCodec documentation
 
@@ -0,0 +1,282 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+=======================================
+Encoding video frames with VideoEncoder
+=======================================
+
+In this example, we'll learn how to encode video frames to a file or to raw
+bytes using the :class:`~torchcodec.encoders.VideoEncoder` class.
+"""
+
+# %%
+# First, we'll download a video and decode some frames to tensors.
+# These will be the input to the :class:`~torchcodec.encoders.VideoEncoder`. For more details on decoding,
+# see :ref:`sphx_glr_generated_examples_decoding_basic_example.py`.
+# Otherwise, skip ahead to :ref:`creating_encoder`.
+
+import requests
+from torchcodec.decoders import VideoDecoder
+from IPython.display import Video
+
+
+def play_video(encoded_bytes):
+    return Video(
+        data=encoded_bytes.numpy().tobytes(),
+        embed=True,
+        width=640,
+        height=360,
+        mimetype="video/mp4",
+    )
+
+
+# Video source: https://www.pexels.com/video/adorable-cats-on-the-lawn-4977395/
+# Author: Altaf Shah.
+url = "https://videos.pexels.com/video-files/4977395/4977395-hd_1920_1080_24fps.mp4"
+
+response = requests.get(url, headers={"User-Agent": ""})
+if response.status_code != 200:
+    raise RuntimeError(f"Failed to download video. {response.status_code = }.")
+
+raw_video_bytes = response.content
+
+decoder = VideoDecoder(raw_video_bytes)
+frames = decoder.get_frames_in_range(0, 60).data  # Get first 60 frames
+frame_rate = decoder.metadata.average_fps
+
+# %%
+# .. _creating_encoder:
+#
+# Creating an encoder
+# -------------------
+#
+# Let's instantiate a :class:`~torchcodec.encoders.VideoEncoder`. We will need to provide
+# the frames to be encoded as a 4D tensor of shape
+# ``(num_frames, num_channels, height, width)`` with values in the ``[0, 255]``
+# range and ``torch.uint8`` dtype. We will also need to provide the frame rate of the input
+# video.
+#
+# .. note::
+#
+#     The ``frame_rate`` parameter corresponds to the frame rate of the
+#     *input* video. It will also be used for the frame rate of the *output* encoded video.
+from torchcodec.encoders import VideoEncoder
+
+print(f"{frames.shape = }, {frames.dtype = }")
+print(f"{frame_rate = } fps")
+
+encoder = VideoEncoder(frames=frames, frame_rate=frame_rate)
+
+# %%
+# Encoding to file, bytes, or file-like
+# -------------------------------------
+#
+# :class:`~torchcodec.encoders.VideoEncoder` supports encoding frames into a
+# file via the :meth:`~torchcodec.encoders.VideoEncoder.to_file` method, to
+# file-like objects via the :meth:`~torchcodec.encoders.VideoEncoder.to_file_like`
+# method, or to raw bytes via :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`.
+# For now we will use :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`, so we
+# can easily inspect and display the encoded video.
+
+encoded_frames = encoder.to_tensor(format="mp4")
+play_video(encoded_frames)
+
+# %%
+#
+# Now that we have encoded data, we can decode it back to verify the
+# round-trip encode/decode process works as expected:
+
+decoder_verify = VideoDecoder(encoded_frames)
+decoded_frames = decoder_verify.get_frames_in_range(0, 60).data
+
+print(f"Re-decoded video: {decoded_frames.shape = }")
+print(f"Original frames: {frames.shape = }")
+
+# %%
+# .. _codec_selection:
+#
+# Codec Selection
+# ---------------
+#
+# By default, the codec used is selected automatically using the file extension provided
+# in the ``dest`` parameter for the :meth:`~torchcodec.encoders.VideoEncoder.to_file` method,
+# or using the ``format`` parameter for the
+# :meth:`~torchcodec.encoders.VideoEncoder.to_file_like` and
+# :meth:`~torchcodec.encoders.VideoEncoder.to_tensor` methods.
+#
+# For example, when encoding to MP4 format, the default codec is typically ``H.264``.
+#
+# To use a codec other than the default, use the ``codec`` parameter.
+# You can specify either a specific codec implementation (e.g., ``"libx264"``)
+# or a codec specification (e.g., ``"h264"``). Different codecs offer
+# different tradeoffs between quality, file size, and encoding speed.
+#
+# .. note::
+#
+#     To see available encoders on your system, run ``ffmpeg -encoders``.
+#
+# Let's encode the same frames using different codecs:
+
+import tempfile
+from pathlib import Path
+
+# H.264 encoding
+h264_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+encoder.to_file(h264_output, codec="libx264")
+
+# H.265 encoding
+hevc_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+encoder.to_file(hevc_output, codec="hevc")
+
+# Now let's use ffprobe to verify the codec used in the output files
+import subprocess
+
+for output, name in [(h264_output, "h264_output"), (hevc_output, "hevc_output")]:
+    result = subprocess.run(
+        [
+            "ffprobe",
+            "-v",
+            "error",
+            "-select_streams",
+            "v:0",
+            "-show_entries",
+            "stream=codec_name",
+            "-of",
+            "default=noprint_wrappers=1:nokey=1",
+            output,
+        ],
+        capture_output=True,
+        text=True,
+    )
+    print(f"Codec used in {name}: {result.stdout.strip()}")
+
+
+# %%
+# .. _pixel_format:
+#
+# Pixel Format
+# ------------
+#
+# The ``pixel_format`` parameter controls the color sampling (chroma subsampling)
+# of the output video. This affects both quality and file size.
+#
+# Common pixel formats:
+#
+# - ``"yuv420p"`` - 4:2:0 chroma subsampling (standard quality, smaller file size, widely compatible)
+# - ``"yuv444p"`` - 4:4:4 chroma subsampling (full chroma resolution, higher quality, larger file size)
+#
+# Most playback devices and platforms support ``yuv420p``, making it the most
+# common choice for video encoding.
+#
+# .. note::
+#
+#     Pixel format support depends on the codec used. Use ``ffmpeg -h encoder=<codec_name>``
+#     to check available options for your selected codec.
+
+# Standard pixel format
+yuv420_encoded_frames = encoder.to_tensor(
+    format="mp4", codec="libx264", pixel_format="yuv420p"
+)
+play_video(yuv420_encoded_frames)
+
+# %%
+# .. _crf:
+#
+# CRF (Constant Rate Factor)
+# --------------------------
+#
+# The ``crf`` parameter controls video quality, where lower values produce higher quality output.
+#
+# For example, with the commonly used H.264 codec, ``libx264``:
+#
+# - Values range from 0 (lossless) to 51 (worst quality)
+# - Values 17 or 18 are considered visually lossless, and the default is 23.
+#
+# .. note::
+#
+#     The range and interpretation of CRF values depend on the codec used, and
+#     not all codecs support CRF. Use ``ffmpeg -h encoder=<codec_name>`` to
+#     check available options for your selected codec.
+#
+
+# High quality (low CRF)
+high_quality_output = encoder.to_tensor(format="mp4", codec="libx264", crf=0)
+play_video(high_quality_output)
+
+# %%
+# Low quality (high CRF)
+low_quality_output = encoder.to_tensor(format="mp4", codec="libx264", crf=50)
+play_video(low_quality_output)
+
+
+# %%
+# .. _preset:
+#
+# Preset
+# ------
+#
+# The ``preset`` parameter controls the tradeoff between encoding speed and file compression.
+# Faster presets encode faster but produce larger files, while slower
+# presets take more time to encode but result in better compression.
+#
+# For example, with the commonly used H.264 codec, ``libx264`` presets include
+# ``"ultrafast"`` (fastest), ``"fast"``, ``"medium"`` (default), ``"slow"``, and
+# ``"veryslow"`` (slowest, best compression). See the
+# `H.264 Video Encoding Guide <https://trac.ffmpeg.org/wiki/Encode/H.264#a2.Chooseapresetandtune>`_
+# for additional details.
+#
+# .. note::
+#
+#     Not all codecs support the ``presets`` option. Use ``ffmpeg -h encoder=<codec_name>``
+#     to check available options for your selected codec.
+#
+
+# Fast encoding with a larger file size
+fast_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+encoder.to_file(fast_output, codec="libx264", preset="ultrafast")
+print(f"Size of fast encoded file: {Path(fast_output).stat().st_size} bytes")
+
+# Slow encoding for a smaller file size
+slow_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+encoder.to_file(slow_output, codec="libx264", preset="veryslow")
+print(f"Size of slow encoded file: {Path(slow_output).stat().st_size} bytes")
+
+# %%
+# .. _extra_options:
+#
+# Extra Options
+# -------------
+#
+# The ``extra_options`` parameter accepts a dictionary of codec-specific options
+# that would normally be set via FFmpeg command-line arguments. This enables
+# control of encoding settings beyond the common parameters.
+#
+# For example, some potential extra options for the commonly used H.264 codec, ``libx264`` include:
+#
+# - ``"g"`` - GOP (Group of Pictures) size / keyframe interval
+# - ``"max_b_frames"`` - Maximum number of B-frames between I and P frames
+# - ``"tune"`` - Tuning preset (e.g., ``"film"``, ``"animation"``, ``"grain"``)
+#
+# .. note::
+#
+#     Use ``ffmpeg -h encoder=<codec_name>`` to see all available options for
+#     a specific codec.
+#
+
+
+custom_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+encoder.to_file(
+    custom_output,
+    codec="libx264",
+    extra_options={
+        "g": 50,                # Keyframe every 50 frames
+        "max_b_frames": 0,      # Disable B-frames for faster decoding
+        "tune": "fastdecode",   # Optimize for fast decoding
+    }
+)
+
+# %%
@@ -662,7 +662,7 @@ VideoEncoder::~VideoEncoder() {
 
 VideoEncoder::VideoEncoder(
     const torch::Tensor& frames,
-    int frameRate,
+    double frameRate,
     std::string_view fileName,
     const VideoStreamOptions& videoStreamOptions)
     : frames_(validateFrames(frames)), inFrameRate_(frameRate) {
@@ -694,7 +694,7 @@ VideoEncoder::VideoEncoder(
 
 VideoEncoder::VideoEncoder(
     const torch::Tensor& frames,
-    int frameRate,
+    double frameRate,
     std::string_view formatName,
     std::unique_ptr<AVIOContextHolder> avioContextHolder,
     const VideoStreamOptions& videoStreamOptions)
@@ -787,9 +787,9 @@ void VideoEncoder::initializeEncoder(
   avCodecContext_->width = outWidth_;
   avCodecContext_->height = outHeight_;
   avCodecContext_->pix_fmt = outPixelFormat_;
-  // TODO-VideoEncoder: Verify that frame_rate and time_base are correct
-  avCodecContext_->time_base = {1, inFrameRate_};
-  avCodecContext_->framerate = {inFrameRate_, 1};
+  // TODO-VideoEncoder: Add and utilize output frame_rate option
+  avCodecContext_->framerate = av_d2q(inFrameRate_, INT_MAX);
+  avCodecContext_->time_base = av_inv_q(avCodecContext_->framerate);
 
   // Set flag for containers that require extradata to be in the codec context
   if (avFormatContext_->oformat->flags & AVFMT_GLOBALHEADER) {
@@ -833,6 +833,10 @@ void VideoEncoder::initializeEncoder(
 
   // Set the stream time base to encode correct frame timestamps
   avStream_->time_base = avCodecContext_->time_base;
+  // Set the stream frame rate to store correct frame durations for some
+  // containers (webm, mkv)
+  avStream_->r_frame_rate = avCodecContext_->framerate;
+
   status = avcodec_parameters_from_context(
       avStream_->codecpar, avCodecContext_.get());
   TORCH_CHECK(
 
@@ -143,13 +143,13 @@ class VideoEncoder {
 
   VideoEncoder(
       const torch::Tensor& frames,
-      int frameRate,
+      double frameRate,
       std::string_view fileName,
       const VideoStreamOptions& videoStreamOptions);
 
   VideoEncoder(
       const torch::Tensor& frames,
-      int frameRate,
+      double frameRate,
       std::string_view formatName,
       std::unique_ptr<AVIOContextHolder> avioContextHolder,
       const VideoStreamOptions& videoStreamOptions);
@@ -172,7 +172,7 @@ class VideoEncoder {
   UniqueSwsContext swsContext_;
 
   const torch::Tensor frames_;
-  int inFrameRate_;
+  double inFrameRate_;
 
   int inWidth_ = -1;
   int inHeight_ = -1;
 
@@ -1039,7 +1039,7 @@ AudioFramesOutput SingleStreamDecoder::getFramesPlayedInRangeAudio(
         firstFramePtsSeconds = frameOutput.ptsSeconds;
       }
       frames.push_back(frameOutput.data);
-    } catch (const EndOfFileException& e) {
+    } catch (const EndOfFileException&) {
       finished = true;
     }
Original file line number	Diff line number	Diff line change
@@ -16,3 +16,4 @@ For an audio decoder tutorial, see: :ref:`sphx_glr_generated_examples_encoding_a
`16`	`16`	`:template: class.rst`
`17`	`17`
`18`	`18`	`AudioEncoder`
	`19`	`+ VideoEncoder`
Original file line number	Diff line number	Diff line change
`@@ -87,6 +87,7 @@ def __call__(self, filename):`
`87`	`87`	`assert "examples/encoding" in self.src_dir`
`88`	`88`	`order = [`
`89`	`89`	`"audio_encoding.py",`
	`90`	`+ "video_encoding.py",`
`90`	`91`	`]`
`91`	`92`
`92`	`93`	`try:`
Original file line number	Diff line number	Diff line change
`@@ -1039,7 +1039,7 @@ AudioFramesOutput SingleStreamDecoder::getFramesPlayedInRangeAudio(`
`1039`	`1039`	`firstFramePtsSeconds = frameOutput.ptsSeconds;`
`1040`	`1040`	`}`
`1041`	`1041`	`frames.push_back(frameOutput.data);`
`1042`		`- } catch (const EndOfFileException& e) {`
	`1042`	`+ } catch (const EndOfFileException&) {`
`1043`	`1043`	`finished = true;`
`1044`	`1044`	`}`
`1045`	`1045`