Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def __call__(self, filename):
assert "examples/encoding" in self.src_dir
order = [
"audio_encoding.py",
"video_encoding.py",
]

try:
Expand Down
262 changes: 262 additions & 0 deletions examples/encoding/video_encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
=======================================
Encoding video frames with VideoEncoder
=======================================
In this example, we'll learn how to encode video frames to a file or to raw
bytes using the :class:`~torchcodec.encoders.VideoEncoder` class.
"""

# %%
# First, we'll download a video and decode some frames to tensors.
# These will be the input to the VideoEncoder. For more details on decoding,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make sure to use :class:`~torchcodec.encoders.VideoEncoder` everywhere.

It should link to the docstring page. Right now it doesn't, because you need to add it to https://github.com/meta-pytorch/torchcodec/blob/main/docs/source/api_ref_encoders.rst?plain=1

# see :ref:`sphx_glr_generated_examples_decoding_basic_example.py`.
# Otherwise, skip ahead to :ref:`creating_encoder`.

import requests
from torchcodec.decoders import VideoDecoder
from IPython.display import Video


def play_video(encoded_bytes):
    """Return an IPython ``Video`` that embeds the given encoded video.

    ``encoded_bytes`` is converted with ``.numpy().tobytes()`` and handed to
    ``Video`` as embedded ``video/mp4`` data, displayed at 640x360.
    """
    raw_mp4 = encoded_bytes.numpy().tobytes()
    return Video(data=raw_mp4, embed=True, width=640, height=360, mimetype="video/mp4")


# Video source: https://www.pexels.com/video/adorable-cats-on-the-lawn-4977395/
# License: CC0. Author: Altaf Shah.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can still use it, but I don't see the license being explicitly CC0.

Suggested change
# License: CC0. Author: Altaf Shah.
# Author: Altaf Shah.

url = "https://videos.pexels.com/video-files/4977395/4977395-hd_1920_1080_24fps.mp4"

# Always pass a timeout: without one, ``requests`` may wait indefinitely on an
# unresponsive server, which would hang the example instead of failing.
response = requests.get(url, headers={"User-Agent": ""}, timeout=60)
if response.status_code != 200:
    raise RuntimeError(f"Failed to download video. {response.status_code = }.")

raw_video_bytes = response.content

# Decode straight from the downloaded bytes; no file is written to disk.
decoder = VideoDecoder(raw_video_bytes)
frames = decoder[:60]  # Get first 60 frames
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's use get_frames_in_range instead, it's more efficient, and we want users to use the most efficient decoding methods.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Slicing actually calls get_frames_in_range():

def _getitem_slice(self, key: slice) -> Tensor:
assert isinstance(key, slice)
start, stop, step = key.indices(len(self))
frame_data, *_ = core.get_frames_in_range(
self._decoder,
start=start,
stop=stop,
step=step,
)
return frame_data

# TODO: use float once other PR lands
# int() drops any fractional part of average_fps, so a non-integer average
# frame rate is truncated here.
frame_rate = int(decoder.metadata.average_fps)

# %%
# .. _creating_encoder:
#
# Creating an encoder
# -------------------
#
# Let's instantiate a :class:`~torchcodec.encoders.VideoEncoder`. We will need to provide
# the frames to be encoded as a 4D tensor of shape
# ``(num_frames, num_channels, height, width)`` with values in the ``[0, 255]``
# range and ``torch.uint8`` dtype. We will also need to provide the frame rate of the input
# video.
#
# .. note::
#
# The ``frame_rate`` parameter corresponds to the frame rate of the
# *input* video. It will also be used for the frame rate of the *output* encoded video.
from torchcodec.encoders import VideoEncoder

# Show the shape and dtype of the decoded frames, and the frame rate, that are
# about to be handed to the encoder.
print(f"{frames.shape = }, {frames.dtype = }")
print(f"{frame_rate = } fps")

# Both arguments are passed by keyword.
encoder = VideoEncoder(frames=frames, frame_rate=frame_rate)

# %%
# Encoding to file, bytes, or file-like
# -------------------------------------
#
# :class:`~torchcodec.encoders.VideoEncoder` supports encoding frames into a
# file via the :meth:`~torchcodec.encoders.VideoEncoder.to_file` method, to
# file-like objects via the :meth:`~torchcodec.encoders.VideoEncoder.to_file_like`
Copy link
Contributor

@scotts scotts Nov 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# file-like objects via the :meth:`~torchcodec.encoders.VideoEncoder.to_filelike`
# file-like objects via the :meth:`~torchcodec.encoders.VideoEncoder.to_file_like`

# method, or to raw bytes via :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`.
# For now we will use :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`, so we
# can easily inspect and display the encoded video.

# Encode the frames into an in-memory tensor using the "mp4" format, then pass
# that tensor to the play_video helper.
encoded_frames = encoder.to_tensor(format="mp4")
play_video(encoded_frames)

# %%
#
# Now that we have encoded data, we can decode it back to verify the
# round-trip encode/decode process works as expected:

# Build a second decoder directly from the encoded tensor and slice out every
# frame it contains.
decoder_verify = VideoDecoder(encoded_frames)
decoded_frames = decoder_verify[:]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here


# Print the shape of the re-decoded frames next to the shape of the frames
# that were originally passed to the encoder, for comparison.
print(f"Re-decoded video: {decoded_frames.shape = }")
print(f"Original frames: {frames.shape = }")

Copy link
Contributor

@scotts scotts Nov 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is an excellent place to explain that the format parameter selects the default codec. We can also briefly explain the difference between, say, an mp4 video file and the actual codec used to decode and encode the video streams in that file. If this is well explained in any external FFmpeg docs, we can link to those as well.

That then sets us up for the next section, as the natural next question a reader may have is, what if I don't want the default codec?

At the end of the "Codec Selection" section, we should give some guidance on when to just use format and when to specify codec as well. Nothing elaborate, just a sentence or two. I think that will go a long way toward informing our users about the relationship between these two options.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestions, I added some brief guidance on codec vs format at the end.

I drafted text to explain the difference between container-format and codec, but I am worried it dilutes the "Codec Selection" section with text that is not specific to the API. I would be happy to add a link, but I was not able to find useful FFmpeg docs on this subject.

# %%
# Codec Selection
# ---------------
#
# The ``codec`` parameter specifies which video codec to use for encoding.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could start this section by indicating that by default, the codec is selected automatically based on the container format, for example "mp4" tends to default to h264 (I think? Please check me on this)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@NicolasHug, made a similar comment to what I did above. :) Doing it here or in the previous section are both great to me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added an intro here explaining the default behavior, this way all codec related text is under the same header. I added the mp4 -> h264 example, as it is often the case in my experience.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this works well. My one follow-up suggestion is to connect the sentence about codec selection with the default. Something like, "If you want a codec other than the default, use the codec parameter." Followed by the explanation of what it is.

# You can specify either a specific codec implementation (e.g., ``"libx264"``)
# or a codec specification (e.g., ``"h264"``). Different codecs offer
# different tradeoffs between quality, file size, and encoding speed.
#
# .. note::
#
# To see available encoders on your system, run ``ffmpeg -encoders``.
#
# Let's encode the same frames using different codecs:

import subprocess
import tempfile
from pathlib import Path

# Write the encoded files into a temporary directory so that running this
# example does not leave stray .mp4 files in the working directory. The
# directory and everything in it are removed when the ``with`` block exits.
with tempfile.TemporaryDirectory() as tmp_dir:
    # H.264 encoding
    h264_output = str(Path(tmp_dir) / "libx264_encoded.mp4")
    encoder.to_file(h264_output, codec="libx264")

    # H.265 encoding
    hevc_output = str(Path(tmp_dir) / "hevc_encoded.mp4")
    encoder.to_file(hevc_output, codec="hevc")

    # Now let's use ffprobe to verify the codec used in the output files
    for output in [h264_output, hevc_output]:
        result = subprocess.run(
            [
                "ffprobe",
                "-v",
                "error",
                "-select_streams",
                "v:0",
                "-show_entries",
                "stream=codec_name",
                "-of",
                "default=noprint_wrappers=1:nokey=1",
                output,
            ],
            capture_output=True,
            text=True,
        )
        # Print only the file name; the temporary directory part of the path
        # differs on every run and is not informative.
        print(f"Codec used in {Path(output).name}: {result.stdout.strip()}")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great section above. The only issue is that it pollutes the codespace with libx264_encoded.mp4 and hevc_encoded.mp4. Let's use temporary files instead, with e.g. https://docs.python.org/3/library/tempfile.html


# %%
# Pixel Format
# ------------
#
# The ``pixel_format`` parameter controls the color sampling (chroma subsampling)
# of the output video. This affects both quality and file size.
#
# Common pixel formats:
#
# - ``"yuv420p"`` - 4:2:0 chroma subsampling (standard quality, smaller file size, widely compatible)
# - ``"yuv444p"`` - 4:4:4 chroma subsampling (full chroma resolution, higher quality, larger file size)
#
# Most playback devices and platforms support ``yuv420p``, making it the most
# common choice for video encoding.
#
# .. note::
#
# Pixel format support depends on the codec used. Use ``ffmpeg -h encoder=<codec_name>``
# to check available options for your selected codec.

# Standard pixel format
# Encode in memory as MP4 with libx264 and the "yuv420p" pixel format, then
# pass the encoded tensor to the play_video helper.
yuv420_encoded_frames = encoder.to_tensor(
    format="mp4", codec="libx264", pixel_format="yuv420p"
)
play_video(yuv420_encoded_frames)

# %%
# CRF (Constant Rate Factor)
# --------------------------
#
# The ``crf`` parameter controls video quality, where lower values produce higher quality output.
#
# For example, with the commonly used H.264 codec, ``libx264``:
#
# - Values range from 0 (lossless) to 51 (worst quality)
# - Values 17 or 18 are considered visually lossless, and the default is 23.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# - Values 17 or 18 are conisdered visually lossless, and the default is 23.
# - Values 17 or 18 are considered visually lossless, and the default is 23.

#
# .. note::
#
# The range and interpretation of CRF values depend on the codec used, and
# not all codecs support CRF. Use ``ffmpeg -h encoder=<codec_name>`` to
# check available options for your selected codec.
#

# High quality (low CRF)
# crf=0 is the bottom of the libx264 range described above (lossless).
high_quality_output = encoder.to_tensor(format="mp4", codec="libx264", crf=0)
play_video(high_quality_output)

# %%
# Low quality (high CRF)
low_quality_output = encoder.to_tensor(format="mp4", codec="libx264", crf=50)  # near the top of the 0-51 range
play_video(low_quality_output)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well done, it's really cool to visually see the effect it has on quality!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, I came here to say the same thing. :)



# %%
# Preset
# ------
#
# The ``preset`` parameter controls the tradeoff between encoding speed and file compression.
# Faster presets encode faster but produce larger files, while slower
# presets take more time to encode but result in better compression.
#
# For example, with the commonly used H.264 codec, ``libx264`` presets include:
#
# - ``"ultrafast"`` (fastest), ``"fast"``, ``"medium"`` (default), ``"slow"``, ``"veryslow"`` (slowest, best compression).
#
# .. note::
#
# Not all codecs support the ``preset`` option. Use ``ffmpeg -h encoder=<codec_name>``
# to check available options for your selected codec.
#

import os
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

import tempfile
from pathlib import Path

# Encode into a temporary directory so the example leaves no .mp4 files behind
# in the working directory. The sizes are read inside the ``with`` block,
# before the directory and its contents are removed.
with tempfile.TemporaryDirectory() as tmp_dir:
    # Fast encoding with a larger file size
    fast_output = str(Path(tmp_dir) / "fast_encoded.mp4")
    encoder.to_file(fast_output, codec="libx264", preset="ultrafast")
    print(f"Size of fast encoded file: {os.path.getsize(fast_output)} bytes")

    # Slow encoding for a smaller file size
    slow_output = str(Path(tmp_dir) / "slow_encoded.mp4")
    encoder.to_file(slow_output, codec="libx264", preset="veryslow")
    print(f"Size of slow encoded file: {os.path.getsize(slow_output)} bytes")

# %%
# Extra Options
# -------------
#
# The ``extra_options`` parameter accepts a dictionary of codec-specific options
# that would normally be set via FFmpeg command-line arguments. This enables
# control of encoding settings beyond the common parameters.
#
# For example, some potential extra options for the commonly used H.264 codec, ``libx264``, include:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like the second "For example" is from before adding the first.

#
# - ``"g"`` - GOP (Group of Pictures) size / keyframe interval
# - ``"max_b_frames"`` - Maximum number of B-frames between I and P frames
# - ``"tune"`` - Tuning preset (e.g., ``"film"``, ``"animation"``, ``"grain"``)
#
# .. note::
#
# Use ``ffmpeg -h encoder=<codec_name>`` to see all available options for
# a specific codec.
#


import tempfile
from pathlib import Path

# Custom GOP size and tuning
# The file is written into a temporary directory so the example does not leave
# custom_encoded.mp4 behind in the working directory; the directory is removed
# when the ``with`` block exits.
with tempfile.TemporaryDirectory() as tmp_dir:
    custom_output = str(Path(tmp_dir) / "custom_encoded.mp4")
    encoder.to_file(
        custom_output,
        codec="libx264",
        extra_options={
            "g": 50,  # Keyframe every 50 frames
            "max_b_frames": 0,  # Disable B-frames for faster decoding
            "tune": "fastdecode",  # Optimize for fast decoding
        },
    )

# %%
Loading