Skip to content

Commit 2cd3f65

Browse files
committed
Changes
1 parent a2da767 commit 2cd3f65

File tree

2 files changed

+14
-12
lines changed

2 files changed

+14
-12
lines changed

src/torchcodec/decoders/_video_decoder.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from typing import List, Literal, Optional, Sequence, Tuple, Union
1212

1313
import torch
14-
from torch import device as torch_device, Tensor
14+
from torch import device as torch_device, nn, Tensor
1515

1616
from torchcodec import _core as core, Frame, FrameBatch
1717
from torchcodec.decoders._decoder_utils import (
@@ -69,8 +69,10 @@ class VideoDecoder:
6969
:ref:`sphx_glr_generated_examples_decoding_approximate_mode.py`
7070
transforms (sequence of transform objects, optional): Sequence of transforms to be
7171
applied to the decoded frames by the decoder itself, in order. Accepts both
72-
``torchcodec.transforms.DecoderTransform`` and ``torchvision.transforms.v2.Transform``
73-
objects. All transforms are applied in the output pixel format and colorspace.
72+
:class:`torchcodec.transforms.DecoderTransform` and
73+
:class:`torchvision.transforms.v2.Transform` objects. All transforms are applied
74+
in the output pixel format and colorspace. Read more about this parameter in:
75+
SCOTT_NEEDS_TO_WRITE_A_TUTORIAL.
7476
custom_frame_mappings (str, bytes, or file-like object, optional):
7577
Mapping of frames to their metadata, typically generated via ffprobe.
7678
This enables accurate frame seeking without requiring a full video scan.
@@ -109,7 +111,7 @@ def __init__(
109111
num_ffmpeg_threads: int = 1,
110112
device: Optional[Union[str, torch_device]] = "cpu",
111113
seek_mode: Literal["exact", "approximate"] = "exact",
112-
transforms: Optional[Sequence[DecoderTransform]] = None,
114+
transforms: Optional[Sequence[Union[DecoderTransform, nn.Module]]] = None,
113115
custom_frame_mappings: Optional[
114116
Union[str, bytes, io.RawIOBase, io.BufferedReader]
115117
] = None,
@@ -442,7 +444,7 @@ def _get_and_validate_stream_metadata(
442444

443445

444446
def _convert_to_decoder_native_transforms(
445-
transforms: Sequence[DecoderTransform],
447+
transforms: Sequence[Union[DecoderTransform, nn.Module]],
446448
) -> List[DecoderTransform]:
447449
"""Convert a sequence of transforms that may contain TorchVision transform
448450
objects into a list of only TorchCodec transform objects.
@@ -494,7 +496,7 @@ def _convert_to_decoder_native_transforms(
494496

495497

496498
def _make_transform_specs(
497-
transforms: Optional[Sequence[DecoderTransform]],
499+
transforms: Optional[Sequence[Union[DecoderTransform, nn.Module]]],
498500
) -> str:
499501
"""Given a sequence of transforms, turn those into the specification string
500502
the core API expects.

src/torchcodec/transforms/_decoder_transforms.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,21 @@ class DecoderTransform(ABC):
1414
"""Base class for all decoder transforms.
1515
1616
A DecoderTransform is a transform that is applied by the decoder before
17-
returning the decoded frame. The implementation does not live in TorchCodec
18-
itself, but in the underlying decoder. Applying DecoderTransforms to frames
17+
returning the decoded frame. Applying DecoderTransforms to frames
1918
should be both faster and more memory efficient than receiving normally
2019
decoded frames and applying the same kind of transform.
2120
2221
Most DecoderTransforms have a complementary transform in TorchVision,
2322
specifically in torchvision.transforms.v2. For such transforms, we ensure
2423
that:
2524
26-
1. Default behaviors are the same.
27-
2. The parameters for the DecoderTransform are a subset of the
25+
1. The names are the same.
26+
2. Default behaviors are the same.
27+
3. The parameters for the DecoderTransform are a subset of the
2828
TorchVision transform.
29-
3. Parameters with the same name control the same behavior and accept a
29+
4. Parameters with the same name control the same behavior and accept a
3030
subset of the same types.
31-
4. The difference between the frames returned by a DecoderTransform and
31+
5. The difference between the frames returned by a DecoderTransform and
3232
the complementary TorchVision transform are small.
3333
3434
All DecoderTransforms are applied in the output pixel format and colorspace.

0 commit comments

Comments
 (0)