Commit 70b5976

Better importing, better docs

1 parent 105c77f
4 files changed: +30, -23 lines

4 files changed

+30
-23
lines changed

docs/source/api_ref_transforms.rst
Lines changed: 3 additions & 3 deletions

@@ -1,8 +1,8 @@
-.. _samplers:
+.. _transforms:
 
-===================
+=====================
 torchcodec.transforms
-===================
+=====================
 
 .. currentmodule:: torchcodec.transforms
 

docs/source/conf.py
Lines changed: 1 addition & 0 deletions

@@ -209,6 +209,7 @@ def __call__(self, filename):
 intersphinx_mapping = {
     "python": ("https://docs.python.org/3/", None),
     "torch": ("https://pytorch.org/docs/stable/", None),
+    "torchvision": ("https://docs.pytorch.org/vision/stable/", None),
     "numpy": ("https://numpy.org/doc/stable/", None),
     "PIL": ("https://pillow.readthedocs.io/en/stable/", None),
     "matplotlib": ("https://matplotlib.org/stable/", None),

src/torchcodec/decoders/_video_decoder.py
Lines changed: 4 additions & 6 deletions

@@ -70,10 +70,8 @@ class VideoDecoder:
         transforms (sequence of transform objects, optional): Sequence of transforms to be
             applied to the decoded frames by the decoder itself, in order. Accepts both
             :class:`~torchcodec.transforms.DecoderTransform` and
-            `torchvision.transforms.v2.Transform <https://docs.pytorch.org/vision/stable/transforms.html#v2-api-reference-recommended>`_
-            objects. All transforms are applied
-            in the ouput pixel format and colorspace. Read more about this parameter in:
-            TODO_DECODER_TRANSFORMS_TUTORIAL.
+            :class:`~torchvision.transforms.v2.Transform`
+            objects. Read more about this parameter in: TODO_DECODER_TRANSFORMS_TUTORIAL.
         custom_frame_mappings (str, bytes, or file-like object, optional):
             Mapping of frames to their metadata, typically generated via ffprobe.
             This enables accurate frame seeking without requiring a full video scan.
@@ -477,7 +475,7 @@ def _convert_to_decoder_transforms(
                     " DecoderTransform. TorchCodec also accept TorchVision "
                     "v2 transforms, but TorchVision is not installed."
                 )
-            if isinstance(transform, v2.Resize):
+            elif isinstance(transform, v2.Resize):
                 converted_transforms.append(Resize._from_torchvision(transform))
             else:
                 raise ValueError(
@@ -513,7 +511,7 @@ def _make_transform_specs(
         return ""
 
     transforms = _convert_to_decoder_transforms(transforms)
-    return ";".join([t._make_params() for t in transforms])
+    return ";".join([t._make_transform_spec() for t in transforms])
 
 
 def _read_custom_frame_mappings(
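
Taken together, these changes mean the decoder accepts either transform flavor and serializes them into a single spec string via the renamed helper. A hedged usage sketch, not part of the commit ("video.mp4" is a placeholder; the second variant assumes TorchVision is installed):

# Hedged usage sketch; "video.mp4" is a placeholder file name.
from torchcodec.decoders import VideoDecoder
from torchcodec.transforms import Resize

# Native decoder-side transform:
decoder = VideoDecoder("video.mp4", transforms=[Resize(size=(135, 240))])

# TorchVision v2 equivalent, converted internally through
# Resize._from_torchvision (requires TorchVision to be installed):
from torchvision.transforms import v2
decoder = VideoDecoder("video.mp4", transforms=[v2.Resize(size=(135, 240))])

frame = decoder[0]  # frames come back already resized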

src/torchcodec/transforms/_decoder_transforms.py
Lines changed: 22 additions & 14 deletions

@@ -6,6 +6,7 @@
 
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
+from types import ModuleType
 from typing import Sequence
 
 from torch import nn
@@ -20,34 +21,41 @@ class DecoderTransform(ABC):
     should be both faster and more memory efficient than receiving normally
     decoded frames and applying the same kind of transform.
 
-    Most `DecoderTransform` objects have a complementary transform in TorchVision,
-    specificially in
-    `torchvision.transforms.v2 <https://docs.pytorch.org/vision/stable/transforms.html#v2-api-reference-recommended>`_.
-    For such transforms, we ensure that:
+    Most ``DecoderTransform`` objects have a complementary transform in TorchVision,
+    specificially in `torchvision.transforms.v2 <https://docs.pytorch.org/vision/stable/transforms.html>`_. For such transforms, we
+    ensure that:
 
     1. The names are the same.
     2. Default behaviors are the same.
-    3. The parameters for the `DecoderTransform` object are a subset of the
-       TorchVision transform object.
+    3. The parameters for the ``DecoderTransform`` object are a subset of the
+       TorchVision :class:`~torchvision.transforms.v2.Transform` object.
     4. Parameters with the same name control the same behavior and accept a
       subset of the same types.
     5. The difference between the frames returned by a decoder transform and
-       the complementary TorchVision transform are small.
-
-    All decoder transforms are applied in the output pixel format and colorspace.
+       the complementary TorchVision transform are such that a model should
+       not be able to tell the difference.
     """
 
     @abstractmethod
-    def _make_params(self) -> str:
+    def _make_transform_spec(self) -> str:
         pass
 
 
+def import_torchvision_transforms_v2() -> ModuleType:
+    try:
+        from torchvision.transforms import v2
+    except ImportError as e:
+        raise RuntimeError(
+            "Cannot import TorchVision; this should never happen, please report a bug."
+        ) from e
+    return v2
+
+
 @dataclass
 class Resize(DecoderTransform):
     """Resize the decoded frame to a given size.
 
-    Complementary TorchVision transform:
-    `torchvision.transforms.v2.Resize <https://docs.pytorch.org/vision/stable/generated/torchvision.transforms.v2.Resize.html#torchvision.transforms.v2.Resize>`_.
+    Complementary TorchVision transform: :class:`~torchvision.transforms.v2.Resize`.
     Interpolation is always bilinear. Anti-aliasing is always on.
 
     Args:
@@ -57,13 +65,13 @@ class Resize(DecoderTransform):
 
     size: Sequence[int]
 
-    def _make_params(self) -> str:
+    def _make_transform_spec(self) -> str:
         assert len(self.size) == 2
         return f"resize, {self.size[0]}, {self.size[1]}"
 
     @classmethod
     def _from_torchvision(cls, resize_tv: nn.Module):
-        from torchvision.transforms import v2
+        v2 = import_torchvision_transforms_v2()
 
         assert isinstance(resize_tv, v2.Resize)
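
The rename from _make_params to _make_transform_spec matches what the method actually returns: a complete spec fragment rather than just parameter values. A minimal sketch of the resulting string, derived from the Resize implementation above and the ";"-join in _make_transform_specs:

# Minimal sketch based on the code above; not part of the commit.
from torchcodec.transforms import Resize

transforms = [Resize(size=(135, 240)), Resize(size=(68, 120))]
print(";".join(t._make_transform_spec() for t in transforms))
# Prints: resize, 135, 240;resize, 68, 120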
