Skip to content

Commit b943504

Browse files
authored
Merge branch 'main' into codec_select_encode_option
2 parents 8b48e18 + 0535b00 commit b943504

File tree

16 files changed

+421
-95
lines changed

16 files changed

+421
-95
lines changed

.github/workflows/lint.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
run: python -m pip install --upgrade pip
6363
- name: Install dependencies and FFmpeg
6464
run: |
65-
python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
65+
python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
6666
conda install "ffmpeg=7.0.1" pkg-config pybind11 -c conda-forge
6767
ffmpeg -version
6868
- name: Build and install torchcodec

docs/source/api_ref_transforms.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
.. _transforms:
2+
3+
=====================
4+
torchcodec.transforms
5+
=====================
6+
7+
.. currentmodule:: torchcodec.transforms
8+
9+
For a tutorial, see: TODO_DECODER_TRANSFORMS_TUTORIAL.
10+
11+
.. autosummary::
12+
:toctree: generated/
13+
:nosignatures:
14+
:template: dataclass.rst
15+
16+
DecoderTransform
17+
Resize

docs/source/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ def __call__(self, filename):
209209
intersphinx_mapping = {
210210
"python": ("https://docs.python.org/3/", None),
211211
"torch": ("https://pytorch.org/docs/stable/", None),
212+
"torchvision": ("https://docs.pytorch.org/vision/stable/", None),
212213
"numpy": ("https://numpy.org/doc/stable/", None),
213214
"PIL": ("https://pillow.readthedocs.io/en/stable/", None),
214215
"matplotlib": ("https://matplotlib.org/stable/", None),

docs/source/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,4 @@ Encoding
125125
api_ref_decoders
126126
api_ref_encoders
127127
api_ref_samplers
128+
api_ref_transforms

mypy.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ files = src/torchcodec
44
show_error_codes = True
55
pretty = True
66
allow_redefinition = True
7+
follow_untyped_imports = True

src/torchcodec/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
# Note: usort wants to put Frame and FrameBatch after decoders and samplers,
1010
# but that results in circular import.
1111
from ._frame import AudioSamples, Frame, FrameBatch # usort:skip # noqa
12-
from . import decoders, encoders, samplers # noqa
12+
from . import decoders, encoders, samplers, transforms # noqa
1313

1414
try:
1515
# Note that version.py is generated during install.

src/torchcodec/_core/Encoder.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -766,6 +766,10 @@ void VideoEncoder::initializeEncoder(
766766
std::to_string(videoStreamOptions.crf.value()).c_str(),
767767
0);
768768
}
769+
if (videoStreamOptions.preset.has_value()) {
770+
av_dict_set(
771+
&options, "preset", videoStreamOptions.preset.value().c_str(), 0);
772+
}
769773
int status = avcodec_open2(avCodecContext_.get(), avCodec, &options);
770774
av_dict_free(&options);
771775

src/torchcodec/_core/StreamOptions.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,11 @@ struct VideoStreamOptions {
4646

4747
// Encoding options
4848
std::optional<std::string> codec;
49-
// TODO-VideoEncoder: Consider adding other optional fields here
50-
// (bit rate, gop size, max b frames, preset)
51-
std::optional<double> crf;
52-
5349
// Optional pixel format for video encoding (e.g., "yuv420p", "yuv444p")
5450
// If not specified, uses codec's default format.
5551
std::optional<std::string> pixelFormat;
52+
std::optional<double> crf;
53+
std::optional<std::string> preset;
5654
};
5755

5856
struct AudioStreamOptions {

src/torchcodec/_core/custom_ops.cpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
3737
m.def(
3838
"_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
3939
m.def(
40-
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? codec=None, str? pixel_format=None, float? crf=None) -> ()");
40+
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
4141
m.def(
42-
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? codec=None, str? pixel_format=None, float? crf=None) -> Tensor");
42+
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None) -> Tensor");
4343
m.def(
44-
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, float? crf=None) -> ()");
44+
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
4545
m.def(
4646
"create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
4747
m.def(
@@ -604,12 +604,14 @@ void encode_video_to_file(
604604
int64_t frame_rate,
605605
std::string_view file_name,
606606
std::optional<std::string> codec = std::nullopt,
607-
std::optional<std::string> pixel_format = std::nullopt,
608-
std::optional<double> crf = std::nullopt) {
607+
std::optional<std::string_view> pixel_format = std::nullopt,
608+
std::optional<double> crf = std::nullopt,
609+
std::optional<std::string_view> preset = std::nullopt) {
609610
VideoStreamOptions videoStreamOptions;
610611
videoStreamOptions.codec = codec;
611612
videoStreamOptions.pixelFormat = pixel_format;
612613
videoStreamOptions.crf = crf;
614+
videoStreamOptions.preset = preset;
613615
VideoEncoder(
614616
frames,
615617
validateInt64ToInt(frame_rate, "frame_rate"),
@@ -623,13 +625,15 @@ at::Tensor encode_video_to_tensor(
623625
int64_t frame_rate,
624626
std::string_view format,
625627
std::optional<std::string> codec = std::nullopt,
626-
std::optional<std::string> pixel_format = std::nullopt,
627-
std::optional<double> crf = std::nullopt) {
628+
std::optional<std::string_view> pixel_format = std::nullopt,
629+
std::optional<double> crf = std::nullopt,
630+
std::optional<std::string_view> preset = std::nullopt) {
628631
auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
629632
VideoStreamOptions videoStreamOptions;
630633
videoStreamOptions.codec = codec;
631634
videoStreamOptions.pixelFormat = pixel_format;
632635
videoStreamOptions.crf = crf;
636+
videoStreamOptions.preset = preset;
633637
return VideoEncoder(
634638
frames,
635639
validateInt64ToInt(frame_rate, "frame_rate"),
@@ -645,8 +649,9 @@ void _encode_video_to_file_like(
645649
std::string_view format,
646650
int64_t file_like_context,
647651
std::optional<std::string> codec = std::nullopt,
648-
std::optional<std::string> pixel_format = std::nullopt,
649-
std::optional<double> crf = std::nullopt) {
652+
std::optional<std::string_view> pixel_format = std::nullopt,
653+
std::optional<double> crf = std::nullopt,
654+
std::optional<std::string_view> preset = std::nullopt) {
650655
auto fileLikeContext =
651656
reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
652657
TORCH_CHECK(
@@ -657,6 +662,7 @@ void _encode_video_to_file_like(
657662
videoStreamOptions.codec = codec;
658663
videoStreamOptions.pixelFormat = pixel_format;
659664
videoStreamOptions.crf = crf;
665+
videoStreamOptions.preset = preset;
660666

661667
VideoEncoder encoder(
662668
frames,

src/torchcodec/_core/ops.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ def encode_video_to_file_like(
216216
codec: Optional[str] = None,
217217
pixel_format: Optional[str] = None,
218218
crf: Optional[Union[int, float]] = None,
219+
preset: Optional[str] = None,
219220
) -> None:
220221
"""Encode video frames to a file-like object.
221222
@@ -227,6 +228,7 @@ def encode_video_to_file_like(
227228
codec: Optional codec name (e.g., "libx264", "h264")
228229
pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p")
229230
crf: Optional constant rate factor for encoding quality
231+
preset: Optional encoder preset name (e.g., "ultrafast", "medium")
230232
"""
231233
assert _pybind_ops is not None
232234

@@ -238,6 +240,7 @@ def encode_video_to_file_like(
238240
codec,
239241
pixel_format,
240242
crf,
243+
preset,
241244
)
242245

243246

@@ -328,6 +331,7 @@ def encode_video_to_file_abstract(
328331
codec: Optional[str],
329332
pixel_format: Optional[str] = None,
330333
crf: Optional[Union[int, float]] = None,
334+
preset: Optional[str] = None,
331335
) -> None:
332336
return
333337

@@ -340,6 +344,7 @@ def encode_video_to_tensor_abstract(
340344
codec: Optional[str],
341345
pixel_format: Optional[str] = None,
342346
crf: Optional[Union[int, float]] = None,
347+
preset: Optional[str] = None,
343348
) -> torch.Tensor:
344349
return torch.empty([], dtype=torch.long)
345350

@@ -350,9 +355,10 @@ def _encode_video_to_file_like_abstract(
350355
frame_rate: int,
351356
format: str,
352357
file_like_context: int,
353-
codec: Optional[str] = None,
358+
codec: Optional[str],
354359
pixel_format: Optional[str] = None,
355360
crf: Optional[Union[int, float]] = None,
361+
preset: Optional[str] = None,
356362
) -> None:
357363
return
358364

0 commit comments

Comments
 (0)