Skip to content

Commit 9eee77f

Browse files
committed
add preset option, update ffmpeg_cli test
1 parent b7e52fb commit 9eee77f

File tree

6 files changed

+87
-26
lines changed

6 files changed

+87
-26
lines changed

src/torchcodec/_core/Encoder.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,10 @@ void VideoEncoder::initializeEncoder(
745745
std::to_string(videoStreamOptions.crf.value()).c_str(),
746746
0);
747747
}
748+
if (videoStreamOptions.preset.has_value()) {
749+
av_dict_set(
750+
&options, "preset", videoStreamOptions.preset.value().c_str(), 0);
751+
}
748752
int status = avcodec_open2(avCodecContext_.get(), avCodec, &options);
749753
av_dict_free(&options);
750754

src/torchcodec/_core/StreamOptions.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,11 @@ struct VideoStreamOptions {
4545
std::string_view deviceVariant = "ffmpeg";
4646

4747
// Encoding options
48-
// TODO-VideoEncoder: Consider adding other optional fields here
49-
// (bit rate, gop size, max b frames, preset)
50-
std::optional<double> crf;
51-
5248
// Optional pixel format for video encoding (e.g., "yuv420p", "yuv444p")
5349
// If not specified, uses codec's default format.
5450
std::optional<std::string> pixelFormat;
51+
std::optional<double> crf;
52+
std::optional<std::string> preset;
5553
};
5654

5755
struct AudioStreamOptions {

src/torchcodec/_core/custom_ops.cpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
3737
m.def(
3838
"_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
3939
m.def(
40-
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None) -> ()");
40+
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
4141
m.def(
42-
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None) -> Tensor");
42+
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None, str? preset=None) -> Tensor");
4343
m.def(
44-
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None) -> ()");
44+
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
4545
m.def(
4646
"create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
4747
m.def(
@@ -603,11 +603,13 @@ void encode_video_to_file(
603603
const at::Tensor& frames,
604604
int64_t frame_rate,
605605
std::string_view file_name,
606-
std::optional<std::string> pixel_format = std::nullopt,
607-
std::optional<double> crf = std::nullopt) {
606+
std::optional<std::string_view> pixel_format = std::nullopt,
607+
std::optional<double> crf = std::nullopt,
608+
std::optional<std::string_view> preset = std::nullopt) {
608609
VideoStreamOptions videoStreamOptions;
609610
videoStreamOptions.pixelFormat = pixel_format;
610611
videoStreamOptions.crf = crf;
612+
videoStreamOptions.preset = preset;
611613
VideoEncoder(
612614
frames,
613615
validateInt64ToInt(frame_rate, "frame_rate"),
@@ -620,12 +622,14 @@ at::Tensor encode_video_to_tensor(
620622
const at::Tensor& frames,
621623
int64_t frame_rate,
622624
std::string_view format,
623-
std::optional<std::string> pixel_format = std::nullopt,
624-
std::optional<double> crf = std::nullopt) {
625+
std::optional<std::string_view> pixel_format = std::nullopt,
626+
std::optional<double> crf = std::nullopt,
627+
std::optional<std::string_view> preset = std::nullopt) {
625628
auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
626629
VideoStreamOptions videoStreamOptions;
627630
videoStreamOptions.pixelFormat = pixel_format;
628631
videoStreamOptions.crf = crf;
632+
videoStreamOptions.preset = preset;
629633
return VideoEncoder(
630634
frames,
631635
validateInt64ToInt(frame_rate, "frame_rate"),
@@ -640,8 +644,9 @@ void _encode_video_to_file_like(
640644
int64_t frame_rate,
641645
std::string_view format,
642646
int64_t file_like_context,
643-
std::optional<std::string> pixel_format = std::nullopt,
644-
std::optional<double> crf = std::nullopt) {
647+
std::optional<std::string_view> pixel_format = std::nullopt,
648+
std::optional<double> crf = std::nullopt,
649+
std::optional<std::string_view> preset = std::nullopt) {
645650
auto fileLikeContext =
646651
reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
647652
TORCH_CHECK(
@@ -651,6 +656,7 @@ void _encode_video_to_file_like(
651656
VideoStreamOptions videoStreamOptions;
652657
videoStreamOptions.pixelFormat = pixel_format;
653658
videoStreamOptions.crf = crf;
659+
videoStreamOptions.preset = preset;
654660

655661
VideoEncoder encoder(
656662
frames,

src/torchcodec/_core/ops.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ def encode_video_to_file_like(
215215
file_like: Union[io.RawIOBase, io.BufferedIOBase],
216216
crf: Optional[Union[int, float]] = None,
217217
pixel_format: Optional[str] = None,
218+
preset: Optional[str] = None,
218219
) -> None:
219220
"""Encode video frames to a file-like object.
220221
@@ -225,6 +226,7 @@ def encode_video_to_file_like(
225226
file_like: File-like object that supports write() and seek() methods
226227
crf: Optional constant rate factor for encoding quality
227228
pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p")
229+
preset: Optional encoder preset as string (e.g., "ultrafast", "medium")
228230
"""
229231
assert _pybind_ops is not None
230232

@@ -235,6 +237,7 @@ def encode_video_to_file_like(
235237
_pybind_ops.create_file_like_context(file_like, True), # True means for writing
236238
pixel_format,
237239
crf,
240+
preset,
238241
)
239242

240243

@@ -322,8 +325,9 @@ def encode_video_to_file_abstract(
322325
frames: torch.Tensor,
323326
frame_rate: int,
324327
filename: str,
325-
crf: Optional[Union[int, float]] = None,
326328
pixel_format: Optional[str] = None,
329+
crf: Optional[Union[int, float]] = None,
330+
preset: Optional[str] = None,
327331
) -> None:
328332
return
329333

@@ -333,8 +337,9 @@ def encode_video_to_tensor_abstract(
333337
frames: torch.Tensor,
334338
frame_rate: int,
335339
format: str,
336-
crf: Optional[Union[int, float]] = None,
337340
pixel_format: Optional[str] = None,
341+
crf: Optional[Union[int, float]] = None,
342+
preset: Optional[str] = None,
338343
) -> torch.Tensor:
339344
return torch.empty([], dtype=torch.long)
340345

@@ -345,8 +350,9 @@ def _encode_video_to_file_like_abstract(
345350
frame_rate: int,
346351
format: str,
347352
file_like_context: int,
348-
crf: Optional[Union[int, float]] = None,
349353
pixel_format: Optional[str] = None,
354+
crf: Optional[Union[int, float]] = None,
355+
preset: Optional[str] = None,
350356
) -> None:
351357
return
352358

src/torchcodec/encoders/_video_encoder.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def to_file(
3838
*,
3939
pixel_format: Optional[str] = None,
4040
crf: Optional[Union[int, float]] = None,
41+
preset: Optional[Union[str, int]] = None,
4142
) -> None:
4243
"""Encode frames into a file.
4344
@@ -50,13 +51,19 @@ def to_file(
5051
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
5152
mean better quality. Valid range depends on the encoder (commonly 0-51).
5253
Defaults to None (which will use encoder's default).
54+
preset (str or int, optional): Encoder option that controls the tradeoff between
55+
encoding speed and compression. Valid values depend on the encoder (commonly
56+
a string: "fast", "medium", "slow"). Defaults to None
57+
(which will use encoder's default).
5358
"""
59+
preset = str(preset) if isinstance(preset, int) else preset
5460
_core.encode_video_to_file(
5561
frames=self._frames,
5662
frame_rate=self._frame_rate,
5763
filename=str(dest),
5864
pixel_format=pixel_format,
5965
crf=crf,
66+
preset=preset,
6067
)
6168

6269
def to_tensor(
@@ -65,6 +72,7 @@ def to_tensor(
6572
*,
6673
pixel_format: Optional[str] = None,
6774
crf: Optional[Union[int, float]] = None,
75+
preset: Optional[Union[str, int]] = None,
6876
) -> Tensor:
6977
"""Encode frames into raw bytes, as a 1D uint8 Tensor.
7078
@@ -76,16 +84,22 @@ def to_tensor(
7684
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
7785
mean better quality. Valid range depends on the encoder (commonly 0-51).
7886
Defaults to None (which will use encoder's default).
87+
preset (str or int, optional): Encoder option that controls the tradeoff between
88+
encoding speed and compression. Valid values depend on the encoder (commonly
89+
a string: "fast", "medium", "slow"). Defaults to None
90+
(which will use encoder's default).
7991
8092
Returns:
8193
Tensor: The raw encoded bytes as a 1D uint8 Tensor.
8294
"""
95+
preset_value = str(preset) if isinstance(preset, int) else preset
8396
return _core.encode_video_to_tensor(
8497
frames=self._frames,
8598
frame_rate=self._frame_rate,
8699
format=format,
87100
pixel_format=pixel_format,
88101
crf=crf,
102+
preset=preset_value,
89103
)
90104

91105
def to_file_like(
@@ -95,6 +109,7 @@ def to_file_like(
95109
*,
96110
pixel_format: Optional[str] = None,
97111
crf: Optional[Union[int, float]] = None,
112+
preset: Optional[Union[str, int]] = None,
98113
) -> None:
99114
"""Encode frames into a file-like object.
100115
@@ -111,12 +126,18 @@ def to_file_like(
111126
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
112127
mean better quality. Valid range depends on the encoder (commonly 0-51).
113128
Defaults to None (which will use encoder's default).
129+
preset (str or int, optional): Encoder option that controls the tradeoff between
130+
encoding speed and compression. Valid values depend on the encoder (commonly
131+
a string: "fast", "medium", "slow"). Defaults to None
132+
(which will use encoder's default).
114133
"""
134+
preset = str(preset) if isinstance(preset, int) else preset
115135
_core.encode_video_to_file_like(
116136
frames=self._frames,
117137
frame_rate=self._frame_rate,
118138
format=format,
119139
file_like=file_like,
120140
pixel_format=pixel_format,
121141
crf=crf,
142+
preset=preset,
122143
)

test/test_encoders.py

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,12 @@ def test_bad_input_parameterized(self, tmp_path, method):
617617
)
618618
getattr(encoder, method)(**valid_params, crf=-10)
619619

620+
with pytest.raises(
621+
RuntimeError,
622+
match=r"avcodec_open2 failed: Invalid argument",
623+
):
624+
encoder.to_tensor(format="mp4", preset="fake_preset")
625+
620626
@pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"])
621627
@pytest.mark.parametrize("crf", [23, 23.5, -0.9])
622628
def test_crf_valid_values(self, method, crf, tmp_path):
@@ -826,13 +832,26 @@ def test_against_to_file(self, tmp_path, format, method):
826832
pytest.param("webm", marks=pytest.mark.slow),
827833
),
828834
)
829-
@pytest.mark.parametrize("pixel_format", ("yuv444p", "yuv420p"))
830-
def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format):
835+
@pytest.mark.parametrize(
836+
"encode_params",
837+
[
838+
{"pixel_format": "yuv444p", "crf": 0, "preset": None},
839+
{"pixel_format": "yuv420p", "crf": 30, "preset": None},
840+
{"pixel_format": "yuv420p", "crf": None, "preset": "ultrafast"},
841+
{"pixel_format": "yuv420p", "crf": None, "preset": None},
842+
],
843+
)
844+
def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, encode_params):
831845
ffmpeg_version = get_ffmpeg_major_version()
832846
if format == "webm" and (
833847
ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7))
834848
):
835849
pytest.skip("Codec for webm is not available in this FFmpeg installation.")
850+
851+
pixel_format = encode_params["pixel_format"]
852+
crf = encode_params["crf"]
853+
preset = encode_params["preset"]
854+
836855
if format in ("avi", "flv") and pixel_format == "yuv444p":
837856
pytest.skip(f"Default codec for {format} does not support {pixel_format}")
838857

@@ -845,8 +864,7 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format):
845864

846865
ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}")
847866
frame_rate = 30
848-
crf = 0
849-
# Some codecs (ex. MPEG4) do not support CRF.
867+
# Some codecs (e.g., MPEG4) do not support CRF or preset.
850868
# Flags not supported by the selected codec will be ignored.
851869
ffmpeg_cmd = [
852870
"ffmpeg",
@@ -861,18 +879,26 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format):
861879
str(frame_rate),
862880
"-i",
863881
temp_raw_path,
864-
"-pix_fmt",
865-
pixel_format, # Output format
866-
"-crf",
867-
str(crf),
868-
ffmpeg_encoded_path,
869882
]
883+
if pixel_format is not None: # Output format
884+
ffmpeg_cmd.extend(["-pix_fmt", pixel_format])
885+
if preset is not None:
886+
ffmpeg_cmd.extend(["-preset", preset])
887+
if crf is not None:
888+
ffmpeg_cmd.extend(["-crf", str(crf)])
889+
# Output path must be last
890+
ffmpeg_cmd.append(ffmpeg_encoded_path)
870891
subprocess.run(ffmpeg_cmd, check=True)
871892

872893
# Encode with our video encoder
873894
encoder_output_path = str(tmp_path / f"encoder_output.{format}")
874895
encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate)
875-
encoder.to_file(dest=encoder_output_path, pixel_format=pixel_format, crf=crf)
896+
encoder.to_file(
897+
dest=encoder_output_path,
898+
pixel_format=pixel_format,
899+
crf=crf,
900+
preset=preset,
901+
)
876902

877903
ffmpeg_frames = self.decode(ffmpeg_encoded_path).data
878904
encoder_frames = self.decode(encoder_output_path).data

0 commit comments

Comments
 (0)