crf is sometimes a double, actually

Dan-Flores · Dan-Flores · commit b7e52fb12d28 · 2025-11-12T09:57:54.000-05:00
diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
@@ -570,17 +570,16 @@ AVPixelFormat validatePixelFormat(
   TORCH_CHECK(false, errorMsg.str());
 }
 
-void validateNumericOption(
+void validateDoubleOption(
     const AVCodec& avCodec,
     const char* optionName,
-    int value) {
-  // First determine if codec's private class is defined
+    double value) {
   if (!avCodec.priv_class) {
     return;
   }
   const AVOption* option = av_opt_find2(
-      // The obj arg must be converted from const AVClass* const* to non-const
-      // void* First cast to remove const, then cast to void*
+      // Convert obj arg from const AVClass* const* to non-const void*:
+      // static_cast to const void* first, then const_cast drops the const
       const_cast<void*>(static_cast<const void*>(&avCodec.priv_class)),
       optionName,
       nullptr,
@@ -739,7 +738,7 @@ void VideoEncoder::initializeEncoder(
   // Apply videoStreamOptions
   AVDictionary* options = nullptr;
   if (videoStreamOptions.crf.has_value()) {
-    validateNumericOption(*avCodec, "crf", videoStreamOptions.crf.value());
+    validateDoubleOption(*avCodec, "crf", videoStreamOptions.crf.value());
     av_dict_set(
         &options,
         "crf",
diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h
@@ -47,7 +47,7 @@ struct VideoStreamOptions {
   // Encoding options
   // TODO-VideoEncoder: Consider adding other optional fields here
   // (bit rate, gop size, max b frames, preset)
-  std::optional<int> crf;
+  std::optional<double> crf;
 
   // Optional pixel format for video encoding (e.g., "yuv420p", "yuv444p")
   // If not specified, uses codec's default format.
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
   m.def(
-      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, int? crf=None) -> ()");
+      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None) -> ()");
   m.def(
-      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, int? crf=None) -> Tensor");
+      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None) -> Tensor");
   m.def(
-      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, int? crf=None) -> ()");
+      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None) -> ()");
   m.def(
       "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
   m.def(
@@ -604,7 +604,7 @@ void encode_video_to_file(
     int64_t frame_rate,
     std::string_view file_name,
     std::optional<std::string> pixel_format = std::nullopt,
-    std::optional<int64_t> crf = std::nullopt) {
+    std::optional<double> crf = std::nullopt) {
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.pixelFormat = pixel_format;
   videoStreamOptions.crf = crf;
@@ -621,7 +621,7 @@ at::Tensor encode_video_to_tensor(
     int64_t frame_rate,
     std::string_view format,
     std::optional<std::string> pixel_format = std::nullopt,
-    std::optional<int64_t> crf = std::nullopt) {
+    std::optional<double> crf = std::nullopt) {
   auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.pixelFormat = pixel_format;
@@ -641,7 +641,7 @@ void _encode_video_to_file_like(
     std::string_view format,
     int64_t file_like_context,
     std::optional<std::string> pixel_format = std::nullopt,
-    std::optional<int64_t> crf = std::nullopt) {
+    std::optional<double> crf = std::nullopt) {
   auto fileLikeContext =
       reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
   TORCH_CHECK(
diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
@@ -213,7 +213,7 @@ def encode_video_to_file_like(
     frame_rate: int,
     format: str,
     file_like: Union[io.RawIOBase, io.BufferedIOBase],
-    crf: Optional[int] = None,
+    crf: Optional[Union[int, float]] = None,
     pixel_format: Optional[str] = None,
 ) -> None:
     """Encode video frames to a file-like object.
@@ -322,7 +322,7 @@ def encode_video_to_file_abstract(
     frames: torch.Tensor,
     frame_rate: int,
     filename: str,
-    crf: Optional[int] = None,
+    crf: Optional[Union[int, float]] = None,
     pixel_format: Optional[str] = None,
 ) -> None:
     return
@@ -333,7 +333,7 @@ def encode_video_to_tensor_abstract(
     frames: torch.Tensor,
     frame_rate: int,
     format: str,
-    crf: Optional[int] = None,
+    crf: Optional[Union[int, float]] = None,
     pixel_format: Optional[str] = None,
 ) -> torch.Tensor:
     return torch.empty([], dtype=torch.long)
@@ -345,7 +345,7 @@ def _encode_video_to_file_like_abstract(
     frame_rate: int,
     format: str,
     file_like_context: int,
-    crf: Optional[int] = None,
+    crf: Optional[Union[int, float]] = None,
     pixel_format: Optional[str] = None,
 ) -> None:
     return
diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
@@ -37,7 +37,7 @@ def to_file(
         dest: Union[str, Path],
         *,
         pixel_format: Optional[str] = None,
-        crf: Optional[int] = None,
+        crf: Optional[Union[int, float]] = None,
     ) -> None:
         """Encode frames into a file.
 
@@ -47,7 +47,7 @@ def to_file(
                 container format.
             pixel_format (str, optional): The pixel format for encoding (e.g.,
                 "yuv420p", "yuv444p"). If not specified, uses codec's default format.
-            crf (int, optional): Constant Rate Factor for encoding quality. Lower values
+            crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
                 mean better quality. Valid range depends on the encoder (commonly 0-51).
                 Defaults to None (which will use encoder's default).
         """
@@ -64,7 +64,7 @@ def to_tensor(
         format: str,
         *,
         pixel_format: Optional[str] = None,
-        crf: Optional[int] = None,
+        crf: Optional[Union[int, float]] = None,
     ) -> Tensor:
         """Encode frames into raw bytes, as a 1D uint8 Tensor.
 
@@ -73,7 +73,7 @@ def to_tensor(
                 "mkv", "avi", "webm", "flv", etc.
             pixel_format (str, optional): The pixel format to encode frames into (e.g.,
                 "yuv420p", "yuv444p"). If not specified, uses codec's default format.
-            crf (int, optional): Constant Rate Factor for encoding quality. Lower values
+            crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
                 mean better quality. Valid range depends on the encoder (commonly 0-51).
                 Defaults to None (which will use encoder's default).
 
@@ -94,7 +94,7 @@ def to_file_like(
         format: str,
         *,
         pixel_format: Optional[str] = None,
-        crf: Optional[int] = None,
+        crf: Optional[Union[int, float]] = None,
     ) -> None:
         """Encode frames into a file-like object.
 
@@ -108,7 +108,7 @@ def to_file_like(
                 "mkv", "avi", "webm", "flv", etc.
             pixel_format (str, optional): The pixel format for encoding (e.g.,
                 "yuv420p", "yuv444p"). If not specified, uses codec's default format.
-            crf (int, optional): Constant Rate Factor for encoding quality. Lower values
+            crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
                 mean better quality. Valid range depends on the encoder (commonly 0-51).
                 Defaults to None (which will use encoder's default).
         """
diff --git a/test/test_encoders.py b/test/test_encoders.py
@@ -617,6 +617,24 @@ def test_bad_input_parameterized(self, tmp_path, method):
             )
             getattr(encoder, method)(**valid_params, crf=-10)
 
+    @pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"])
+    @pytest.mark.parametrize("crf", [23, 23.5, -0.9])
+    def test_crf_valid_values(self, method, crf, tmp_path):
+        if method == "to_file":
+            valid_params = {"dest": str(tmp_path / "test.mp4")}
+        elif method == "to_tensor":
+            valid_params = {"format": "mp4"}
+        elif method == "to_file_like":
+            valid_params = dict(file_like=io.BytesIO(), format="mp4")
+        else:
+            raise ValueError(f"Unknown method: {method}")
+
+        encoder = VideoEncoder(
+            frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8),
+            frame_rate=30,
+        )
+        getattr(encoder, method)(**valid_params, crf=crf)
+
     def test_bad_input(self, tmp_path):
         encoder = VideoEncoder(
             frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8),