Remove the device argument; use the frames tensor's device instead

Dan-Flores · Dan-Flores · commit ffbdf4e2e62c · 2025-11-27T03:34:34.000Z
diff --git a/src/torchcodec/_core/CudaDeviceInterface.cpp b/src/torchcodec/_core/CudaDeviceInterface.cpp
@@ -1,10 +1,8 @@
 #include <ATen/cuda/CUDAEvent.h>
 #include <c10/cuda/CUDAStream.h>
-#include <cuda_runtime.h>
 #include <torch/types.h>
 #include <mutex>
 
-#include "CUDACommon.h"
 #include "Cache.h"
 #include "CudaDeviceInterface.h"
 #include "FFMPEGCommon.h"
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
   m.def(
-      "encode_video_to_file(Tensor frames, float frame_rate, str filename, str device=\"cpu\", str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> ()");
+      "encode_video_to_file(Tensor frames, float frame_rate, str filename, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> ()");
   m.def(
-      "encode_video_to_tensor(Tensor frames, float frame_rate, str format, str device=\"cpu\", str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> Tensor");
+      "encode_video_to_tensor(Tensor frames, float frame_rate, str format, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> Tensor");
   m.def(
-      "_encode_video_to_file_like(Tensor frames, float frame_rate, str format, int file_like_context, str device=\"cpu\",str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> ()");
+      "_encode_video_to_file_like(Tensor frames, float frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> ()");
   m.def(
       "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
   m.def(
@@ -640,14 +640,13 @@ void encode_video_to_file(
     const at::Tensor& frames,
     double frame_rate,
     std::string_view file_name,
-    std::string_view device = "cpu",
     std::optional<std::string_view> codec = std::nullopt,
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
     std::optional<std::string_view> preset = std::nullopt,
     std::optional<std::vector<std::string>> extra_options = std::nullopt) {
   VideoStreamOptions videoStreamOptions;
-  videoStreamOptions.device = torch::Device(std::string(device));
+  videoStreamOptions.device = frames.device();
   videoStreamOptions.codec = std::move(codec);
   videoStreamOptions.pixelFormat = std::move(pixel_format);
   videoStreamOptions.crf = crf;
@@ -665,15 +664,14 @@ at::Tensor encode_video_to_tensor(
     const at::Tensor& frames,
     double frame_rate,
     std::string_view format,
-    std::string_view device = "cpu",
     std::optional<std::string_view> codec = std::nullopt,
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
     std::optional<std::string_view> preset = std::nullopt,
     std::optional<std::vector<std::string>> extra_options = std::nullopt) {
   auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
   VideoStreamOptions videoStreamOptions;
-  videoStreamOptions.device = torch::Device(std::string(device));
+  videoStreamOptions.device = frames.device();
   videoStreamOptions.codec = std::move(codec);
   videoStreamOptions.pixelFormat = std::move(pixel_format);
   videoStreamOptions.crf = crf;
@@ -698,7 +696,6 @@ void _encode_video_to_file_like(
     double frame_rate,
     std::string_view format,
     int64_t file_like_context,
-    std::string_view device = "cpu",
     std::optional<std::string_view> codec = std::nullopt,
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
@@ -711,7 +708,7 @@ void _encode_video_to_file_like(
   std::unique_ptr<AVIOFileLikeContext> avioContextHolder(fileLikeContext);
 
   VideoStreamOptions videoStreamOptions;
-  videoStreamOptions.device = torch::Device(std::string(device));
+  videoStreamOptions.device = frames.device();
   videoStreamOptions.codec = std::move(codec);
   videoStreamOptions.pixelFormat = std::move(pixel_format);
   videoStreamOptions.crf = crf;
diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
@@ -213,7 +213,6 @@ def encode_video_to_file_like(
     frame_rate: float,
     format: str,
     file_like: Union[io.RawIOBase, io.BufferedIOBase],
-    device: Optional[str] = "cpu",
     codec: Optional[str] = None,
     pixel_format: Optional[str] = None,
     crf: Optional[Union[int, float]] = None,
@@ -223,11 +222,10 @@ def encode_video_to_file_like(
     """Encode video frames to a file-like object.
 
     Args:
-        frames: Video frames tensor
+        frames: Video frames tensor. The device of the frames tensor will be used for encoding.
         frame_rate: Frame rate in frames per second
         format: Video format (e.g., "mp4", "mov", "mkv")
         file_like: File-like object that supports write() and seek() methods
-        device: Device to use for encoding (default: "cpu")
         codec: Optional codec name (e.g., "libx264", "h264")
         pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p")
         crf: Optional constant rate factor for encoding quality
@@ -241,7 +239,6 @@ def encode_video_to_file_like(
         frame_rate,
         format,
         _pybind_ops.create_file_like_context(file_like, True),  # True means for writing
-        device,
         codec,
         pixel_format,
         crf,
@@ -334,7 +331,6 @@ def encode_video_to_file_abstract(
     frames: torch.Tensor,
     frame_rate: float,
     filename: str,
-    device: str = "cpu",
     codec: Optional[str] = None,
     pixel_format: Optional[str] = None,
     preset: Optional[str] = None,
@@ -349,7 +345,6 @@ def encode_video_to_tensor_abstract(
     frames: torch.Tensor,
     frame_rate: float,
     format: str,
-    device: str = "cpu",
     codec: Optional[str] = None,
     pixel_format: Optional[str] = None,
     preset: Optional[str] = None,
@@ -365,7 +360,6 @@ def _encode_video_to_file_like_abstract(
     frame_rate: float,
     format: str,
     file_like_context: int,
-    device: str = "cpu",
     codec: Optional[str] = None,
     pixel_format: Optional[str] = None,
     preset: Optional[str] = None,
diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
@@ -2,7 +2,7 @@
 from typing import Any, Dict, Optional, Union
 
 import torch
-from torch import device as torch_device, Tensor
+from torch import Tensor
 
 from torchcodec import _core
 
@@ -15,17 +15,15 @@ class VideoEncoder:
             tensor of shape ``(N, C, H, W)`` where N is the number of frames,
             C is 3 channels (RGB), H is height, and W is width.
             Values must be uint8 in the range ``[0, 255]``.
+            The device of the frames tensor will be used for encoding.
         frame_rate (float): The frame rate of the **input** ``frames``. Also defines the encoded **output** frame rate.
-        device (str or torch.device, optional): The device to use for encoding. Default: "cpu".
-            If you pass a CUDA device, frames will be encoded on GPU.
     """
 
     def __init__(
         self,
         frames: Tensor,
         *,
         frame_rate: float,
-        device: Optional[Union[str, torch_device]] = "cpu",
     ):
         torch._C._log_api_usage_once("torchcodec.encoders.VideoEncoder")
         if not isinstance(frames, Tensor):
@@ -37,13 +35,8 @@ def __init__(
         if frame_rate <= 0:
             raise ValueError(f"{frame_rate = } must be > 0.")
 
-        # Validate and store device
-        if isinstance(device, torch_device):
-            device = str(device)
-
         self._frames = frames
         self._frame_rate = frame_rate
-        self._device = device
 
     def to_file(
         self,
@@ -86,7 +79,6 @@ def to_file(
             frames=self._frames,
             frame_rate=self._frame_rate,
             filename=str(dest),
-            device=self._device,
             codec=codec,
             pixel_format=pixel_format,
             crf=crf,
@@ -139,7 +131,6 @@ def to_tensor(
             frames=self._frames,
             frame_rate=self._frame_rate,
             format=format,
-            device=self._device,
             codec=codec,
             pixel_format=pixel_format,
             crf=crf,
@@ -196,7 +187,6 @@ def to_file_like(
             frame_rate=self._frame_rate,
             format=format,
             file_like=file_like,
-            device=self._device,
             codec=codec,
             pixel_format=pixel_format,
             crf=crf,
diff --git a/test/test_encoders.py b/test/test_encoders.py
@@ -829,18 +829,16 @@ def encode_to_tensor(frames):
             common_params = dict(crf=0, pixel_format="yuv444p")
             if method == "to_file":
                 dest = str(tmp_path / "output.mp4")
-                VideoEncoder(frames, frame_rate=30, device=device).to_file(
-                    dest=dest, **common_params
-                )
+                VideoEncoder(frames, frame_rate=30).to_file(dest=dest, **common_params)
                 with open(dest, "rb") as f:
                     return torch.frombuffer(f.read(), dtype=torch.uint8).clone()
             elif method == "to_tensor":
-                return VideoEncoder(frames, frame_rate=30, device=device).to_tensor(
+                return VideoEncoder(frames, frame_rate=30).to_tensor(
                     format="mp4", **common_params
                 )
             elif method == "to_file_like":
                 file_like = io.BytesIO()
-                VideoEncoder(frames, frame_rate=30, device=device).to_file_like(
+                VideoEncoder(frames, frame_rate=30).to_file_like(
                     file_like, format="mp4", **common_params
                 )
                 return torch.frombuffer(file_like.getvalue(), dtype=torch.uint8)
@@ -1331,9 +1329,7 @@ def test_nvenc_against_ffmpeg_cli(
             else:
                 raise
 
-        encoder = VideoEncoder(
-            frames=source_frames, frame_rate=frame_rate, device=device
-        )
+        encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate)
 
         encoder_extra_options = {"qp": qp}
         if codec == "av1_nvenc":