Merge branch 'main' into approx-tuto

NicolasHug · web-flow · commit ce330d1341ec · 2025-12-01T18:20:51.000Z
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -30,7 +30,7 @@ Start by installing the **nightly** build of PyTorch following the
 Then, the easiest way to install the rest of the dependencies is to run:
 
 ```bash
-conda install cmake pkg-config pybind11 "ffmpeg<8" -c conda-forge
+conda install cmake pkg-config pybind11 "ffmpeg" -c conda-forge
 ```
 
 ### Clone and build
diff --git a/README.md b/README.md
@@ -107,16 +107,16 @@ ffmpeg -f lavfi -i \
    `torch` and `torchcodec`.
 
 2. Install FFmpeg, if it's not already installed. Linux distributions usually
-   come with FFmpeg pre-installed. TorchCodec supports major FFmpeg versions
-   in [4, 7] on all platforms, and FFmpeg version 8 is supported on Mac and Linux.
+   come with FFmpeg pre-installed. TorchCodec supports supports all major FFmpeg versions
+   in [4, 8].
 
    If FFmpeg is not already installed, or you need a more recent version, an
    easy way to install it is to use `conda`:
 
    ```bash
-   conda install "ffmpeg<8"
+   conda install "ffmpeg"
    # or
-   conda install "ffmpeg<8" -c conda-forge
+   conda install "ffmpeg" -c conda-forge
    ```
 
 3. Install TorchCodec:
@@ -148,16 +148,15 @@ format you want. Refer to Nvidia's GPU support matrix for more details
 [here](https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new).
 
 1. Install FFmpeg with NVDEC support.
-   TorchCodec with CUDA should work with FFmpeg versions in [4, 7] on all platforms,
-   and FFmpeg version 8 is supported on Linux.
+   TorchCodec with CUDA should work with FFmpeg versions in [4, 8].
 
    If FFmpeg is not already installed, or you need a more recent version, an
    easy way to install it is to use `conda`:
 
    ```bash
-   conda install "ffmpeg<8"
+   conda install "ffmpeg"
    # or
-   conda install "ffmpeg<8" -c conda-forge
+   conda install "ffmpeg" -c conda-forge
    ```
 
    After installing FFmpeg make sure it has NVDEC support when you list the supported
diff --git a/src/torchcodec/_core/Metadata.h b/src/torchcodec/_core/Metadata.h
@@ -22,11 +22,11 @@ enum class SeekMode { exact, approximate, custom_frame_mappings };
 
 struct StreamMetadata {
   // Common (video and audio) fields derived from the AVStream.
-  int streamIndex;
+  int streamIndex = -1;
 
   // See this link for what various values are available:
   // https://ffmpeg.org/doxygen/trunk/group__lavu__misc.html#ga9a84bba4713dfced21a1a56163be1f48
-  AVMediaType mediaType;
+  AVMediaType mediaType = AVMEDIA_TYPE_UNKNOWN;
 
   std::optional<AVCodecID> codecId;
   std::optional<std::string> codecName;
diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp
@@ -1088,32 +1088,17 @@ void SingleStreamDecoder::setCursor(int64_t pts) {
   cursor_ = pts;
 }
 
-/*
-Videos have I frames and non-I frames (P and B frames). Non-I frames need data
-from the previous I frame to be decoded.
-
-Imagine the cursor is at a random frame with PTS=lastDecodedAvFramePts (x for
-brevity) and we wish to seek to a user-specified PTS=y.
-
-If y < x, we don't have a choice but to seek backwards to the highest I frame
-before y.
-
-If y > x, we have two choices:
-
-1. We could keep decoding forward until we hit y. Illustrated below:
-
-I    P     P    P    I    P    P    P    I    P    P    I    P    P    I    P
-                          x         y
-
-2. We could try to jump to an I frame between x and y (indicated by j below).
-And then start decoding until we encounter y. Illustrated below:
-
-I    P     P    P    I    P    P    P    I    P    P    I    P    P    I    P
-                          x              j         y
-
-(2) is more efficient than (1) if there is an I frame between x and y.
-*/
 bool SingleStreamDecoder::canWeAvoidSeeking() const {
+  // Returns true if we can avoid seeking in the AVFormatContext based on
+  // heuristics that rely on the target cursor_ and the last decoded frame.
+  // Seeking is expensive, so we try to avoid it when possible.
+  // Note that this function itself isn't always that cheap to call: in
+  // particular the calls to getKeyFrameIndexForPts below in approximate mode
+  // are sometimes slow.
+  // TODO we should understand why (is it because it reads the file?) and
+  // potentially optimize it. E.g. we may not want to ever seek, or even *check*
+  // if we need to seek in some cases, like if we're going to decode 80% of the
+  // frames anyway.
   const StreamInfo& streamInfo = streamInfos_.at(activeStreamIndex_);
   if (streamInfo.avMediaType == AVMEDIA_TYPE_AUDIO) {
     // For audio, we only need to seek if a backwards seek was requested
@@ -1136,13 +1121,34 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const {
     // implement caching.
     return false;
   }
-  // We are seeking forwards.
-  // We can only skip a seek if both lastDecodedAvFramePts and
-  // cursor_ share the same keyframe.
-  int lastDecodedAvFrameIndex = getKeyFrameIndexForPts(lastDecodedAvFramePts_);
+  // We are seeking forwards. We can skip a seek if both the last decoded frame
+  // and cursor_ share the same keyframe:
+  // Videos have I frames and non-I frames (P and B frames). Non-I frames need
+  // data from the previous I frame to be decoded.
+  //
+  // Imagine the cursor is at a random frame with PTS=lastDecodedAvFramePts (x
+  // for brevity) and we wish to seek to a user-specified PTS=y.
+  //
+  // If y < x, we don't have a choice but to seek backwards to the highest I
+  // frame before y.
+  //
+  // If y > x, we have two choices:
+  //
+  // 1. We could keep decoding forward until we hit y. Illustrated below:
+  //
+  // I    P     P    P    I    P    P    P    I    P    P    I    P
+  //                           x         y
+  //
+  // 2. We could try to jump to an I frame between x and y (indicated by j
+  // below). And then start decoding until we encounter y. Illustrated below:
+  //
+  // I    P     P    P    I    P    P    P    I    P    P    I    P
+  //                           x              j         y
+  // (2) is only more efficient than (1) if there is an I frame between x and y.
+  int lastKeyFrameIndex = getKeyFrameIndexForPts(lastDecodedAvFramePts_);
   int targetKeyFrameIndex = getKeyFrameIndexForPts(cursor_);
-  return lastDecodedAvFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
-      lastDecodedAvFrameIndex == targetKeyFrameIndex;
+  return lastKeyFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
+      lastKeyFrameIndex == targetKeyFrameIndex;
 }
 
 // This method looks at currentPts and desiredPts and seeks in the
diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
@@ -70,7 +70,7 @@ def load_torchcodec_shared_libraries():
     raise RuntimeError(
         f"""Could not load libtorchcodec. Likely causes:
           1. FFmpeg is not properly installed in your environment. We support
-             versions 4, 5, 6, and 7 on all platforms, and 8 on Mac and Linux.
+             versions 4, 5, 6, 7, and 8.
           2. The PyTorch version ({torch.__version__}) is not compatible with
              this version of TorchCodec. Refer to the version compatibility
              table:
diff --git a/test/conftest.py b/test/conftest.py
@@ -4,12 +4,17 @@
 import pytest
 import torch
 
+from .utils import in_fbcode
+
 
 def pytest_configure(config):
     # register an additional marker (see pytest_collection_modifyitems)
     config.addinivalue_line(
         "markers", "needs_cuda: mark for tests that rely on a CUDA device"
     )
+    config.addinivalue_line(
+        "markers", "needs_ffmpeg_cli: mark for tests that rely on ffmpeg"
+    )
 
 
 def pytest_collection_modifyitems(items):
@@ -28,6 +33,15 @@ def pytest_collection_modifyitems(items):
         # 'needs_cuda' mark, and the ones with device == 'cpu' won't have the
         # mark.
         needs_cuda = item.get_closest_marker("needs_cuda") is not None
+        needs_ffmpeg_cli = item.get_closest_marker("needs_ffmpeg_cli") is not None
+        has_skip_marker = item.get_closest_marker("skip") is not None
+        has_skipif_marker = item.get_closest_marker("skipif") is not None
+
+        if in_fbcode():
+            # fbcode doesn't like skipping tests, so instead we  just don't collect the test
+            # so that they don't even "exist", hence the continue statements.
+            if needs_ffmpeg_cli or has_skip_marker or has_skipif_marker:
+                continue
 
         if (
             needs_cuda
diff --git a/test/test_decoders.py b/test/test_decoders.py
@@ -29,6 +29,7 @@
     BT709_FULL_RANGE,
     cuda_version_used_for_building_torch,
     get_ffmpeg_major_version,
+    get_python_version,
     H264_10BITS,
     H265_10BITS,
     H265_VIDEO,
@@ -39,6 +40,7 @@
     NASA_AUDIO_MP3_44100,
     NASA_VIDEO,
     needs_cuda,
+    needs_ffmpeg_cli,
     psnr,
     SINE_MONO_S16,
     SINE_MONO_S32,
@@ -1146,6 +1148,10 @@ def test_get_key_frame_indices(self, device):
 
     # TODO investigate why this fails internally.
     @pytest.mark.skipif(in_fbcode(), reason="Compile test fails internally.")
+    @pytest.mark.skipif(
+        get_python_version() >= (3, 14),
+        reason="torch.compile is not supported on Python 3.14+",
+    )
     @pytest.mark.parametrize("device", all_supported_devices())
     def test_compile(self, device):
         decoder, device = make_video_decoder(NASA_VIDEO.path, device=device)
@@ -1311,10 +1317,7 @@ def setup_frame_mappings(tmp_path, file, stream_index):
             # Return the custom frame mappings as a JSON string
             return custom_frame_mappings
 
-    @pytest.mark.skipif(
-        in_fbcode(),
-        reason="ffprobe not available internally",
-    )
+    @needs_ffmpeg_cli
     @pytest.mark.parametrize("device", all_supported_devices())
     @pytest.mark.parametrize("stream_index", [0, 3])
     @pytest.mark.parametrize(
@@ -1361,10 +1364,7 @@ def test_custom_frame_mappings_json_and_bytes(
             ),
         )
 
-    @pytest.mark.skipif(
-        in_fbcode(),
-        reason="ffprobe not available internally",
-    )
+    @needs_ffmpeg_cli
     @pytest.mark.parametrize("device", all_supported_devices())
     @pytest.mark.parametrize(
         "custom_frame_mappings,expected_match",
diff --git a/test/test_encoders.py b/test/test_encoders.py
diff --git a/test/test_ops.py b/test/test_ops.py
diff --git a/test/utils.py b/test/utils.py