Skip to content

Commit af16703

Browse files
author
pytorchbot
committed
2025-12-02 nightly release (2311422)
1 parent e6e2384 commit af16703

File tree

10 files changed

+177
-98
lines changed

10 files changed

+177
-98
lines changed

docs/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ sphinx==5.0.0
33
sphinx_design
44
sphinx_copybutton
55
sphinx-tabs
6+
sphinx-sitemap
67
matplotlib
78
torchvision
89
ipython

docs/source/conf.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
"sphinx_tabs.tabs",
5656
"sphinx_design",
5757
"sphinx_copybutton",
58+
"sphinx_sitemap",
5859
]
5960

6061

@@ -216,6 +217,15 @@ def __call__(self, filename):
216217
"matplotlib": ("https://matplotlib.org/stable/", None),
217218
}
218219

220+
# sitemap config
221+
html_baseurl = "https://meta-pytorch.org/torchcodec/stable/"
222+
sitemap_locales = [None]
223+
sitemap_excludes = [
224+
"search.html",
225+
"genindex.html",
226+
]
227+
sitemap_url_scheme = "{link}"
228+
219229

220230
def inject_minigalleries(app, what, name, obj, options, lines):
221231
"""Inject a minigallery into a docstring.

examples/decoding/approximate_mode.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
# Performance: ``VideoDecoder`` creation
6767
# --------------------------------------
6868
#
69-
# In terms of performance, the ``seek_mode`` parameter ultimately affects the
69+
# In terms of performance, the ``seek_mode`` parameter mainly affects the
7070
# **creation** of a :class:`~torchcodec.decoders.VideoDecoder` object. The
7171
# longer the video, the higher the performance gain.
7272

@@ -104,7 +104,7 @@ def bench(f, average_over=50, warmup=2, **f_kwargs):
104104
# ---------------------------------------------
105105
#
106106
# Strictly speaking the ``seek_mode`` parameter only affects the performance of
107-
# the :class:`~torchcodec.decoders.VideoDecoder` creation. It does not have a
107+
# the :class:`~torchcodec.decoders.VideoDecoder` creation. It usually does not have a
108108
# direct effect on the performance of frame decoding or sampling. **However**,
109109
# because frame decoding and sampling patterns typically involve the creation of
110110
# the :class:`~torchcodec.decoders.VideoDecoder` (one per video), ``seek_mode``
@@ -168,20 +168,21 @@ def sample_clips(seek_mode):
168168
# duration), and also builds an internal index of frames and key-frames. This
169169
# internal index is potentially more accurate than the one in the file's
170170
# headers, which leads to more accurate seeking behavior.
171-
# Without the scan, TorchCodec relies only on the metadata contained in the
172-
# file, which may not always be as accurate.
171+
# Without the scan (in approximate mode), TorchCodec relies only on the metadata
172+
# contained in the file, which may not always be as accurate. In some rare
173+
# cases, relying on this less accurate data may also lead to slower frame
174+
# decoding, because it can involve unnecessary seeks.
173175
#
174176
# Which mode should I use?
175177
# ------------------------
176178
#
177179
# The general rule of thumb is as follows:
178180
#
179181
# - If you really care about exactness of frame seeking, use "exact".
180-
# - If you can sacrifice exactness of seeking for speed, which is usually the
181-
# case when doing clip sampling, use "approximate".
182-
# - If your videos don't have variable framerate and their metadata is correct,
183-
# then "approximate" mode is a net win: it will be just as accurate as the
184-
# "exact" mode while still being significantly faster.
182+
# - If your videos are short (less than a few minutes) then "exact" will usually
183+
# be preferable, as the scan's fixed cost will be negligible.
184+
# - For long videos, if you can sacrifice exactness of seeking for speed, which
185+
# is usually the case when doing clip sampling, consider using "approximate".
185186

186187
# %%
187188
shutil.rmtree(temp_dir)

src/torchcodec/_core/SingleStreamDecoder.cpp

Lines changed: 44 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,32 +1088,10 @@ void SingleStreamDecoder::setCursor(int64_t pts) {
10881088
cursor_ = pts;
10891089
}
10901090

1091-
/*
1092-
Videos have I frames and non-I frames (P and B frames). Non-I frames need data
1093-
from the previous I frame to be decoded.
1094-
1095-
Imagine the cursor is at a random frame with PTS=lastDecodedAvFramePts (x for
1096-
brevity) and we wish to seek to a user-specified PTS=y.
1097-
1098-
If y < x, we don't have a choice but to seek backwards to the highest I frame
1099-
before y.
1100-
1101-
If y > x, we have two choices:
1102-
1103-
1. We could keep decoding forward until we hit y. Illustrated below:
1104-
1105-
I P P P I P P P I P P I P P I P
1106-
x y
1107-
1108-
2. We could try to jump to an I frame between x and y (indicated by j below).
1109-
And then start decoding until we encounter y. Illustrated below:
1110-
1111-
I P P P I P P P I P P I P P I P
1112-
x j y
1113-
1114-
(2) is more efficient than (1) if there is an I frame between x and y.
1115-
*/
11161091
bool SingleStreamDecoder::canWeAvoidSeeking() const {
1092+
// Returns true if we can avoid seeking in the AVFormatContext based on
1093+
// heuristics that rely on the target cursor_ and the last decoded frame.
1094+
// Seeking is expensive, so we try to avoid it when possible.
11171095
const StreamInfo& streamInfo = streamInfos_.at(activeStreamIndex_);
11181096
if (streamInfo.avMediaType == AVMEDIA_TYPE_AUDIO) {
11191097
// For audio, we only need to seek if a backwards seek was requested
@@ -1136,13 +1114,34 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const {
11361114
// implement caching.
11371115
return false;
11381116
}
1139-
// We are seeking forwards.
1140-
// We can only skip a seek if both lastDecodedAvFramePts and
1141-
// cursor_ share the same keyframe.
1142-
int lastDecodedAvFrameIndex = getKeyFrameIndexForPts(lastDecodedAvFramePts_);
1143-
int targetKeyFrameIndex = getKeyFrameIndexForPts(cursor_);
1144-
return lastDecodedAvFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
1145-
lastDecodedAvFrameIndex == targetKeyFrameIndex;
1117+
// We are seeking forwards. We can skip a seek if both the last decoded frame
1118+
// and cursor_ share the same keyframe:
1119+
// Videos have I frames and non-I frames (P and B frames). Non-I frames need
1120+
// data from the previous I frame to be decoded.
1121+
//
1122+
// Imagine the cursor is at a random frame with PTS=lastDecodedAvFramePts (x
1123+
// for brevity) and we wish to seek to a user-specified PTS=y.
1124+
//
1125+
// If y < x, we don't have a choice but to seek backwards to the highest I
1126+
// frame before y.
1127+
//
1128+
// If y > x, we have two choices:
1129+
//
1130+
// 1. We could keep decoding forward until we hit y. Illustrated below:
1131+
//
1132+
// I P P P I P P P I P P I P
1133+
// x y
1134+
//
1135+
// 2. We could try to jump to an I frame between x and y (indicated by j
1136+
// below). And then start decoding until we encounter y. Illustrated below:
1137+
//
1138+
// I P P P I P P P I P P I P
1139+
// x j y
1140+
// (2) is only more efficient than (1) if there is an I frame between x and y.
1141+
int lastKeyFrame = getKeyFrameIdentifier(lastDecodedAvFramePts_);
1142+
int targetKeyFrame = getKeyFrameIdentifier(cursor_);
1143+
return lastKeyFrame >= 0 && targetKeyFrame >= 0 &&
1144+
lastKeyFrame == targetKeyFrame;
11461145
}
11471146

11481147
// This method looks at currentPts and desiredPts and seeks in the
@@ -1359,7 +1358,19 @@ torch::Tensor SingleStreamDecoder::maybePermuteHWC2CHW(
13591358
// PTS <-> INDEX CONVERSIONS
13601359
// --------------------------------------------------------------------------
13611360

1362-
int SingleStreamDecoder::getKeyFrameIndexForPts(int64_t pts) const {
1361+
int SingleStreamDecoder::getKeyFrameIdentifier(int64_t pts) const {
1362+
// This function "identifies" a key frame for a given pts value.
1363+
// We use the term "identifier" rather than "index" because the nature of the
1364+
// index that is returned depends on various factors:
1365+
// - If seek_mode is exact, we return the index of the key frame in the
1366+
// scanned key-frame vector (streamInfo.keyFrames). So the returned value is
1367+
// in [0, num_key_frames).
1368+
// - If seek_mode is approximate, we use av_index_search_timestamp() which
1369+
// may return a value in [0, num_key_frames) like for mkv, but also a value
1370+
// in [0, num_frames) like for mp4. It really depends on the container.
1371+
//
1372+
// The range of the "identifier" doesn't matter that much, for now we only
1373+
// use it to uniquely identify a key frame in canWeAvoidSeeking().
13631374
const StreamInfo& streamInfo = streamInfos_.at(activeStreamIndex_);
13641375
if (streamInfo.keyFrames.empty()) {
13651376
return av_index_search_timestamp(

src/torchcodec/_core/SingleStreamDecoder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ class SingleStreamDecoder {
282282
// PTS <-> INDEX CONVERSIONS
283283
// --------------------------------------------------------------------------
284284

285-
int getKeyFrameIndexForPts(int64_t pts) const;
285+
int getKeyFrameIdentifier(int64_t pts) const;
286286

287287
// Returns the key frame index of the presentation timestamp using our index.
288288
// We build this index by scanning the file in

test/conftest.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,17 @@
44
import pytest
55
import torch
66

7+
from .utils import in_fbcode
8+
79

810
def pytest_configure(config):
911
# register an additional marker (see pytest_collection_modifyitems)
1012
config.addinivalue_line(
1113
"markers", "needs_cuda: mark for tests that rely on a CUDA device"
1214
)
15+
config.addinivalue_line(
16+
"markers", "needs_ffmpeg_cli: mark for tests that rely on ffmpeg"
17+
)
1318

1419

1520
def pytest_collection_modifyitems(items):
@@ -28,6 +33,15 @@ def pytest_collection_modifyitems(items):
2833
# 'needs_cuda' mark, and the ones with device == 'cpu' won't have the
2934
# mark.
3035
needs_cuda = item.get_closest_marker("needs_cuda") is not None
36+
needs_ffmpeg_cli = item.get_closest_marker("needs_ffmpeg_cli") is not None
37+
has_skip_marker = item.get_closest_marker("skip") is not None
38+
has_skipif_marker = item.get_closest_marker("skipif") is not None
39+
40+
if in_fbcode():
41+
# fbcode doesn't like skipping tests, so instead we just don't collect the tests
42+
# so that they don't even "exist", hence the continue statements.
43+
if needs_ffmpeg_cli or has_skip_marker or has_skipif_marker:
44+
continue
3145

3246
if (
3347
needs_cuda

test/test_decoders.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
BT709_FULL_RANGE,
3030
cuda_version_used_for_building_torch,
3131
get_ffmpeg_major_version,
32+
get_python_version,
3233
H264_10BITS,
3334
H265_10BITS,
3435
H265_VIDEO,
@@ -39,6 +40,7 @@
3940
NASA_AUDIO_MP3_44100,
4041
NASA_VIDEO,
4142
needs_cuda,
43+
needs_ffmpeg_cli,
4244
psnr,
4345
SINE_MONO_S16,
4446
SINE_MONO_S32,
@@ -1146,6 +1148,10 @@ def test_get_key_frame_indices(self, device):
11461148

11471149
# TODO investigate why this fails internally.
11481150
@pytest.mark.skipif(in_fbcode(), reason="Compile test fails internally.")
1151+
@pytest.mark.skipif(
1152+
get_python_version() >= (3, 14),
1153+
reason="torch.compile is not supported on Python 3.14+",
1154+
)
11491155
@pytest.mark.parametrize("device", all_supported_devices())
11501156
def test_compile(self, device):
11511157
decoder, device = make_video_decoder(NASA_VIDEO.path, device=device)
@@ -1311,10 +1317,7 @@ def setup_frame_mappings(tmp_path, file, stream_index):
13111317
# Return the custom frame mappings as a JSON string
13121318
return custom_frame_mappings
13131319

1314-
@pytest.mark.skipif(
1315-
in_fbcode(),
1316-
reason="ffprobe not available internally",
1317-
)
1320+
@needs_ffmpeg_cli
13181321
@pytest.mark.parametrize("device", all_supported_devices())
13191322
@pytest.mark.parametrize("stream_index", [0, 3])
13201323
@pytest.mark.parametrize(
@@ -1361,10 +1364,7 @@ def test_custom_frame_mappings_json_and_bytes(
13611364
),
13621365
)
13631366

1364-
@pytest.mark.skipif(
1365-
in_fbcode(),
1366-
reason="ffprobe not available internally",
1367-
)
1367+
@needs_ffmpeg_cli
13681368
@pytest.mark.parametrize("device", all_supported_devices())
13691369
@pytest.mark.parametrize(
13701370
"custom_frame_mappings,expected_match",

0 commit comments

Comments
 (0)