Skip to content

Commit ce330d1

Browse files
authored
Merge branch 'main' into approx-tuto
2 parents ade26a2 + 392bab3 commit ce330d1

File tree

10 files changed

+159
-98
lines changed

10 files changed

+159
-98
lines changed

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ Start by installing the **nightly** build of PyTorch following the
3030
Then, the easiest way to install the rest of the dependencies is to run:
3131

3232
```bash
33-
conda install cmake pkg-config pybind11 "ffmpeg<8" -c conda-forge
33+
conda install cmake pkg-config pybind11 "ffmpeg" -c conda-forge
3434
```
3535

3636
### Clone and build

README.md

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -107,16 +107,16 @@ ffmpeg -f lavfi -i \
107107
`torch` and `torchcodec`.
108108

109109
2. Install FFmpeg, if it's not already installed. Linux distributions usually
110-
come with FFmpeg pre-installed. TorchCodec supports major FFmpeg versions
111-
in [4, 7] on all platforms, and FFmpeg version 8 is supported on Mac and Linux.
110+
come with FFmpeg pre-installed. TorchCodec supports all major FFmpeg versions
111+
in [4, 8].
112112

113113
If FFmpeg is not already installed, or you need a more recent version, an
114114
easy way to install it is to use `conda`:
115115

116116
```bash
117-
conda install "ffmpeg<8"
117+
conda install "ffmpeg"
118118
# or
119-
conda install "ffmpeg<8" -c conda-forge
119+
conda install "ffmpeg" -c conda-forge
120120
```
121121

122122
3. Install TorchCodec:
@@ -148,16 +148,15 @@ format you want. Refer to Nvidia's GPU support matrix for more details
148148
[here](https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new).
149149

150150
1. Install FFmpeg with NVDEC support.
151-
TorchCodec with CUDA should work with FFmpeg versions in [4, 7] on all platforms,
152-
and FFmpeg version 8 is supported on Linux.
151+
TorchCodec with CUDA should work with FFmpeg versions in [4, 8].
153152

154153
If FFmpeg is not already installed, or you need a more recent version, an
155154
easy way to install it is to use `conda`:
156155

157156
```bash
158-
conda install "ffmpeg<8"
157+
conda install "ffmpeg"
159158
# or
160-
conda install "ffmpeg<8" -c conda-forge
159+
conda install "ffmpeg" -c conda-forge
161160
```
162161

163162
After installing FFmpeg make sure it has NVDEC support when you list the supported

src/torchcodec/_core/Metadata.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@ enum class SeekMode { exact, approximate, custom_frame_mappings };
2222

2323
struct StreamMetadata {
2424
// Common (video and audio) fields derived from the AVStream.
25-
int streamIndex;
25+
int streamIndex = -1;
2626

2727
// See this link for what various values are available:
2828
// https://ffmpeg.org/doxygen/trunk/group__lavu__misc.html#ga9a84bba4713dfced21a1a56163be1f48
29-
AVMediaType mediaType;
29+
AVMediaType mediaType = AVMEDIA_TYPE_UNKNOWN;
3030

3131
std::optional<AVCodecID> codecId;
3232
std::optional<std::string> codecName;

src/torchcodec/_core/SingleStreamDecoder.cpp

Lines changed: 37 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,32 +1088,17 @@ void SingleStreamDecoder::setCursor(int64_t pts) {
10881088
cursor_ = pts;
10891089
}
10901090

1091-
/*
1092-
Videos have I frames and non-I frames (P and B frames). Non-I frames need data
1093-
from the previous I frame to be decoded.
1094-
1095-
Imagine the cursor is at a random frame with PTS=lastDecodedAvFramePts (x for
1096-
brevity) and we wish to seek to a user-specified PTS=y.
1097-
1098-
If y < x, we don't have a choice but to seek backwards to the highest I frame
1099-
before y.
1100-
1101-
If y > x, we have two choices:
1102-
1103-
1. We could keep decoding forward until we hit y. Illustrated below:
1104-
1105-
I P P P I P P P I P P I P P I P
1106-
x y
1107-
1108-
2. We could try to jump to an I frame between x and y (indicated by j below).
1109-
And then start decoding until we encounter y. Illustrated below:
1110-
1111-
I P P P I P P P I P P I P P I P
1112-
x j y
1113-
1114-
(2) is more efficient than (1) if there is an I frame between x and y.
1115-
*/
11161091
bool SingleStreamDecoder::canWeAvoidSeeking() const {
1092+
// Returns true if we can avoid seeking in the AVFormatContext based on
1093+
// heuristics that rely on the target cursor_ and the last decoded frame.
1094+
// Seeking is expensive, so we try to avoid it when possible.
1095+
// Note that this function itself isn't always that cheap to call: in
1096+
// particular the calls to getKeyFrameIndexForPts below in approximate mode
1097+
// are sometimes slow.
1098+
// TODO we should understand why (is it because it reads the file?) and
1099+
// potentially optimize it. E.g. we may not want to ever seek, or even *check*
1100+
// if we need to seek in some cases, like if we're going to decode 80% of the
1101+
// frames anyway.
11171102
const StreamInfo& streamInfo = streamInfos_.at(activeStreamIndex_);
11181103
if (streamInfo.avMediaType == AVMEDIA_TYPE_AUDIO) {
11191104
// For audio, we only need to seek if a backwards seek was requested
@@ -1136,13 +1121,34 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const {
11361121
// implement caching.
11371122
return false;
11381123
}
1139-
// We are seeking forwards.
1140-
// We can only skip a seek if both lastDecodedAvFramePts and
1141-
// cursor_ share the same keyframe.
1142-
int lastDecodedAvFrameIndex = getKeyFrameIndexForPts(lastDecodedAvFramePts_);
1124+
// We are seeking forwards. We can skip a seek if both the last decoded frame
1125+
// and cursor_ share the same keyframe:
1126+
// Videos have I frames and non-I frames (P and B frames). Non-I frames need
1127+
// data from the previous I frame to be decoded.
1128+
//
1129+
// Imagine the cursor is at a random frame with PTS=lastDecodedAvFramePts (x
1130+
// for brevity) and we wish to seek to a user-specified PTS=y.
1131+
//
1132+
// If y < x, we don't have a choice but to seek backwards to the highest I
1133+
// frame before y.
1134+
//
1135+
// If y > x, we have two choices:
1136+
//
1137+
// 1. We could keep decoding forward until we hit y. Illustrated below:
1138+
//
1139+
// I P P P I P P P I P P I P
1140+
// x y
1141+
//
1142+
// 2. We could try to jump to an I frame between x and y (indicated by j
1143+
// below). And then start decoding until we encounter y. Illustrated below:
1144+
//
1145+
// I P P P I P P P I P P I P
1146+
// x j y
1147+
// (2) is only more efficient than (1) if there is an I frame between x and y.
1148+
int lastKeyFrameIndex = getKeyFrameIndexForPts(lastDecodedAvFramePts_);
11431149
int targetKeyFrameIndex = getKeyFrameIndexForPts(cursor_);
1144-
return lastDecodedAvFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
1145-
lastDecodedAvFrameIndex == targetKeyFrameIndex;
1150+
return lastKeyFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
1151+
lastKeyFrameIndex == targetKeyFrameIndex;
11461152
}
11471153

11481154
// This method looks at currentPts and desiredPts and seeks in the

src/torchcodec/_core/ops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def load_torchcodec_shared_libraries():
7070
raise RuntimeError(
7171
f"""Could not load libtorchcodec. Likely causes:
7272
1. FFmpeg is not properly installed in your environment. We support
73-
versions 4, 5, 6, and 7 on all platforms, and 8 on Mac and Linux.
73+
versions 4, 5, 6, 7, and 8.
7474
2. The PyTorch version ({torch.__version__}) is not compatible with
7575
this version of TorchCodec. Refer to the version compatibility
7676
table:

test/conftest.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,17 @@
44
import pytest
55
import torch
66

7+
from .utils import in_fbcode
8+
79

810
def pytest_configure(config):
911
# register an additional marker (see pytest_collection_modifyitems)
1012
config.addinivalue_line(
1113
"markers", "needs_cuda: mark for tests that rely on a CUDA device"
1214
)
15+
config.addinivalue_line(
16+
"markers", "needs_ffmpeg_cli: mark for tests that rely on ffmpeg"
17+
)
1318

1419

1520
def pytest_collection_modifyitems(items):
@@ -28,6 +33,15 @@ def pytest_collection_modifyitems(items):
2833
# 'needs_cuda' mark, and the ones with device == 'cpu' won't have the
2934
# mark.
3035
needs_cuda = item.get_closest_marker("needs_cuda") is not None
36+
needs_ffmpeg_cli = item.get_closest_marker("needs_ffmpeg_cli") is not None
37+
has_skip_marker = item.get_closest_marker("skip") is not None
38+
has_skipif_marker = item.get_closest_marker("skipif") is not None
39+
40+
if in_fbcode():
41+
# fbcode doesn't like skipping tests, so instead we just don't collect the tests
42+
# so that they don't even "exist", hence the continue statements.
43+
if needs_ffmpeg_cli or has_skip_marker or has_skipif_marker:
44+
continue
3145

3246
if (
3347
needs_cuda

test/test_decoders.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
BT709_FULL_RANGE,
3030
cuda_version_used_for_building_torch,
3131
get_ffmpeg_major_version,
32+
get_python_version,
3233
H264_10BITS,
3334
H265_10BITS,
3435
H265_VIDEO,
@@ -39,6 +40,7 @@
3940
NASA_AUDIO_MP3_44100,
4041
NASA_VIDEO,
4142
needs_cuda,
43+
needs_ffmpeg_cli,
4244
psnr,
4345
SINE_MONO_S16,
4446
SINE_MONO_S32,
@@ -1146,6 +1148,10 @@ def test_get_key_frame_indices(self, device):
11461148

11471149
# TODO investigate why this fails internally.
11481150
@pytest.mark.skipif(in_fbcode(), reason="Compile test fails internally.")
1151+
@pytest.mark.skipif(
1152+
get_python_version() >= (3, 14),
1153+
reason="torch.compile is not supported on Python 3.14+",
1154+
)
11491155
@pytest.mark.parametrize("device", all_supported_devices())
11501156
def test_compile(self, device):
11511157
decoder, device = make_video_decoder(NASA_VIDEO.path, device=device)
@@ -1311,10 +1317,7 @@ def setup_frame_mappings(tmp_path, file, stream_index):
13111317
# Return the custom frame mappings as a JSON string
13121318
return custom_frame_mappings
13131319

1314-
@pytest.mark.skipif(
1315-
in_fbcode(),
1316-
reason="ffprobe not available internally",
1317-
)
1320+
@needs_ffmpeg_cli
13181321
@pytest.mark.parametrize("device", all_supported_devices())
13191322
@pytest.mark.parametrize("stream_index", [0, 3])
13201323
@pytest.mark.parametrize(
@@ -1361,10 +1364,7 @@ def test_custom_frame_mappings_json_and_bytes(
13611364
),
13621365
)
13631366

1364-
@pytest.mark.skipif(
1365-
in_fbcode(),
1366-
reason="ffprobe not available internally",
1367-
)
1367+
@needs_ffmpeg_cli
13681368
@pytest.mark.parametrize("device", all_supported_devices())
13691369
@pytest.mark.parametrize(
13701370
"custom_frame_mappings,expected_match",

0 commit comments

Comments
 (0)