Skip to content

Commit 5629e6e

Browse files
committed
Merge branch 'main' of https://github.com/meta-pytorch/torchcodec into codec_options_encode_option
2 parents 888c8d4 + 79e633c commit 5629e6e

File tree

17 files changed

+494
-79
lines changed

17 files changed

+494
-79
lines changed

.github/workflows/lint.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
run: python -m pip install --upgrade pip
6363
- name: Install dependencies and FFmpeg
6464
run: |
65-
python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
65+
python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
6666
conda install "ffmpeg=7.0.1" pkg-config pybind11 -c conda-forge
6767
ffmpeg -version
6868
- name: Build and install torchcodec

.github/workflows/linux_cuda_wheel.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,13 @@ jobs:
9595
# We install conda packages at the start because otherwise conda may have conflicts with dependencies.
9696
# Note: xorg-libxau was added to fix a problem with ffmpeg 4. We should consider removing it.
9797
default-packages: "nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }} conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }} conda-forge::xorg-libxau"
98-
- name: Check env
98+
- name: Check env, set LD_LIBRARY_PATH
9999
run: |
100100
${CONDA_RUN} env
101101
${CONDA_RUN} conda info
102102
${CONDA_RUN} nvidia-smi
103103
${CONDA_RUN} conda list
104+
echo LD_LIBRARY_PATH=$CONDA_PREFIX/lib:/usr/local/cuda/lib64/:${LD_LIBRARY_PATH} >> $GITHUB_ENV
104105
- name: Assert ffmpeg exists
105106
run: |
106107
${CONDA_RUN} ffmpeg -buildconf

docs/source/api_ref_transforms.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
.. _transforms:
2+
3+
=====================
4+
torchcodec.transforms
5+
=====================
6+
7+
.. currentmodule:: torchcodec.transforms
8+
9+
For a tutorial, see: TODO_DECODER_TRANSFORMS_TUTORIAL.
10+
11+
.. autosummary::
12+
:toctree: generated/
13+
:nosignatures:
14+
:template: dataclass.rst
15+
16+
DecoderTransform
17+
Resize

docs/source/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ def __call__(self, filename):
209209
intersphinx_mapping = {
210210
"python": ("https://docs.python.org/3/", None),
211211
"torch": ("https://pytorch.org/docs/stable/", None),
212+
"torchvision": ("https://docs.pytorch.org/vision/stable/", None),
212213
"numpy": ("https://numpy.org/doc/stable/", None),
213214
"PIL": ("https://pillow.readthedocs.io/en/stable/", None),
214215
"matplotlib": ("https://matplotlib.org/stable/", None),

docs/source/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,4 @@ Encoding
125125
api_ref_decoders
126126
api_ref_encoders
127127
api_ref_samplers
128+
api_ref_transforms

mypy.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ files = src/torchcodec
44
show_error_codes = True
55
pretty = True
66
allow_redefinition = True
7+
follow_untyped_imports = True

src/torchcodec/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
# Note: usort wants to put Frame and FrameBatch after decoders and samplers,
1010
# but that results in circular import.
1111
from ._frame import AudioSamples, Frame, FrameBatch # usort:skip # noqa
12-
from . import decoders, encoders, samplers # noqa
12+
from . import decoders, encoders, samplers, transforms # noqa
1313

1414
try:
1515
# Note that version.py is generated during install.

src/torchcodec/_core/Encoder.cpp

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -724,9 +724,33 @@ VideoEncoder::VideoEncoder(
724724

725725
void VideoEncoder::initializeEncoder(
726726
const VideoStreamOptions& videoStreamOptions) {
727-
const AVCodec* avCodec =
728-
avcodec_find_encoder(avFormatContext_->oformat->video_codec);
729-
TORCH_CHECK(avCodec != nullptr, "Video codec not found");
727+
const AVCodec* avCodec = nullptr;
728+
// If codec arg is provided, find codec using logic similar to FFmpeg:
729+
// https://github.com/FFmpeg/FFmpeg/blob/master/fftools/ffmpeg_opt.c#L804-L835
730+
if (videoStreamOptions.codec.has_value()) {
731+
const std::string& codec = videoStreamOptions.codec.value();
732+
// Try to find codec by name ("libx264", "libsvtav1")
733+
avCodec = avcodec_find_encoder_by_name(codec.c_str());
734+
// Try to find by codec descriptor ("h264", "av1")
735+
if (!avCodec) {
736+
const AVCodecDescriptor* desc =
737+
avcodec_descriptor_get_by_name(codec.c_str());
738+
if (desc) {
739+
avCodec = avcodec_find_encoder(desc->id);
740+
}
741+
}
742+
TORCH_CHECK(
743+
avCodec != nullptr,
744+
"Video codec ",
745+
codec,
746+
" not found. To see available codecs, run: ffmpeg -encoders");
747+
} else {
748+
TORCH_CHECK(
749+
avFormatContext_->oformat != nullptr,
750+
"Output format is null, unable to find default codec.");
751+
avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec);
752+
TORCH_CHECK(avCodec != nullptr, "Video codec not found");
753+
}
730754

731755
AVCodecContext* avCodecContext = avcodec_alloc_context3(avCodec);
732756
TORCH_CHECK(avCodecContext != nullptr, "Couldn't allocate codec context.");

src/torchcodec/_core/StreamOptions.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ struct VideoStreamOptions {
4646
std::string_view deviceVariant = "ffmpeg";
4747

4848
// Encoding options
49+
std::optional<std::string> codec;
4950
// Optional pixel format for video encoding (e.g., "yuv420p", "yuv444p")
5051
// If not specified, uses codec's default format.
5152
std::optional<std::string> pixelFormat;

src/torchcodec/_core/custom_ops.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
3737
m.def(
3838
"_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
3939
m.def(
40-
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> ()");
40+
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> ()");
4141
m.def(
42-
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> Tensor");
42+
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> Tensor");
4343
m.def(
44-
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> ()");
44+
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> ()");
4545
m.def(
4646
"create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
4747
m.def(
@@ -613,11 +613,13 @@ void encode_video_to_file(
613613
const at::Tensor& frames,
614614
int64_t frame_rate,
615615
std::string_view file_name,
616+
std::optional<std::string> codec = std::nullopt,
616617
std::optional<std::string_view> pixel_format = std::nullopt,
617618
std::optional<double> crf = std::nullopt,
618619
std::optional<std::string_view> preset = std::nullopt,
619620
std::optional<std::vector<std::string>> codec_options = std::nullopt) {
620621
VideoStreamOptions videoStreamOptions;
622+
videoStreamOptions.codec = codec;
621623
videoStreamOptions.pixelFormat = pixel_format;
622624
videoStreamOptions.crf = crf;
623625
videoStreamOptions.preset = preset;
@@ -639,12 +641,14 @@ at::Tensor encode_video_to_tensor(
639641
const at::Tensor& frames,
640642
int64_t frame_rate,
641643
std::string_view format,
644+
std::optional<std::string> codec = std::nullopt,
642645
std::optional<std::string_view> pixel_format = std::nullopt,
643646
std::optional<double> crf = std::nullopt,
644647
std::optional<std::string_view> preset = std::nullopt,
645648
std::optional<std::vector<std::string>> codec_options = std::nullopt) {
646649
auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
647650
VideoStreamOptions videoStreamOptions;
651+
videoStreamOptions.codec = codec;
648652
videoStreamOptions.pixelFormat = pixel_format;
649653
videoStreamOptions.crf = crf;
650654
videoStreamOptions.preset = preset;
@@ -668,6 +672,7 @@ void _encode_video_to_file_like(
668672
int64_t frame_rate,
669673
std::string_view format,
670674
int64_t file_like_context,
675+
std::optional<std::string> codec = std::nullopt,
671676
std::optional<std::string_view> pixel_format = std::nullopt,
672677
std::optional<double> crf = std::nullopt,
673678
std::optional<std::string_view> preset = std::nullopt,
@@ -679,6 +684,7 @@ void _encode_video_to_file_like(
679684
std::unique_ptr<AVIOFileLikeContext> avioContextHolder(fileLikeContext);
680685

681686
VideoStreamOptions videoStreamOptions;
687+
videoStreamOptions.codec = codec;
682688
videoStreamOptions.pixelFormat = pixel_format;
683689
videoStreamOptions.crf = crf;
684690
videoStreamOptions.preset = preset;

0 commit comments

Comments
 (0)