@@ -100,46 +100,32 @@ void GpuEncoder::initializeHardwareContext() {
100100 nppCtx_ = getNppStreamContext (device_);
101101}
102102
103- std::optional<const AVCodec*> GpuEncoder::findEncoder (
104- const AVCodecID& codecId) {
105- void * i = nullptr ;
106- const AVCodec* codec = nullptr ;
107- while ((codec = av_codec_iterate (&i)) != nullptr ) {
108- if (codec->id != codecId || !av_codec_is_encoder (codec)) {
109- continue ;
110- }
111-
112- const AVCodecHWConfig* config = nullptr ;
113- for (int j = 0 ; (config = avcodec_get_hw_config (codec, j)) != nullptr ;
114- ++j) {
115- if (config->device_type == AV_HWDEVICE_TYPE_CUDA) {
116- return codec;
117- }
118- }
119- }
120- return std::nullopt ;
121- }
122-
123103void GpuEncoder::registerHardwareDeviceWithCodec (AVCodecContext* codecContext) {
124104 TORCH_CHECK (
125105 hardwareDeviceCtx_, " Hardware device context has not been initialized" );
126106 TORCH_CHECK (codecContext != nullptr , " codecContext is null" );
127107 codecContext->hw_device_ctx = av_buffer_ref (hardwareDeviceCtx_.get ());
128108}
129109
130- void GpuEncoder::setupEncodingContext (AVCodecContext* codecContext) {
110+ // Allocates and initializes AVHWFramesContext, and sets pixel format fields
111+ // to enable encoding with CUDA device. The hw_frames_ctx field is needed by
112+ // FFmpeg to allocate frames on GPU's memory.
113+ void GpuEncoder::setupHardwareFrameContext (AVCodecContext* codecContext) {
131114 TORCH_CHECK (
132115 hardwareDeviceCtx_, " Hardware device context has not been initialized" );
133116 TORCH_CHECK (codecContext != nullptr , " codecContext is null" );
134117
135- codecContext->sw_pix_fmt = AV_PIX_FMT_NV12;
136- codecContext->pix_fmt = AV_PIX_FMT_CUDA;
137-
138118 AVBufferRef* hwFramesCtxRef = av_hwframe_ctx_alloc (hardwareDeviceCtx_.get ());
139119 TORCH_CHECK (
140120 hwFramesCtxRef != nullptr ,
141121 " Failed to allocate hardware frames context for codec" );
142122
123+ // Always set pixel formats to options that support CUDA encoding.
124+ // TODO-VideoEncoder: Enable user set pixel formats to be set and properly
125+ // converted with npp functions below
126+ codecContext->sw_pix_fmt = AV_PIX_FMT_NV12;
127+ codecContext->pix_fmt = AV_PIX_FMT_CUDA;
128+
143129 AVHWFramesContext* hwFramesCtx =
144130 reinterpret_cast <AVHWFramesContext*>(hwFramesCtxRef->data );
145131 hwFramesCtx->format = codecContext->pix_fmt ;
@@ -164,41 +150,44 @@ UniqueAVFrame GpuEncoder::convertTensorToAVFrame(
164150 [[maybe_unused]] AVPixelFormat targetFormat,
165151 int frameIndex,
166152 AVCodecContext* codecContext) {
167- TORCH_CHECK (tensor.is_cuda (), " GpuEncoder requires CUDA tensors" );
153+ TORCH_CHECK (
154+ tensor.is_cuda (),
155+ " Frame tensor is not stored on GPU, but the GPU method convertTensorToAVFrame was called." );
168156 TORCH_CHECK (
169157 tensor.dim () == 3 && tensor.size (0 ) == 3 ,
170158 " Expected 3D RGB tensor (CHW format), got shape: " ,
171159 tensor.sizes ());
160+
161+ // TODO-VideoEncoder: Unify AVFrame creation with CPU version of this method
172162 UniqueAVFrame avFrame (av_frame_alloc ());
173163 TORCH_CHECK (avFrame != nullptr , " Failed to allocate AVFrame" );
164+ int height = static_cast <int >(tensor.size (1 ));
165+ int width = static_cast <int >(tensor.size (2 ));
174166
175167 avFrame->format = AV_PIX_FMT_CUDA;
176- avFrame->width = static_cast < int >(tensor. size ( 2 )) ;
177- avFrame->height = static_cast < int >(tensor. size ( 1 )) ;
168+ avFrame->height = height ;
169+ avFrame->width = width ;
178170 avFrame->pts = frameIndex;
179171
180- int ret = av_hwframe_get_buffer (
181- codecContext ? codecContext->hw_frames_ctx : nullptr , avFrame.get (), 0 );
172+ // FFmpeg's av_hwframe_get_buffer is used to allocate memory on CUDA device.
173+ // TODO-VideoEncoder: Consider using pytorch to allocate CUDA memory for
174+ // efficiency
175+ int ret =
176+ av_hwframe_get_buffer (codecContext->hw_frames_ctx , avFrame.get (), 0 );
182177 TORCH_CHECK (
183178 ret >= 0 ,
184179 " Failed to allocate hardware frame: " ,
185180 getFFMPEGErrorStringFromErrorCode (ret));
186181
187- // Validate that avFrame was properly allocated with CUDA memory
188182 TORCH_CHECK (
189183 avFrame != nullptr && avFrame->data [0 ] != nullptr ,
190184 " avFrame must be pre-allocated with CUDA memory" );
191185
192- // Convert CHW to HWC for NPP processing
193- int height = static_cast <int >(tensor.size (1 ));
194- int width = static_cast <int >(tensor.size (2 ));
195186 torch::Tensor hwcFrame = tensor.permute ({1 , 2 , 0 }).contiguous ();
196187
197- // Get current CUDA stream for NPP operations
198188 at::cuda::CUDAStream currentStream =
199189 at::cuda::getCurrentCUDAStream (device_.index ());
200190
201- // Setup NPP context with current stream
202191 nppCtx_->hStream = currentStream.stream ();
203192 cudaError_t cudaErr =
204193 cudaStreamGetFlags (nppCtx_->hStream , &nppCtx_->nStreamFlags );
@@ -207,9 +196,7 @@ UniqueAVFrame GpuEncoder::convertTensorToAVFrame(
207196 " cudaStreamGetFlags failed: " ,
208197 cudaGetErrorString (cudaErr));
209198
210- // Always use FFmpeg's default behavior: BT.601 limited range
211199 NppiSize oSizeROI = {width, height};
212-
213200 NppStatus status = nppiRGBToNV12_8u_ColorTwist32f_C3P2R_Ctx (
214201 static_cast <const Npp8u*>(hwcFrame.data_ptr ()),
215202 hwcFrame.stride (0 ) * hwcFrame.element_size (),
@@ -224,15 +211,8 @@ UniqueAVFrame GpuEncoder::convertTensorToAVFrame(
224211 " Failed to convert RGB to NV12: NPP error code " ,
225212 status);
226213
227- // Validate CUDA operations completed successfully
228- cudaError_t memCheck = cudaGetLastError ();
229- TORCH_CHECK (
230- memCheck == cudaSuccess,
231- " CUDA error detected: " ,
232- cudaGetErrorString (memCheck));
233-
234214 // TODO-VideoEncoder: Enable configuration of color properties, similar to
235- // FFmpeg Set color properties to FFmpeg defaults
215+ // FFmpeg. Below are the default color properties used by FFmpeg.
236216 avFrame->colorspace = AVCOL_SPC_SMPTE170M; // BT.601
237217 avFrame->color_range = AVCOL_RANGE_MPEG; // Limited range
238218
0 commit comments