From 539de9ed20885ba0ad99acabccbe7b992ffeae5f Mon Sep 17 00:00:00 2001 From: Janusz Lisiecki Date: Mon, 25 May 2026 12:28:31 +0200 Subject: [PATCH 1/2] Add support for full-range VP9 video decoding - Use FFmpeg color-range metadata as a fallback when NVDEC does not report full-range video. Pass full-range source details to libswscale in the CPU decoder and extend full dynamic range video coverage to the VP9 sample. Signed-off-by: Janusz Lisiecki --- dali/operators/video/frames_decoder_cpu.cc | 17 +++++++++++++- dali/operators/video/frames_decoder_gpu.cc | 7 +++++- dali/operators/video/frames_decoder_gpu.h | 2 ++ .../legacy/reader/nvdecoder/nvdecoder.cc | 7 ++++-- .../video/legacy/reader/nvdecoder/nvdecoder.h | 1 + dali/test/python/decoder/test_video.py | 23 ++++++++++++------- 6 files changed, 45 insertions(+), 12 deletions(-) diff --git a/dali/operators/video/frames_decoder_cpu.cc b/dali/operators/video/frames_decoder_cpu.cc index b9f3b1d1b6c..26b4e6de3b2 100644 --- a/dali/operators/video/frames_decoder_cpu.cc +++ b/dali/operators/video/frames_decoder_cpu.cc @@ -104,12 +104,27 @@ void FramesDecoderCpu::CopyToOutput(uint8_t *data) { sws_freeContext); DALI_ENFORCE(sws_ctx_, "Could not create sw context"); } + bool src_full_range = frame_->color_range == AVCOL_RANGE_JPEG || + (frame_->color_range == AVCOL_RANGE_UNSPECIFIED && + codec_params_->color_range == AVCOL_RANGE_JPEG); + int ret = sws_setColorspaceDetails( + sws_ctx_.get(), + sws_getCoefficients(SWS_CS_DEFAULT), + src_full_range, + sws_getCoefficients(SWS_CS_DEFAULT), + 1, + 0, + 1 << 16, + 1 << 16); + DALI_ENFORCE(ret >= 0, + make_string("Could not set color space conversion details: ", + av_error_string(ret))); uint8_t *dest[4] = {sws_output_data, nullptr, nullptr, nullptr}; int dest_linesize[4] = {frame_->width * Channels(), 0, 0, 0}; LOG_LINE << "Converting frame data to format " << (sws_output_format == AV_PIX_FMT_RGB24 ? 
"RGB" : "YUV") << std::endl; - int ret = sws_scale( + ret = sws_scale( sws_ctx_.get(), frame_->data, frame_->linesize, diff --git a/dali/operators/video/frames_decoder_gpu.cc b/dali/operators/video/frames_decoder_gpu.cc index 3aac8f1a8f4..d76f4497794 100644 --- a/dali/operators/video/frames_decoder_gpu.cc +++ b/dali/operators/video/frames_decoder_gpu.cc @@ -361,9 +361,14 @@ cudaVideoCodec FramesDecoderGpu::GetCodecType(AVCodecID codec_id) const { } } +bool FramesDecoderGpu::IsFullRange(CUVIDEOFORMAT *video_format) const { + return video_format->video_signal_description.video_full_range_flag || + codec_params_->color_range == AVCOL_RANGE_JPEG; +} + void FramesDecoderGpu::InitGpuDecoder(CUVIDEOFORMAT *video_format) { if (!nvdecode_state_->decoder) { - bool is_full_range = video_format->video_signal_description.video_full_range_flag; + bool is_full_range = IsFullRange(video_format); conversion_type_ = image_type_ == DALI_RGB ? is_full_range ? VIDEO_COLOR_SPACE_CONVERSION_TYPE_YUV_TO_RGB_FULL_RANGE : VIDEO_COLOR_SPACE_CONVERSION_TYPE_YUV_TO_RGB : diff --git a/dali/operators/video/frames_decoder_gpu.h b/dali/operators/video/frames_decoder_gpu.h index 468c081190f..0f51701c4bf 100644 --- a/dali/operators/video/frames_decoder_gpu.h +++ b/dali/operators/video/frames_decoder_gpu.h @@ -216,6 +216,8 @@ class DLL_PUBLIC FramesDecoderGpu : public FramesDecoderBase { cudaVideoCodec GetCodecType(AVCodecID codec_id) const; + bool IsFullRange(CUVIDEOFORMAT *video_format) const; + void InitGpuParser(); bool ReadNextFrameWithIndex(uint8_t *data); diff --git a/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.cc b/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.cc index 72ec122394f..adf4aeae324 100644 --- a/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.cc +++ b/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.cc @@ -45,6 +45,7 @@ NvDecoder::NvDecoder(int device_id, int additional_decode_surfaces) : device_id_(device_id), rgb_(image_type == DALI_RGB), 
dtype_(dtype), normalized_(normalized), + full_range_from_codecpar_(codecpar->color_range == AVCOL_RANGE_JPEG), device_(), parser_(), decoder_(max_height, max_width, additional_decode_surfaces), frame_in_use_(32), // 32 is cuvid's max number of decode surfaces frame_full_range_(32), // 32 is cuvid's max number of decode surfaces @@ -189,9 +190,11 @@ int NvDecoder::handle_display(void* user_data, CUVIDPARSERDISPINFO* disp_info) { int NvDecoder::handle_sequence_(CUVIDEOFORMAT* format) { int ret = kNvcuvid_failure; try { - // utilize the NVDEC parser to determine if the video is full range + // The NVDEC parser does not report full range for all codecs, for example VP9. + // Fall back to the FFmpeg stream metadata when the parser omits it. recv_queue_.front().full_range = - static_cast<bool>(format->video_signal_description.video_full_range_flag); + static_cast<bool>(format->video_signal_description.video_full_range_flag) || + full_range_from_codecpar_; ret = decoder_.initialize(format); } catch (...) { ERROR_LOG << "Unable to decode file " << recv_queue_.peek().filename << '\n'; diff --git a/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.h b/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.h index 27fb20612a8..2b69503bc4b 100644 --- a/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.h +++ b/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.h @@ -147,6 +147,7 @@ class NvDecoder { bool rgb_; DALIDataType dtype_; bool normalized_; + bool full_range_from_codecpar_; CUdevice device_; CUVideoParser parser_; diff --git a/dali/test/python/decoder/test_video.py b/dali/test/python/decoder/test_video.py index 934fe5e2f81..ce755d8bc74 100644 --- a/dali/test/python/decoder/test_video.py +++ b/dali/test/python/decoder/test_video.py @@ -244,14 +244,15 @@ def test_video_decoder(device, module): assert np.array_equal(seq, ref_seq) -def test_full_range_video(): +@params(("video.mp4", "0001.png"), ("video_vp9.mp4", "0001_vp9.png")) +def test_full_range_video(filename, 
reference): skip_if_m60() @pipeline_def def test_pipeline(): videos = fn.readers.video( device="gpu", - filenames=[get_dali_extra_path() + "/db/video/full_dynamic_range/video.mp4"], + filenames=[get_dali_extra_path() + f"/db/video/full_dynamic_range/{filename}"], sequence_length=1, initial_fill=10, normalized=False, @@ -263,7 +264,7 @@ def test_pipeline(): o = video_pipeline.run() out = o[0].as_cpu().as_array() - ref = cv2.imread(get_dali_extra_path() + "/db/video/full_dynamic_range/0001.png") + ref = cv2.imread(get_dali_extra_path() + f"/db/video/full_dynamic_range/{reference}") ref = cv2.cvtColor(ref, cv2.COLOR_BGR2RGB) left = ref right = out @@ -271,27 +272,33 @@ def test_pipeline(): assert np.mean(absdiff) < 2 -@params("gpu") -def test_full_range_video_in_memory(device): +@params( + ("gpu", ("video.mp4", "0001.png")), + ("gpu", ("video_vp9.mp4", "0001_vp9.png")), + ("cpu", ("video_vp9.mp4", "0001_vp9.png")), +) +def test_full_range_video_experimental(device, video): skip_if_m60() + filename, reference = video @pipeline_def def test_pipeline(): videos = fn.experimental.readers.video( device=device, - filenames=[get_dali_extra_path() + "/db/video/full_dynamic_range/video.mp4"], + filenames=[get_dali_extra_path() + f"/db/video/full_dynamic_range/{filename}"], sequence_length=1, ) return videos - video_pipeline = test_pipeline(batch_size=1, num_threads=1, device_id=0) + device_id = None if device == "cpu" else 0 + video_pipeline = test_pipeline(batch_size=1, num_threads=1, device_id=device_id) o = video_pipeline.run() out = o[0] if device == "gpu": out = out.as_cpu() out = out.as_array() - ref = cv2.imread(get_dali_extra_path() + "/db/video/full_dynamic_range/0001.png") + ref = cv2.imread(get_dali_extra_path() + f"/db/video/full_dynamic_range/{reference}") ref = cv2.cvtColor(ref, cv2.COLOR_BGR2RGB) left = ref right = out From f512a90aeed7166fac061940345ad9509a04bde4 Mon Sep 17 00:00:00 2001 From: Janusz Lisiecki Date: Tue, 26 May 2026 23:45:07 +0200 Subject: [PATCH 
2/2] Avoid redundant sws_setColorspaceDetails per frame Cache the last applied source full-range flag in FramesDecoderCpu and only call sws_setColorspaceDetails when it changes. Previously the call ran for every decoded frame even though sws_ctx_ is created once, forcing libswscale to recompute coefficient tables unnecessarily. Also bump copyright headers on the six video decoder files touched in this PR to include 2026. Signed-off-by: Janusz Lisiecki --- dali/operators/video/frames_decoder_cpu.cc | 31 ++++++++++--------- dali/operators/video/frames_decoder_cpu.h | 4 ++- dali/operators/video/frames_decoder_gpu.cc | 2 +- dali/operators/video/frames_decoder_gpu.h | 2 +- .../legacy/reader/nvdecoder/nvdecoder.cc | 2 +- .../video/legacy/reader/nvdecoder/nvdecoder.h | 2 +- 6 files changed, 24 insertions(+), 19 deletions(-) diff --git a/dali/operators/video/frames_decoder_cpu.cc b/dali/operators/video/frames_decoder_cpu.cc index 26b4e6de3b2..3a0e99ce470 100644 --- a/dali/operators/video/frames_decoder_cpu.cc +++ b/dali/operators/video/frames_decoder_cpu.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -107,24 +107,27 @@ void FramesDecoderCpu::CopyToOutput(uint8_t *data) { bool src_full_range = frame_->color_range == AVCOL_RANGE_JPEG || (frame_->color_range == AVCOL_RANGE_UNSPECIFIED && codec_params_->color_range == AVCOL_RANGE_JPEG); - int ret = sws_setColorspaceDetails( - sws_ctx_.get(), - sws_getCoefficients(SWS_CS_DEFAULT), - src_full_range, - sws_getCoefficients(SWS_CS_DEFAULT), - 1, - 0, - 1 << 16, - 1 << 16); - DALI_ENFORCE(ret >= 0, - make_string("Could not set color space conversion details: ", - av_error_string(ret))); + if (sws_src_full_range_ != src_full_range) { + int ret = sws_setColorspaceDetails( + sws_ctx_.get(), + sws_getCoefficients(SWS_CS_DEFAULT), + src_full_range, + sws_getCoefficients(SWS_CS_DEFAULT), + 1, + 0, + 1 << 16, + 1 << 16); + DALI_ENFORCE(ret >= 0, + make_string("Could not set color space conversion details: ", + av_error_string(ret))); + sws_src_full_range_ = src_full_range; + } uint8_t *dest[4] = {sws_output_data, nullptr, nullptr, nullptr}; int dest_linesize[4] = {frame_->width * Channels(), 0, 0, 0}; LOG_LINE << "Converting frame data to format " << (sws_output_format == AV_PIX_FMT_RGB24 ? "RGB" : "YUV") << std::endl; - ret = sws_scale( + int ret = sws_scale( sws_ctx_.get(), frame_->data, frame_->linesize, diff --git a/dali/operators/video/frames_decoder_cpu.h b/dali/operators/video/frames_decoder_cpu.h index df182b02567..3babe99e767 100644 --- a/dali/operators/video/frames_decoder_cpu.h +++ b/dali/operators/video/frames_decoder_cpu.h @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -16,6 +16,7 @@ #define DALI_OPERATORS_VIDEO_FRAMES_DECODER_CPU_H_ #include "dali/operators/video/frames_decoder_base.h" +#include <optional> #include #include @@ -63,6 +64,7 @@ class DLL_PUBLIC FramesDecoderCpu : public FramesDecoderBase { const AVCodec *codec_ = nullptr; std::unique_ptr sws_ctx_{ nullptr, sws_freeContext}; + std::optional<bool> sws_src_full_range_; std::vector tmp_buffer_; }; diff --git a/dali/operators/video/frames_decoder_gpu.cc b/dali/operators/video/frames_decoder_gpu.cc index d76f4497794..d209bd99b18 100644 --- a/dali/operators/video/frames_decoder_gpu.cc +++ b/dali/operators/video/frames_decoder_gpu.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/dali/operators/video/frames_decoder_gpu.h b/dali/operators/video/frames_decoder_gpu.h index 0f51701c4bf..a6da7b2adc6 100644 --- a/dali/operators/video/frames_decoder_gpu.h +++ b/dali/operators/video/frames_decoder_gpu.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.cc b/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.cc index adf4aeae324..8b1783d4cc1 100644 --- a/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.cc +++ b/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.h b/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.h index 2b69503bc4b..fb8349c522e 100644 --- a/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.h +++ b/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.