diff --git a/dali/operators/video/frames_decoder_cpu.cc b/dali/operators/video/frames_decoder_cpu.cc index b9f3b1d1b6c..3a0e99ce470 100644 --- a/dali/operators/video/frames_decoder_cpu.cc +++ b/dali/operators/video/frames_decoder_cpu.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -104,6 +104,24 @@ void FramesDecoderCpu::CopyToOutput(uint8_t *data) { sws_freeContext); DALI_ENFORCE(sws_ctx_, "Could not create sw context"); } + bool src_full_range = frame_->color_range == AVCOL_RANGE_JPEG || + (frame_->color_range == AVCOL_RANGE_UNSPECIFIED && + codec_params_->color_range == AVCOL_RANGE_JPEG); + if (sws_src_full_range_ != src_full_range) { + int ret = sws_setColorspaceDetails( + sws_ctx_.get(), + sws_getCoefficients(SWS_CS_DEFAULT), + src_full_range, + sws_getCoefficients(SWS_CS_DEFAULT), + 1, + 0, + 1 << 16, + 1 << 16); + DALI_ENFORCE(ret >= 0, + make_string("Could not set color space conversion details: ", + av_error_string(ret))); + sws_src_full_range_ = src_full_range; + } uint8_t *dest[4] = {sws_output_data, nullptr, nullptr, nullptr}; int dest_linesize[4] = {frame_->width * Channels(), 0, 0, 0}; diff --git a/dali/operators/video/frames_decoder_cpu.h b/dali/operators/video/frames_decoder_cpu.h index df182b02567..3babe99e767 100644 --- a/dali/operators/video/frames_decoder_cpu.h +++ b/dali/operators/video/frames_decoder_cpu.h @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #define DALI_OPERATORS_VIDEO_FRAMES_DECODER_CPU_H_ #include "dali/operators/video/frames_decoder_base.h" +#include #include #include @@ -63,6 +64,7 @@ class DLL_PUBLIC FramesDecoderCpu : public FramesDecoderBase { const AVCodec *codec_ = nullptr; std::unique_ptr sws_ctx_{ nullptr, sws_freeContext}; + std::optional sws_src_full_range_; std::vector tmp_buffer_; }; diff --git a/dali/operators/video/frames_decoder_gpu.cc b/dali/operators/video/frames_decoder_gpu.cc index 3aac8f1a8f4..d209bd99b18 100644 --- a/dali/operators/video/frames_decoder_gpu.cc +++ b/dali/operators/video/frames_decoder_gpu.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -361,9 +361,14 @@ cudaVideoCodec FramesDecoderGpu::GetCodecType(AVCodecID codec_id) const { } } +bool FramesDecoderGpu::IsFullRange(CUVIDEOFORMAT *video_format) const { + return video_format->video_signal_description.video_full_range_flag || + codec_params_->color_range == AVCOL_RANGE_JPEG; +} + void FramesDecoderGpu::InitGpuDecoder(CUVIDEOFORMAT *video_format) { if (!nvdecode_state_->decoder) { - bool is_full_range = video_format->video_signal_description.video_full_range_flag; + bool is_full_range = IsFullRange(video_format); conversion_type_ = image_type_ == DALI_RGB ? is_full_range ? VIDEO_COLOR_SPACE_CONVERSION_TYPE_YUV_TO_RGB_FULL_RANGE : VIDEO_COLOR_SPACE_CONVERSION_TYPE_YUV_TO_RGB : diff --git a/dali/operators/video/frames_decoder_gpu.h b/dali/operators/video/frames_decoder_gpu.h index 468c081190f..a6da7b2adc6 100644 --- a/dali/operators/video/frames_decoder_gpu.h +++ b/dali/operators/video/frames_decoder_gpu.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -216,6 +216,8 @@ class DLL_PUBLIC FramesDecoderGpu : public FramesDecoderBase { cudaVideoCodec GetCodecType(AVCodecID codec_id) const; + bool IsFullRange(CUVIDEOFORMAT *video_format) const; + void InitGpuParser(); bool ReadNextFrameWithIndex(uint8_t *data); diff --git a/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.cc b/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.cc index 72ec122394f..8b1783d4cc1 100644 --- a/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.cc +++ b/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -45,6 +45,7 @@ NvDecoder::NvDecoder(int device_id, int additional_decode_surfaces) : device_id_(device_id), rgb_(image_type == DALI_RGB), dtype_(dtype), normalized_(normalized), + full_range_from_codecpar_(codecpar->color_range == AVCOL_RANGE_JPEG), device_(), parser_(), decoder_(max_height, max_width, additional_decode_surfaces), frame_in_use_(32), // 32 is cuvid's max number of decode surfaces frame_full_range_(32), // 32 is cuvid's max number of decode surfaces @@ -189,9 +190,11 @@ int NvDecoder::handle_display(void* user_data, CUVIDPARSERDISPINFO* disp_info) { int NvDecoder::handle_sequence_(CUVIDEOFORMAT* format) { int ret = kNvcuvid_failure; try { - // utilize the NVDEC parser to determine if the video is full range + // The NVDEC parser does not report full range for all codecs, for example VP9. + // Fall back to the FFmpeg stream metadata when the parser omits it. recv_queue_.front().full_range = - static_cast(format->video_signal_description.video_full_range_flag); + static_cast(format->video_signal_description.video_full_range_flag) || + full_range_from_codecpar_; ret = decoder_.initialize(format); } catch (...) { ERROR_LOG << "Unable to decode file " << recv_queue_.peek().filename << '\n'; diff --git a/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.h b/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.h index 27fb20612a8..fb8349c522e 100644 --- a/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.h +++ b/dali/operators/video/legacy/reader/nvdecoder/nvdecoder.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2017-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -147,6 +147,7 @@ class NvDecoder { bool rgb_; DALIDataType dtype_; bool normalized_; + bool full_range_from_codecpar_; CUdevice device_; CUVideoParser parser_; diff --git a/dali/test/python/decoder/test_video.py b/dali/test/python/decoder/test_video.py index 934fe5e2f81..ce755d8bc74 100644 --- a/dali/test/python/decoder/test_video.py +++ b/dali/test/python/decoder/test_video.py @@ -244,14 +244,15 @@ def test_video_decoder(device, module): assert np.array_equal(seq, ref_seq) -def test_full_range_video(): +@params(("video.mp4", "0001.png"), ("video_vp9.mp4", "0001_vp9.png")) +def test_full_range_video(filename, reference): skip_if_m60() @pipeline_def def test_pipeline(): videos = fn.readers.video( device="gpu", - filenames=[get_dali_extra_path() + "/db/video/full_dynamic_range/video.mp4"], + filenames=[get_dali_extra_path() + f"/db/video/full_dynamic_range/{filename}"], sequence_length=1, initial_fill=10, normalized=False, @@ -263,7 +264,7 @@ def test_pipeline(): o = video_pipeline.run() out = o[0].as_cpu().as_array() - ref = cv2.imread(get_dali_extra_path() + "/db/video/full_dynamic_range/0001.png") + ref = cv2.imread(get_dali_extra_path() + f"/db/video/full_dynamic_range/{reference}") ref = cv2.cvtColor(ref, cv2.COLOR_BGR2RGB) left = ref right = out @@ -271,27 +272,33 @@ def test_pipeline(): assert np.mean(absdiff) < 2 -@params("gpu") -def test_full_range_video_in_memory(device): +@params( + ("gpu", ("video.mp4", "0001.png")), + ("gpu", ("video_vp9.mp4", "0001_vp9.png")), + ("cpu", ("video_vp9.mp4", "0001_vp9.png")), +) +def test_full_range_video_experimental(device, video): skip_if_m60() + filename, reference = video @pipeline_def def test_pipeline(): videos = fn.experimental.readers.video( device=device, - filenames=[get_dali_extra_path() + "/db/video/full_dynamic_range/video.mp4"], + filenames=[get_dali_extra_path() + f"/db/video/full_dynamic_range/{filename}"], sequence_length=1, ) return videos - video_pipeline = test_pipeline(batch_size=1, num_threads=1, device_id=0) + device_id = None if device == "cpu" else 0 + video_pipeline = test_pipeline(batch_size=1, num_threads=1, device_id=device_id) o = video_pipeline.run() out = o[0] if device == "gpu": out = out.as_cpu() out = out.as_array() - ref = cv2.imread(get_dali_extra_path() + "/db/video/full_dynamic_range/0001.png") + ref = cv2.imread(get_dali_extra_path() + f"/db/video/full_dynamic_range/{reference}") ref = cv2.cvtColor(ref, cv2.COLOR_BGR2RGB) left = ref right = out