Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion dali/operators/video/frames_decoder_cpu.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -104,6 +104,24 @@ void FramesDecoderCpu::CopyToOutput(uint8_t *data) {
sws_freeContext);
DALI_ENFORCE(sws_ctx_, "Could not create sw context");
}
bool src_full_range = frame_->color_range == AVCOL_RANGE_JPEG ||
(frame_->color_range == AVCOL_RANGE_UNSPECIFIED &&
codec_params_->color_range == AVCOL_RANGE_JPEG);
if (sws_src_full_range_ != src_full_range) {
int ret = sws_setColorspaceDetails(
sws_ctx_.get(),
sws_getCoefficients(SWS_CS_DEFAULT),
src_full_range,
sws_getCoefficients(SWS_CS_DEFAULT),
1,
0,
1 << 16,
1 << 16);
DALI_ENFORCE(ret >= 0,
make_string("Could not set color space conversion details: ",
av_error_string(ret)));
sws_src_full_range_ = src_full_range;
}

uint8_t *dest[4] = {sws_output_data, nullptr, nullptr, nullptr};
int dest_linesize[4] = {frame_->width * Channels(), 0, 0, 0};
Expand Down
4 changes: 3 additions & 1 deletion dali/operators/video/frames_decoder_cpu.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -16,6 +16,7 @@
#define DALI_OPERATORS_VIDEO_FRAMES_DECODER_CPU_H_

#include "dali/operators/video/frames_decoder_base.h"
#include <optional>
#include <string>
#include <string_view>

Expand Down Expand Up @@ -63,6 +64,7 @@ class DLL_PUBLIC FramesDecoderCpu : public FramesDecoderBase {
const AVCodec *codec_ = nullptr;
std::unique_ptr<SwsContext, decltype(&sws_freeContext)> sws_ctx_{
nullptr, sws_freeContext};
std::optional<bool> sws_src_full_range_;

std::vector<uint8_t> tmp_buffer_;
};
Expand Down
9 changes: 7 additions & 2 deletions dali/operators/video/frames_decoder_gpu.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -361,9 +361,14 @@ cudaVideoCodec FramesDecoderGpu::GetCodecType(AVCodecID codec_id) const {
}
}

// Decides whether the decoded stream should be treated as full-range YUV.
//
// Returns true when the NVDEC-parsed video format has its
// `video_full_range_flag` set, or, failing that, when the codec parameters
// report `AVCOL_RANGE_JPEG` as the color range. Returns false otherwise.
//
// NOTE(review): the codec-parameter check presumably covers codecs for which
// the parser leaves the flag unset - confirm against the parser behavior.
bool FramesDecoderGpu::IsFullRange(CUVIDEOFORMAT *video_format) const {
  // The flag from the parsed sequence header takes precedence.
  if (video_format->video_signal_description.video_full_range_flag) {
    return true;
  }
  // Otherwise rely on the color range recorded in the codec parameters.
  return codec_params_->color_range == AVCOL_RANGE_JPEG;
}

void FramesDecoderGpu::InitGpuDecoder(CUVIDEOFORMAT *video_format) {
if (!nvdecode_state_->decoder) {
bool is_full_range = video_format->video_signal_description.video_full_range_flag;
bool is_full_range = IsFullRange(video_format);
conversion_type_ = image_type_ == DALI_RGB ?
is_full_range ? VIDEO_COLOR_SPACE_CONVERSION_TYPE_YUV_TO_RGB_FULL_RANGE :
VIDEO_COLOR_SPACE_CONVERSION_TYPE_YUV_TO_RGB :
Expand Down
4 changes: 3 additions & 1 deletion dali/operators/video/frames_decoder_gpu.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -216,6 +216,8 @@ class DLL_PUBLIC FramesDecoderGpu : public FramesDecoderBase {

cudaVideoCodec GetCodecType(AVCodecID codec_id) const;

bool IsFullRange(CUVIDEOFORMAT *video_format) const;

void InitGpuParser();

bool ReadNextFrameWithIndex(uint8_t *data);
Expand Down
9 changes: 6 additions & 3 deletions dali/operators/video/legacy/reader/nvdecoder/nvdecoder.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2017-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -45,6 +45,7 @@ NvDecoder::NvDecoder(int device_id,
int additional_decode_surfaces)
: device_id_(device_id),
rgb_(image_type == DALI_RGB), dtype_(dtype), normalized_(normalized),
full_range_from_codecpar_(codecpar->color_range == AVCOL_RANGE_JPEG),
device_(), parser_(), decoder_(max_height, max_width, additional_decode_surfaces),
frame_in_use_(32), // 32 is cuvid's max number of decode surfaces
frame_full_range_(32), // 32 is cuvid's max number of decode surfaces
Expand Down Expand Up @@ -189,9 +190,11 @@ int NvDecoder::handle_display(void* user_data, CUVIDPARSERDISPINFO* disp_info) {
int NvDecoder::handle_sequence_(CUVIDEOFORMAT* format) {
int ret = kNvcuvid_failure;
try {
// utilize the NVDEC parser to determine if the video is full range
// The NVDEC parser does not report full range for all codecs, for example VP9.
// Fall back to the FFmpeg stream metadata when the parser omits it.
recv_queue_.front().full_range =
static_cast<bool>(format->video_signal_description.video_full_range_flag);
static_cast<bool>(format->video_signal_description.video_full_range_flag) ||
full_range_from_codecpar_;
ret = decoder_.initialize(format);
} catch (...) {
ERROR_LOG << "Unable to decode file " << recv_queue_.peek().filename << '\n';
Expand Down
3 changes: 2 additions & 1 deletion dali/operators/video/legacy/reader/nvdecoder/nvdecoder.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2017-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -147,6 +147,7 @@ class NvDecoder {
bool rgb_;
DALIDataType dtype_;
bool normalized_;
bool full_range_from_codecpar_;

CUdevice device_;
CUVideoParser parser_;
Expand Down
23 changes: 15 additions & 8 deletions dali/test/python/decoder/test_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,14 +244,15 @@ def test_video_decoder(device, module):
assert np.array_equal(seq, ref_seq)


def test_full_range_video():
@params(("video.mp4", "0001.png"), ("video_vp9.mp4", "0001_vp9.png"))
def test_full_range_video(filename, reference):
skip_if_m60()

@pipeline_def
def test_pipeline():
videos = fn.readers.video(
device="gpu",
filenames=[get_dali_extra_path() + "/db/video/full_dynamic_range/video.mp4"],
filenames=[get_dali_extra_path() + f"/db/video/full_dynamic_range/{filename}"],
sequence_length=1,
initial_fill=10,
normalized=False,
Expand All @@ -263,35 +264,41 @@ def test_pipeline():

o = video_pipeline.run()
out = o[0].as_cpu().as_array()
ref = cv2.imread(get_dali_extra_path() + "/db/video/full_dynamic_range/0001.png")
ref = cv2.imread(get_dali_extra_path() + f"/db/video/full_dynamic_range/{reference}")
ref = cv2.cvtColor(ref, cv2.COLOR_BGR2RGB)
left = ref
right = out
absdiff = np.abs(left.astype(int) - right.astype(int))
assert np.mean(absdiff) < 2


@params("gpu")
def test_full_range_video_in_memory(device):
@params(
("gpu", ("video.mp4", "0001.png")),
("gpu", ("video_vp9.mp4", "0001_vp9.png")),
("cpu", ("video_vp9.mp4", "0001_vp9.png")),
)
def test_full_range_video_experimental(device, video):
skip_if_m60()
filename, reference = video

@pipeline_def
def test_pipeline():
videos = fn.experimental.readers.video(
device=device,
filenames=[get_dali_extra_path() + "/db/video/full_dynamic_range/video.mp4"],
filenames=[get_dali_extra_path() + f"/db/video/full_dynamic_range/{filename}"],
sequence_length=1,
)
return videos

video_pipeline = test_pipeline(batch_size=1, num_threads=1, device_id=0)
device_id = None if device == "cpu" else 0
video_pipeline = test_pipeline(batch_size=1, num_threads=1, device_id=device_id)

o = video_pipeline.run()
out = o[0]
if device == "gpu":
out = out.as_cpu()
out = out.as_array()
ref = cv2.imread(get_dali_extra_path() + "/db/video/full_dynamic_range/0001.png")
ref = cv2.imread(get_dali_extra_path() + f"/db/video/full_dynamic_range/{reference}")
ref = cv2.cvtColor(ref, cv2.COLOR_BGR2RGB)
left = ref
right = out
Expand Down