Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,6 @@ compile_commands.json
CTestTestfile.cmake
_deps
CMakeUserPresets.json

third_party/
build/
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Incorrect spdlog format string using printf-style specifiers instead of fmt-style placeholders.
- Division by zero in CLI progress display within the first second of processing.
- `unhook_ffmpeg_logging` disabling all FFmpeg logging instead of restoring the default callback.
- Eliminate flickering in interpolated output and improve color consistency with input.

## [6.4.0] - 2025-01-24

Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ add_library(libvideo2x
src/filter_realcugan.cpp
src/filter_realesrgan.cpp
src/fsutils.cpp
src/interpolator.cpp
src/interpolator_rife.cpp
src/libplacebo.cpp
src/libvideo2x.cpp
Expand Down
12 changes: 10 additions & 2 deletions include/libvideo2x/conversions.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,18 @@ namespace conversions {
AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt);

// Convert AVFrame to ncnn::Mat
ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame);
// color_hint: optional frame whose color metadata is used when the source frame
// has AVCOL_SPC_UNSPECIFIED / AVCOL_RANGE_UNSPECIFIED (common for per-frame metadata).
ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame, const AVFrame* color_hint = nullptr);

// Convert ncnn::Mat to AVFrame
AVFrame* ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt);
// color_hint: optional frame whose color metadata is used to configure the
// YUV output color matrix and to populate the output frame's color properties.
AVFrame* ncnn_mat_to_avframe(
const ncnn::Mat& mat,
AVPixelFormat pix_fmt,
const AVFrame* color_hint = nullptr
);

} // namespace conversions
} // namespace video2x
11 changes: 6 additions & 5 deletions include/libvideo2x/interpolator_rife.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,12 @@ class InterpolatorRIFE : public Interpolator {
int init(AVCodecContext* dec_ctx, AVCodecContext* enc_ctx, AVBufferRef* hw_ctx) override;

// Processes an input frame and returns the processed frame
int interpolate(AVFrame* prev_frame, AVFrame* in_frame, AVFrame** out_frame, float time_step)
override;
int interpolate(
AVFrame* prev_frame,
AVFrame* in_frame,
AVFrame** out_frame,
float time_step
) override;

// Returns the interpolator's type
ProcessorType get_processor_type() const override { return ProcessorType::RIFE; }
Expand All @@ -53,9 +57,6 @@ class InterpolatorRIFE : public Interpolator {
bool uhd_mode_;
int num_threads_;
const fsutils::StringType model_name_;
AVRational in_time_base_;
AVRational out_time_base_;
AVPixelFormat out_pix_fmt_;
};

} // namespace processors
Expand Down
53 changes: 53 additions & 0 deletions include/libvideo2x/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ extern "C" {
#include <libavcodec/avcodec.h>
#include <libavfilter/avfilter.h>
#include <libavutil/buffer.h>
#include <libavutil/pixdesc.h>
}

#include "fsutils.h"
#include "logger_manager.h"

namespace video2x {
namespace processors {
Expand Down Expand Up @@ -77,6 +79,43 @@ class Processor {
int& width,
int& height
) const = 0;

protected:
// Authoritative color properties captured from dec_ctx at init time.
// Per-frame AVFrame fields are often AVCOL_*_UNSPECIFIED even when the
// stream header carries valid values, so we store them here once.
AVColorSpace out_color_space_ = AVCOL_SPC_UNSPECIFIED;
AVColorRange out_color_range_ = AVCOL_RANGE_UNSPECIFIED;
AVColorPrimaries out_color_primaries_ = AVCOL_PRI_UNSPECIFIED;
AVColorTransferCharacteristic out_color_trc_ = AVCOL_TRC_UNSPECIFIED;

// Copy the decoder's four color properties (matrix, range, primaries and
// transfer characteristic) into the out_color_* members and log them at info
// level.
// Call once at the end of init() in each subclass.
// dec_ctx is dereferenced unconditionally and must not be null.
void capture_color_properties(AVCodecContext* dec_ctx) {
    out_color_space_ = dec_ctx->colorspace;
    out_color_range_ = dec_ctx->color_range;
    out_color_primaries_ = dec_ctx->color_primaries;
    out_color_trc_ = dec_ctx->color_trc;

    // The libavutil name lookups return NULL for values they do not know.
    // A null C string must never be handed to the formatter, so substitute a
    // readable placeholder instead.
    const auto name_or_unknown = [](const char* name) {
        return name != nullptr ? name : "unknown";
    };

    logger()->info(
        "Decoder color properties: space={}, range={}, primaries={}, trc={}",
        name_or_unknown(av_color_space_name(out_color_space_)),
        name_or_unknown(av_color_range_name(out_color_range_)),
        name_or_unknown(av_color_primaries_name(out_color_primaries_)),
        name_or_unknown(av_color_transfer_name(out_color_trc_))
    );
}

// Return a by-value AVFrame in which only the four color properties are
// populated (from the out_color_* members); every other field is
// value-initialized.
// Pass the result as color_hint to avframe_to_ncnn_mat / ncnn_mat_to_avframe
// so both conversion directions use the same, consistent color matrix.
// NOTE(review): FFmpeg generally expects AVFrame objects to come from
// av_frame_alloc(); this one is presumably only ever read for its color
// fields and never handed to libav* functions - confirm against callers.
AVFrame make_color_hint() const {
    AVFrame hint{};
    hint.color_trc = out_color_trc_;
    hint.color_primaries = out_color_primaries_;
    hint.color_range = out_color_range_;
    hint.colorspace = out_color_space_;
    return hint;
}
};

// Abstract base class for filters
Expand All @@ -92,6 +131,20 @@ class Interpolator : public Processor {
ProcessingMode get_processing_mode() const override { return ProcessingMode::Interpolate; }
virtual int
interpolate(AVFrame* prev_frame, AVFrame* in_frame, AVFrame** out_frame, float time_step) = 0;

// Normalize a pass-through frame so it goes through the same pixel-format
// round-trip as interpolated frames, eliminating visual differences between
// original and interpolated frames.
// The default implementation performs the ncnn pixel-format round-trip and
// rescales PTS to the encoder time base, which is correct for all ncnn-based
// interpolators. Non-ncnn subclasses (e.g., CUDA-based) should override this
// to match their own conversion pipeline.
virtual int normalize(AVFrame* in_frame, AVFrame** out_frame);

protected:
AVRational in_time_base_ = {0, 1};
AVRational out_time_base_ = {0, 1};
AVPixelFormat out_pix_fmt_ = AV_PIX_FMT_NONE;
};

} // namespace processors
Expand Down
150 changes: 144 additions & 6 deletions src/conversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

#include <atomic>
#include <cstddef>
#include <cstdio>
#include <mutex>

extern "C" {
#include <libavutil/pixdesc.h>
}

#include <spdlog/spdlog.h>

Expand All @@ -10,9 +15,84 @@
namespace video2x {
namespace conversions {

// Resampling filter used for all pixel format conversions. SWS_BICUBIC is
// sharper than bilinear while avoiding the ringing/shimmer of Lanczos/Sinc.
// Both conversion directions use the same filter so
// the round-trip is symmetric and does not introduce asymmetric chroma error.
static constexpr int SWS_FILTER = SWS_BICUBIC;

// Translate an AVColorSpace value into the SWS_CS_* index expected by
// sws_getCoefficients() / sws_setColorspaceDetails().
// sws_getContext always starts from BT.601 coefficients, so callers use the
// returned index to install the intended matrix afterwards.
// Colorspaces without a dedicated mapping yield SWS_CS_DEFAULT; for
// AVCOL_SPC_UNSPECIFIED a warning is additionally logged, at most once.
static int avcol_spc_to_sws_cs(AVColorSpace cs) {
    if (cs == AVCOL_SPC_BT709) {
        return SWS_CS_ITU709;
    }
    if (cs == AVCOL_SPC_FCC) {
        return SWS_CS_FCC;
    }
    if (cs == AVCOL_SPC_BT470BG) {
        return SWS_CS_ITU601;
    }
    if (cs == AVCOL_SPC_SMPTE170M) {
        return SWS_CS_SMPTE170M;
    }
    if (cs == AVCOL_SPC_SMPTE240M) {
        return SWS_CS_SMPTE240M;
    }
    // Both BT.2020 variants map to the same SWS_CS_BT2020 index.
    if (cs == AVCOL_SPC_BT2020_NCL || cs == AVCOL_SPC_BT2020_CL) {
        return SWS_CS_BT2020;
    }

    // No dedicated mapping from here on. Only the "unspecified" case is
    // reported, and only the first time it is seen.
    if (cs == AVCOL_SPC_UNSPECIFIED) {
        static std::once_flag unspecified_warning_flag;
        std::call_once(unspecified_warning_flag, [] {
            logger()->warn(
                "Color space is AVCOL_SPC_UNSPECIFIED; "
                "falling back to BT.601. Output colors may be incorrect "
                "for BT.709 or BT.2020 content."
            );
        });
    }

    // Fall back to SWS_CS_DEFAULT (BT.601) rather than guessing BT.709, which
    // could silently mis-color the output.
    return SWS_CS_DEFAULT;
}

// Install `coeffs` as both the source and the destination colorspace matrix on
// an already-created SwsContext, overriding its default BT.601 coefficients.
//   sws_ctx:   context to reconfigure
//   coeffs:    coefficient table, e.g. the result of sws_getCoefficients()
//   src_range: 0 = limited (TV), 1 = full (JPEG/PC)
//   dst_range: same encoding as src_range
// Brightness, contrast and saturation are passed as their neutral values.
// A failure is not propagated to the caller: the first one is logged at warn
// level and later ones at debug level with a running count, so a failure that
// repeats on every frame does not flood the log.
static void
apply_sws_colorspace(SwsContext* sws_ctx, const int* coeffs, int src_range, int dst_range) {
    // 16.16 fixed-point neutral values: brightness is an additive offset,
    // contrast and saturation are multiplicative gains.
    constexpr int kNeutralBrightness = 0;
    constexpr int kNeutralContrast = 1 << 16;
    constexpr int kNeutralSaturation = 1 << 16;

    const int ret = sws_setColorspaceDetails(
        sws_ctx,
        coeffs,
        src_range,
        coeffs,
        dst_range,
        kNeutralBrightness,
        kNeutralContrast,
        kNeutralSaturation
    );
    if (ret >= 0) {
        return;
    }

    // Pure occurrence counter: no other data is published through it, so a
    // relaxed increment is sufficient.
    static std::atomic<int> fail_count{0};
    const int count = fail_count.fetch_add(1, std::memory_order_relaxed) + 1;
    if (count == 1) {
        logger()->warn(
            "sws_setColorspaceDetails failed; colorspace conversion may be inaccurate."
        );
    } else {
        logger()->debug(
            "sws_setColorspaceDetails failed (occurrence #{}); "
            "colorspace conversion may be inaccurate.",
            count
        );
    }
}

// Convert AVFrame format
[[gnu::target_clones("arch=x86-64-v4", "arch=x86-64-v3", "default")]]
AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt) {
int ret;

AVFrame* dst_frame = av_frame_alloc();
if (dst_frame == nullptr) {
logger()->error("Failed to allocate destination AVFrame.");
Expand All @@ -38,7 +118,7 @@ AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt) {
dst_frame->width,
dst_frame->height,
pix_fmt,
SWS_BILINEAR,
SWS_FILTER,
nullptr,
nullptr,
nullptr
Expand All @@ -50,8 +130,18 @@ AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt) {
return nullptr;
}

{
int sws_cs = avcol_spc_to_sws_cs(src_frame->colorspace);
int src_range = (src_frame->color_range == AVCOL_RANGE_JPEG) ? 1 : 0;
const AVPixFmtDescriptor* dst_desc = av_pix_fmt_desc_get(pix_fmt);
int dst_range = (dst_desc && (dst_desc->flags & AV_PIX_FMT_FLAG_RGB)) ? 1 : src_range;
// Override sws's default BT.601 matrix with the source frame's actual colorspace.
// RGB output is always full-range; YUV output keeps the source range.
apply_sws_colorspace(sws_ctx, sws_getCoefficients(sws_cs), src_range, dst_range);
}

// Perform the conversion
sws_scale(
ret = sws_scale(
sws_ctx,
src_frame->data,
src_frame->linesize,
Expand All @@ -64,12 +154,40 @@ AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt) {
// Clean up
sws_freeContext(sws_ctx);

if (ret != dst_frame->height) {
logger()->error("Failed to convert AVFrame pixel format.");
av_frame_free(&dst_frame);
return nullptr;
}

return dst_frame;
}

// Convert AVFrame to ncnn::Mat by copying the data
[[gnu::target_clones("arch=x86-64-v4", "arch=x86-64-v3", "default")]]
ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame) {
ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame, const AVFrame* color_hint) {
// Fill in missing color metadata from the hint so convert_avframe_pix_fmt
// uses the correct color matrix (decoders often leave per-frame colorspace
// as AVCOL_SPC_UNSPECIFIED even when the stream header specifies it)
if (frame->colorspace == AVCOL_SPC_UNSPECIFIED) {
static std::once_flag warned;
std::call_once(warned, [] {
logger()->warn(
"Decoded frame has AVCOL_SPC_UNSPECIFIED colorspace; "
"color matrix will be taken from stream header via color_hint."
);
});
}

if (color_hint != nullptr) {
if (frame->colorspace == AVCOL_SPC_UNSPECIFIED) {
frame->colorspace = color_hint->colorspace;
}
if (frame->color_range == AVCOL_RANGE_UNSPECIFIED) {
frame->color_range = color_hint->color_range;
}
}

AVFrame* converted_frame = nullptr;

// Convert to BGR24 format if necessary
Expand Down Expand Up @@ -107,9 +225,10 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame) {
return ncnn_image;
}

// Convert ncnn::Mat to AVFrame with a specified pixel format (this part is unchanged)
// Convert ncnn::Mat to AVFrame with a specified pixel format
[[gnu::target_clones("arch=x86-64-v4", "arch=x86-64-v3", "default")]]
AVFrame* ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt) {
AVFrame*
ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt, const AVFrame* color_hint) {
int ret;

// Step 1: Allocate a destination AVFrame for the specified pixel format
Expand Down Expand Up @@ -167,7 +286,7 @@ AVFrame* ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt) {
dst_frame->width,
dst_frame->height,
pix_fmt,
SWS_BILINEAR,
SWS_FILTER,
nullptr,
nullptr,
nullptr
Expand All @@ -180,6 +299,25 @@ AVFrame* ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt) {
return nullptr;
}

// Override sws's default BT.601 matrix using the reference frame's colorspace.
// ncnn::Mat pixel data is always full-range (0-255), so src_range=1.
// The output range and color metadata are taken from the color_hint reference frame.
if (color_hint != nullptr) {
int sws_cs = avcol_spc_to_sws_cs(color_hint->colorspace);
int dst_range = (color_hint->color_range == AVCOL_RANGE_JPEG) ? 1 : 0;
// ncnn::Mat pixel data is always full-range (0-255), so src_range=1.
// The output range and color metadata are taken from the color_hint reference frame.
apply_sws_colorspace(sws_ctx, sws_getCoefficients(sws_cs), 1, dst_range);

// ncnn::Mat carries no color metadata, so the output frame would have
// all AVCOL_*_UNSPECIFIED fields without this. Copy the source video's
// properties so the player can correctly interpret the output frame.
dst_frame->colorspace = color_hint->colorspace;
dst_frame->color_range = color_hint->color_range;
dst_frame->color_primaries = color_hint->color_primaries;
dst_frame->color_trc = color_hint->color_trc;
}

// Perform the conversion
ret = sws_scale(
sws_ctx,
Expand Down
Loading