Skip to content

Commit fed73cf

Browse files
committed
fix(*): eliminate flickering in interpolated output and improve color consistency with input
Add a virtual `Interpolator::normalize()` method that runs pass-through frames through the same pixel-format round-trip as interpolated frames, eliminating temporal colour inconsistencies and flickering in alternating frames (assuming 2x interpolation). Refactor `in_time_base_`, `out_time_base_`, and `out_pix_fmt_` from `InterpolatorRIFE` private members into `Interpolator` protected members to avoid duplication in future derived classes. Capture authoritative color properties (color_space, color_range, color_primaries, color_trc) from the decoder context once during `init()` and store them in the `Processor` base class. Introduce `make_color_hint()`, which creates a metadata-only `AVFrame` carrying these properties and forwards it to `avframe_to_ncnn_mat()` and `ncnn_mat_to_avframe()`, ensuring `sws_scale` always uses a consistent colour matrix instead of falling back to BT.601 defaults whenever a frame carries `AVCOL_SPC_UNSPECIFIED`. Additional changes in `src/conversions.cpp`: - Add `avcol_spc_to_sws_cs()` to map `AVColorSpace` to SWS colorspace constants with a BT.601 fallback and a thread-safe one-shot warning via `std::once_flag`. - Replace hardcoded `SWS_BILINEAR` with `static constexpr int SWS_FILTER = SWS_BICUBIC`. - Extract `apply_sws_colorspace()` helper to deduplicate the two `sws_setColorspaceDetails` call sites; failures emit a one-time `warn` on the first occurrence followed by `debug` logs with a running count thereafter. - Validate the return value of `sws_scale()` against `dst_frame->height` before returning. Register `src/interpolator.cpp` in `CMakeLists.txt`.
1 parent 7db9c18 commit fed73cf

12 files changed

Lines changed: 301 additions & 25 deletions

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,6 @@ compile_commands.json
7575
CTestTestfile.cmake
7676
_deps
7777
CMakeUserPresets.json
78+
79+
third_party/
80+
build/

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2424
- Incorrect spdlog format string using printf-style specifiers instead of fmt-style placeholders.
2525
- Division by zero in CLI progress display within the first second of processing.
2626
- `unhook_ffmpeg_logging` disabling all FFmpeg logging instead of restoring the default callback.
27+
- Eliminate flickering in interpolated output and improve color consistency with input.
2728

2829
## [6.4.0] - 2025-01-24
2930

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ add_library(libvideo2x
8383
src/filter_realcugan.cpp
8484
src/filter_realesrgan.cpp
8585
src/fsutils.cpp
86+
src/interpolator.cpp
8687
src/interpolator_rife.cpp
8788
src/libplacebo.cpp
8889
src/libvideo2x.cpp

include/libvideo2x/conversions.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,18 @@ namespace conversions {
1414
AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt);
1515

1616
// Convert AVFrame to ncnn::Mat
17-
ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame);
17+
// color_hint: optional frame whose color metadata is used when the source frame
18+
// has AVCOL_SPC_UNSPECIFIED / AVCOL_RANGE_UNSPECIFIED (common for per-frame metadata).
19+
ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame, const AVFrame* color_hint = nullptr);
1820

1921
// Convert ncnn::Mat to AVFrame
20-
AVFrame* ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt);
22+
// color_hint: optional frame whose color metadata is used to configure the
23+
// YUV output color matrix and to populate the output frame's color properties.
24+
AVFrame* ncnn_mat_to_avframe(
25+
const ncnn::Mat& mat,
26+
AVPixelFormat pix_fmt,
27+
const AVFrame* color_hint = nullptr
28+
);
2129

2230
} // namespace conversions
2331
} // namespace video2x

include/libvideo2x/interpolator_rife.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,12 @@ class InterpolatorRIFE : public Interpolator {
3030
int init(AVCodecContext* dec_ctx, AVCodecContext* enc_ctx, AVBufferRef* hw_ctx) override;
3131

3232
// Processes an input frame and returns the processed frame
33-
int interpolate(AVFrame* prev_frame, AVFrame* in_frame, AVFrame** out_frame, float time_step)
34-
override;
33+
int interpolate(
34+
AVFrame* prev_frame,
35+
AVFrame* in_frame,
36+
AVFrame** out_frame,
37+
float time_step
38+
) override;
3539

3640
// Returns the interpolator's type
3741
ProcessorType get_processor_type() const override { return ProcessorType::RIFE; }
@@ -53,9 +57,6 @@ class InterpolatorRIFE : public Interpolator {
5357
bool uhd_mode_;
5458
int num_threads_;
5559
const fsutils::StringType model_name_;
56-
AVRational in_time_base_;
57-
AVRational out_time_base_;
58-
AVPixelFormat out_pix_fmt_;
5960
};
6061

6162
} // namespace processors

include/libvideo2x/processor.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@ extern "C" {
77
#include <libavcodec/avcodec.h>
88
#include <libavfilter/avfilter.h>
99
#include <libavutil/buffer.h>
10+
#include <libavutil/pixdesc.h>
1011
}
1112

1213
#include "fsutils.h"
14+
#include "logger_manager.h"
1315

1416
namespace video2x {
1517
namespace processors {
@@ -77,6 +79,43 @@ class Processor {
7779
int& width,
7880
int& height
7981
) const = 0;
82+
83+
protected:
84+
// Authoritative color properties captured from dec_ctx at init time.
85+
// Per-frame AVFrame fields are often AVCOL_*_UNSPECIFIED even when the
86+
// stream header carries valid values, so we store them here once.
87+
AVColorSpace out_color_space_ = AVCOL_SPC_UNSPECIFIED;
88+
AVColorRange out_color_range_ = AVCOL_RANGE_UNSPECIFIED;
89+
AVColorPrimaries out_color_primaries_ = AVCOL_PRI_UNSPECIFIED;
90+
AVColorTransferCharacteristic out_color_trc_ = AVCOL_TRC_UNSPECIFIED;
91+
92+
// Assign all four color properties from dec_ctx and log them at info level.
93+
// Call once at the end of init() in each subclass.
94+
void capture_color_properties(AVCodecContext* dec_ctx) {
95+
out_color_space_ = dec_ctx->colorspace;
96+
out_color_range_ = dec_ctx->color_range;
97+
out_color_primaries_ = dec_ctx->color_primaries;
98+
out_color_trc_ = dec_ctx->color_trc;
99+
logger()->info(
100+
"Decoder color properties: space={}, range={}, primaries={}, trc={}",
101+
av_color_space_name(out_color_space_),
102+
av_color_range_name(out_color_range_),
103+
av_color_primaries_name(out_color_primaries_),
104+
av_color_transfer_name(out_color_trc_)
105+
);
106+
}
107+
108+
// Build a lightweight AVFrame carrying only color metadata.
109+
// Pass the result as color_hint to avframe_to_ncnn_mat / ncnn_mat_to_avframe
110+
// so both conversion directions use the same, consistent color matrix.
111+
AVFrame make_color_hint() const {
112+
AVFrame color_hint = {};
113+
color_hint.colorspace = out_color_space_;
114+
color_hint.color_range = out_color_range_;
115+
color_hint.color_primaries = out_color_primaries_;
116+
color_hint.color_trc = out_color_trc_;
117+
return color_hint;
118+
}
80119
};
81120

82121
// Abstract base class for filters
@@ -92,6 +131,20 @@ class Interpolator : public Processor {
92131
ProcessingMode get_processing_mode() const override { return ProcessingMode::Interpolate; }
93132
virtual int
94133
interpolate(AVFrame* prev_frame, AVFrame* in_frame, AVFrame** out_frame, float time_step) = 0;
134+
135+
// Normalize a pass-through frame so it goes through the same pixel-format
136+
// round-trip as interpolated frames, eliminating visual differences between
137+
// original and interpolated frames.
138+
// The default implementation performs the ncnn pixel-format round-trip and
139+
// rescales PTS to the encoder time base, which is correct for all ncnn-based
140+
// interpolators. Non-ncnn subclasses (e.g., CUDA-based) should override this
141+
// to match their own conversion pipeline.
142+
virtual int normalize(AVFrame* in_frame, AVFrame** out_frame);
143+
144+
protected:
145+
AVRational in_time_base_ = {0, 1};
146+
AVRational out_time_base_ = {0, 1};
147+
AVPixelFormat out_pix_fmt_ = AV_PIX_FMT_NONE;
95148
};
96149

97150
} // namespace processors

src/conversions.cpp

Lines changed: 144 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
#include <cstddef>
44
#include <cstdio>
5+
#include <mutex>
6+
7+
extern "C" {
8+
#include <libavutil/pixdesc.h>
9+
}
510

611
#include <spdlog/spdlog.h>
712

@@ -10,9 +15,84 @@
1015
namespace video2x {
1116
namespace conversions {
1217

18+
// Resampling filter used for all pixel format conversions. SWS_BICUBIC is
19+
// sharper than bilinear while avoiding the ringing/shimmer of Lanczos/Sinc.
20+
// Both conversion directions use the same filter so
21+
// the round-trip is symmetric and does not introduce asymmetric chroma error.
22+
static constexpr int SWS_FILTER = SWS_BICUBIC;
23+
24+
// Map an AVColorSpace enum to the SWS_CS_* constant expected by sws_setColorspaceDetails.
25+
// sws_getContext always defaults to BT.601; call sws_setColorspaceDetails after to override.
26+
static int avcol_spc_to_sws_cs(AVColorSpace cs) {
27+
switch (cs) {
28+
case AVCOL_SPC_BT709:
29+
return SWS_CS_ITU709;
30+
case AVCOL_SPC_FCC:
31+
return SWS_CS_FCC;
32+
case AVCOL_SPC_BT470BG:
33+
return SWS_CS_ITU601;
34+
case AVCOL_SPC_SMPTE170M:
35+
return SWS_CS_SMPTE170M;
36+
case AVCOL_SPC_SMPTE240M:
37+
return SWS_CS_SMPTE240M;
38+
case AVCOL_SPC_BT2020_NCL:
39+
return SWS_CS_BT2020;
40+
case AVCOL_SPC_BT2020_CL:
41+
return SWS_CS_BT2020;
42+
default:
43+
if (cs == AVCOL_SPC_UNSPECIFIED) {
44+
static std::once_flag warned;
45+
std::call_once(warned, [] {
46+
logger()->warn(
47+
"Color space is AVCOL_SPC_UNSPECIFIED; "
48+
"falling back to BT.601. Output colors may be incorrect "
49+
"for BT.709 or BT.2020 content."
50+
);
51+
});
52+
}
53+
// Default to BT.601 using SWS_CS_DEFAULT
54+
// Assuming BT.709 could silently mis-color
55+
return SWS_CS_DEFAULT;
56+
}
57+
}
58+
59+
// Override the default BT.601 colorspace matrix on an already-created SwsContext.
60+
// src_range/dst_range: 0 = limited (TV), 1 = full (JPEG/PC).
61+
// Brightness/contrast/saturation are left at identity (0, 1.0, 1.0).
62+
static void
63+
apply_sws_colorspace(SwsContext* sws_ctx, const int* coeffs, int src_range, int dst_range) {
64+
int ret = sws_setColorspaceDetails(
65+
sws_ctx,
66+
coeffs,
67+
src_range,
68+
coeffs,
69+
dst_range,
70+
0, // brightness: 0.0 in 16.16 fixed-point (additive offset, no adjustment)
71+
1 << 16, // contrast: 1.0 in 16.16 fixed-point (multiplicative offset, no adjustment)
72+
1 << 16 // saturation: 1.0 in 16.16 fixed-point (multiplicative offset, no adjustment)
73+
);
74+
if (ret < 0) {
75+
static std::atomic<int> fail_count{0};
76+
int count = ++fail_count;
77+
if (count == 1) {
78+
logger()->warn(
79+
"sws_setColorspaceDetails failed; colorspace conversion may be inaccurate."
80+
);
81+
} else {
82+
logger()->debug(
83+
"sws_setColorspaceDetails failed (occurrence #{}); "
84+
"colorspace conversion may be inaccurate.",
85+
count
86+
);
87+
}
88+
}
89+
}
90+
1391
// Convert AVFrame format
1492
[[gnu::target_clones("arch=x86-64-v4", "arch=x86-64-v3", "default")]]
1593
AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt) {
94+
int ret;
95+
1696
AVFrame* dst_frame = av_frame_alloc();
1797
if (dst_frame == nullptr) {
1898
logger()->error("Failed to allocate destination AVFrame.");
@@ -38,7 +118,7 @@ AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt) {
38118
dst_frame->width,
39119
dst_frame->height,
40120
pix_fmt,
41-
SWS_BILINEAR,
121+
SWS_FILTER,
42122
nullptr,
43123
nullptr,
44124
nullptr
@@ -50,8 +130,18 @@ AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt) {
50130
return nullptr;
51131
}
52132

133+
{
134+
int sws_cs = avcol_spc_to_sws_cs(src_frame->colorspace);
135+
int src_range = (src_frame->color_range == AVCOL_RANGE_JPEG) ? 1 : 0;
136+
const AVPixFmtDescriptor* dst_desc = av_pix_fmt_desc_get(pix_fmt);
137+
int dst_range = (dst_desc && (dst_desc->flags & AV_PIX_FMT_FLAG_RGB)) ? 1 : src_range;
138+
// Override sws's default BT.601 matrix with the source frame's actual colorspace.
139+
// RGB output is always full-range; YUV output keeps the source range.
140+
apply_sws_colorspace(sws_ctx, sws_getCoefficients(sws_cs), src_range, dst_range);
141+
}
142+
53143
// Perform the conversion
54-
sws_scale(
144+
ret = sws_scale(
55145
sws_ctx,
56146
src_frame->data,
57147
src_frame->linesize,
@@ -64,12 +154,40 @@ AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt) {
64154
// Clean up
65155
sws_freeContext(sws_ctx);
66156

157+
if (ret != dst_frame->height) {
158+
logger()->error("Failed to convert AVFrame pixel format.");
159+
av_frame_free(&dst_frame);
160+
return nullptr;
161+
}
162+
67163
return dst_frame;
68164
}
69165

70166
// Convert AVFrame to ncnn::Mat by copying the data
71167
[[gnu::target_clones("arch=x86-64-v4", "arch=x86-64-v3", "default")]]
72-
ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame) {
168+
ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame, const AVFrame* color_hint) {
169+
// Fill in missing color metadata from the hint so convert_avframe_pix_fmt
170+
// uses the correct color matrix (decoders often leave per-frame colorspace
171+
// as AVCOL_SPC_UNSPECIFIED even when the stream header specifies it)
172+
if (frame->colorspace == AVCOL_SPC_UNSPECIFIED) {
173+
static std::once_flag warned;
174+
std::call_once(warned, [] {
175+
logger()->warn(
176+
"Decoded frame has AVCOL_SPC_UNSPECIFIED colorspace; "
177+
"color matrix will be taken from stream header via color_hint."
178+
);
179+
});
180+
}
181+
182+
if (color_hint != nullptr) {
183+
if (frame->colorspace == AVCOL_SPC_UNSPECIFIED) {
184+
frame->colorspace = color_hint->colorspace;
185+
}
186+
if (frame->color_range == AVCOL_RANGE_UNSPECIFIED) {
187+
frame->color_range = color_hint->color_range;
188+
}
189+
}
190+
73191
AVFrame* converted_frame = nullptr;
74192

75193
// Convert to BGR24 format if necessary
@@ -107,9 +225,10 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame) {
107225
return ncnn_image;
108226
}
109227

110-
// Convert ncnn::Mat to AVFrame with a specified pixel format (this part is unchanged)
228+
// Convert ncnn::Mat to AVFrame with a specified pixel format
111229
[[gnu::target_clones("arch=x86-64-v4", "arch=x86-64-v3", "default")]]
112-
AVFrame* ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt) {
230+
AVFrame*
231+
ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt, const AVFrame* color_hint) {
113232
int ret;
114233

115234
// Step 1: Allocate a destination AVFrame for the specified pixel format
@@ -167,7 +286,7 @@ AVFrame* ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt) {
167286
dst_frame->width,
168287
dst_frame->height,
169288
pix_fmt,
170-
SWS_BILINEAR,
289+
SWS_FILTER,
171290
nullptr,
172291
nullptr,
173292
nullptr
@@ -180,6 +299,25 @@ AVFrame* ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt) {
180299
return nullptr;
181300
}
182301

302+
// Override sws's default BT.601 matrix using the reference frame's colorspace.
303+
// ncnn::Mat pixel data is always full-range (0-255), so src_range=1.
304+
// The output range and color metadata are taken from the color_hint reference frame.
305+
if (color_hint != nullptr) {
306+
int sws_cs = avcol_spc_to_sws_cs(color_hint->colorspace);
307+
int dst_range = (color_hint->color_range == AVCOL_RANGE_JPEG) ? 1 : 0;
308+
// ncnn::Mat pixel data is always full-range (0-255), so src_range=1.
309+
// The output range and color metadata are taken from the color_hint reference frame.
310+
apply_sws_colorspace(sws_ctx, sws_getCoefficients(sws_cs), 1, dst_range);
311+
312+
// ncnn::Mat carries no color metadata, so the output frame would have
313+
// all AVCOL_*_UNSPECIFIED fields without this. Copy the source video's
314+
// properties so the player can correctly interpret the output frame.
315+
dst_frame->colorspace = color_hint->colorspace;
316+
dst_frame->color_range = color_hint->color_range;
317+
dst_frame->color_primaries = color_hint->color_primaries;
318+
dst_frame->color_trc = color_hint->color_trc;
319+
}
320+
183321
// Perform the conversion
184322
ret = sws_scale(
185323
sws_ctx,

0 commit comments

Comments
 (0)