k4yt3x · exwm · Mar 12, 2026
diff --git a/.gitignore b/.gitignore
@@ -75,3 +75,6 @@ compile_commands.json
 CTestTestfile.cmake
 _deps
 CMakeUserPresets.json
+
+third_party/
+build/
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Incorrect spdlog format string using printf-style specifiers instead of fmt-style placeholders.
 - Division by zero in CLI progress display within the first second of processing.
 - `unhook_ffmpeg_logging` disabling all FFmpeg logging instead of restoring the default callback.
+- Eliminate flickering in interpolated output and improve color consistency with the input.
 
 ## [6.4.0] - 2025-01-24
 

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -83,6 +83,7 @@ add_library(libvideo2x
     src/filter_realcugan.cpp
     src/filter_realesrgan.cpp
     src/fsutils.cpp
+    src/interpolator.cpp
     src/interpolator_rife.cpp
     src/libplacebo.cpp
     src/libvideo2x.cpp

diff --git a/include/libvideo2x/conversions.h b/include/libvideo2x/conversions.h
@@ -14,10 +14,18 @@ namespace conversions {
 AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt);
 
 // Convert AVFrame to ncnn::Mat
-ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame);
+// color_hint: optional frame whose color metadata is used when the source frame
+// has AVCOL_SPC_UNSPECIFIED / AVCOL_RANGE_UNSPECIFIED (common for per-frame metadata).
+ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame, const AVFrame* color_hint = nullptr);
 
 // Convert ncnn::Mat to AVFrame
-AVFrame* ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt);
+// color_hint: optional frame whose color metadata is used to configure the
+// YUV output color matrix and to populate the output frame's color properties.
+AVFrame* ncnn_mat_to_avframe(
+    const ncnn::Mat& mat,
+    AVPixelFormat pix_fmt,
+    const AVFrame* color_hint = nullptr
+);
 
 }  // namespace conversions
 }  // namespace video2x
diff --git a/include/libvideo2x/interpolator_rife.h b/include/libvideo2x/interpolator_rife.h
@@ -30,8 +30,12 @@ class InterpolatorRIFE : public Interpolator {
     int init(AVCodecContext* dec_ctx, AVCodecContext* enc_ctx, AVBufferRef* hw_ctx) override;
 
     // Processes an input frame and returns the processed frame
-    int interpolate(AVFrame* prev_frame, AVFrame* in_frame, AVFrame** out_frame, float time_step)
-        override;
+    int interpolate(
+        AVFrame* prev_frame,
+        AVFrame* in_frame,
+        AVFrame** out_frame,
+        float time_step
+    ) override;
 
     // Returns the interpolator's type
     ProcessorType get_processor_type() const override { return ProcessorType::RIFE; }
@@ -53,9 +57,6 @@ class InterpolatorRIFE : public Interpolator {
     bool uhd_mode_;
     int num_threads_;
     const fsutils::StringType model_name_;
-    AVRational in_time_base_;
-    AVRational out_time_base_;
-    AVPixelFormat out_pix_fmt_;
 };
 
 }  // namespace processors

diff --git a/include/libvideo2x/processor.h b/include/libvideo2x/processor.h
@@ -7,9 +7,11 @@ extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavfilter/avfilter.h>
 #include <libavutil/buffer.h>
+#include <libavutil/pixdesc.h>
 }
 
 #include "fsutils.h"
+#include "logger_manager.h"
 
 namespace video2x {
 namespace processors {
@@ -77,6 +79,43 @@ class Processor {
         int& width,
         int& height
     ) const = 0;
+
+   protected:
+    // Authoritative color properties captured from dec_ctx at init time.
+    // Per-frame AVFrame fields are often AVCOL_*_UNSPECIFIED even when the
+    // stream header carries valid values, so we store them here once.
+    AVColorSpace out_color_space_ = AVCOL_SPC_UNSPECIFIED;
+    AVColorRange out_color_range_ = AVCOL_RANGE_UNSPECIFIED;
+    AVColorPrimaries out_color_primaries_ = AVCOL_PRI_UNSPECIFIED;
+    AVColorTransferCharacteristic out_color_trc_ = AVCOL_TRC_UNSPECIFIED;
+
+    // Assign all four color properties from dec_ctx and log them at info level.
+    // Call once at the end of init() in each subclass.
+    void capture_color_properties(AVCodecContext* dec_ctx) {
+        out_color_space_ = dec_ctx->colorspace;
+        out_color_range_ = dec_ctx->color_range;
+        out_color_primaries_ = dec_ctx->color_primaries;
+        out_color_trc_ = dec_ctx->color_trc;
+        logger()->info(
+            "Decoder color properties: space={}, range={}, primaries={}, trc={}",
+            av_color_space_name(out_color_space_),
+            av_color_range_name(out_color_range_),
+            av_color_primaries_name(out_color_primaries_),
+            av_color_transfer_name(out_color_trc_)
+        );
+    }
+
+    // Build a lightweight AVFrame carrying only color metadata.
+    // Pass the result as color_hint to avframe_to_ncnn_mat / ncnn_mat_to_avframe
+    // so both conversion directions use the same, consistent color matrix.
+    AVFrame make_color_hint() const {
+        AVFrame color_hint = {};
+        color_hint.colorspace = out_color_space_;
+        color_hint.color_range = out_color_range_;
+        color_hint.color_primaries = out_color_primaries_;
+        color_hint.color_trc = out_color_trc_;
+        return color_hint;
+    }
 };
 
 // Abstract base class for filters
@@ -92,6 +131,20 @@ class Interpolator : public Processor {
     ProcessingMode get_processing_mode() const override { return ProcessingMode::Interpolate; }
     virtual int
     interpolate(AVFrame* prev_frame, AVFrame* in_frame, AVFrame** out_frame, float time_step) = 0;
+
+    // Normalize a pass-through frame so it goes through the same pixel-format
+    // round-trip as interpolated frames, eliminating visual differences between
+    // original and interpolated frames.
+    // The default implementation performs the ncnn pixel-format round-trip and
+    // rescales PTS to the encoder time base, which is correct for all ncnn-based
+    // interpolators. Non-ncnn subclasses (e.g., CUDA-based) should override this
+    // to match their own conversion pipeline.
+    virtual int normalize(AVFrame* in_frame, AVFrame** out_frame);
+
+   protected:
+    AVRational in_time_base_ = {0, 1};
+    AVRational out_time_base_ = {0, 1};
+    AVPixelFormat out_pix_fmt_ = AV_PIX_FMT_NONE;
 };
 
 }  // namespace processors

diff --git a/src/conversions.cpp b/src/conversions.cpp
@@ -2,6 +2,11 @@
 
 #include <cstddef>
 #include <cstdio>
+#include <mutex>
+
+extern "C" {
+#include <libavutil/pixdesc.h>
+}
 
 #include <spdlog/spdlog.h>
 
@@ -10,9 +15,84 @@
 namespace video2x {
 namespace conversions {
 
+// Resampling filter used for all pixel format conversions. SWS_BICUBIC is
+// sharper than bilinear while avoiding the ringing/shimmer of Lanczos/Sinc.
+// Both conversion directions use the same filter so
+// the round-trip is symmetric and does not introduce asymmetric chroma error.
+static constexpr int SWS_FILTER = SWS_BICUBIC;
+
+// Map an AVColorSpace enum to the SWS_CS_* constant expected by sws_setColorspaceDetails.
+// sws_getContext always defaults to BT.601; call sws_setColorspaceDetails after to override.
+static int avcol_spc_to_sws_cs(AVColorSpace cs) {
+    switch (cs) {
+        case AVCOL_SPC_BT709:
+            return SWS_CS_ITU709;
+        case AVCOL_SPC_FCC:
+            return SWS_CS_FCC;
+        case AVCOL_SPC_BT470BG:
+            return SWS_CS_ITU601;
+        case AVCOL_SPC_SMPTE170M:
+            return SWS_CS_SMPTE170M;
+        case AVCOL_SPC_SMPTE240M:
+            return SWS_CS_SMPTE240M;
+        case AVCOL_SPC_BT2020_NCL:
+            return SWS_CS_BT2020;
+        case AVCOL_SPC_BT2020_CL:
+            return SWS_CS_BT2020;
+        default:
+            if (cs == AVCOL_SPC_UNSPECIFIED) {
+                static std::once_flag warned;
+                std::call_once(warned, [] {
+                    logger()->warn(
+                        "Color space is AVCOL_SPC_UNSPECIFIED; "
+                        "falling back to BT.601. Output colors may be incorrect "
+                        "for BT.709 or BT.2020 content."
+                    );
+                });
+            }
+            // Fall back to BT.601 (SWS_CS_DEFAULT) rather than guessing:
+            // assuming BT.709 could silently mis-color the output.
+            return SWS_CS_DEFAULT;
+    }
+}
+
+// Override the default BT.601 colorspace matrix on an already-created SwsContext.
+// src_range/dst_range: 0 = limited (TV), 1 = full (JPEG/PC).
+// Brightness/contrast/saturation are left at identity (0, 1.0, 1.0).
+static void
+apply_sws_colorspace(SwsContext* sws_ctx, const int* coeffs, int src_range, int dst_range) {
+    int ret = sws_setColorspaceDetails(
+        sws_ctx,
+        coeffs,
+        src_range,
+        coeffs,
+        dst_range,
+        0,        // brightness: 0.0 in 16.16 fixed-point (additive offset, no adjustment)
+        1 << 16,  // contrast:   1.0 in 16.16 fixed-point (multiplicative offset, no adjustment)
+        1 << 16   // saturation: 1.0 in 16.16 fixed-point (multiplicative offset, no adjustment)
+    );
+    if (ret < 0) {
+        static std::atomic<int> fail_count{0};
+        int count = ++fail_count;
+        if (count == 1) {
+            logger()->warn(
+                "sws_setColorspaceDetails failed; colorspace conversion may be inaccurate."
+            );
+        } else {
+            logger()->debug(
+                "sws_setColorspaceDetails failed (occurrence #{}); "
+                "colorspace conversion may be inaccurate.",
+                count
+            );
+        }
+    }
+}
+
 // Convert AVFrame format
 [[gnu::target_clones("arch=x86-64-v4", "arch=x86-64-v3", "default")]]
 AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt) {
+    int ret;
+
     AVFrame* dst_frame = av_frame_alloc();
     if (dst_frame == nullptr) {
         logger()->error("Failed to allocate destination AVFrame.");
@@ -38,7 +118,7 @@ AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt) {
         dst_frame->width,
         dst_frame->height,
         pix_fmt,
-        SWS_BILINEAR,
+        SWS_FILTER,
         nullptr,
         nullptr,
         nullptr
@@ -50,8 +130,18 @@ AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt) {
         return nullptr;
     }
 
+    {
+        int sws_cs = avcol_spc_to_sws_cs(src_frame->colorspace);
+        int src_range = (src_frame->color_range == AVCOL_RANGE_JPEG) ? 1 : 0;
+        const AVPixFmtDescriptor* dst_desc = av_pix_fmt_desc_get(pix_fmt);
+        int dst_range = (dst_desc && (dst_desc->flags & AV_PIX_FMT_FLAG_RGB)) ? 1 : src_range;
+        // Override sws's default BT.601 matrix with the source frame's actual colorspace.
+        // RGB output is always full-range; YUV output keeps the source range.
+        apply_sws_colorspace(sws_ctx, sws_getCoefficients(sws_cs), src_range, dst_range);
+    }
+
     // Perform the conversion
-    sws_scale(
+    ret = sws_scale(
         sws_ctx,
         src_frame->data,
         src_frame->linesize,
@@ -64,12 +154,40 @@ AVFrame* convert_avframe_pix_fmt(AVFrame* src_frame, AVPixelFormat pix_fmt) {
     // Clean up
     sws_freeContext(sws_ctx);
 
+    if (ret != dst_frame->height) {
+        logger()->error("Failed to convert AVFrame pixel format.");
+        av_frame_free(&dst_frame);
+        return nullptr;
+    }
+
     return dst_frame;
 }
 
 // Convert AVFrame to ncnn::Mat by copying the data
 [[gnu::target_clones("arch=x86-64-v4", "arch=x86-64-v3", "default")]]
-ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame) {
+ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame, const AVFrame* color_hint) {
+    // Fill in missing color metadata from the hint so convert_avframe_pix_fmt
+    // uses the correct color matrix (decoders often leave per-frame colorspace
+    // as AVCOL_SPC_UNSPECIFIED even when the stream header specifies it)
+    if (frame->colorspace == AVCOL_SPC_UNSPECIFIED) {
+        static std::once_flag warned;
+        std::call_once(warned, [] {
+            logger()->warn(
+                "Decoded frame has AVCOL_SPC_UNSPECIFIED colorspace; "
+                "color matrix will be taken from stream header via color_hint."
+            );
+        });
+    }
+
+    if (color_hint != nullptr) {
+        if (frame->colorspace == AVCOL_SPC_UNSPECIFIED) {
+            frame->colorspace = color_hint->colorspace;
+        }
+        if (frame->color_range == AVCOL_RANGE_UNSPECIFIED) {
+            frame->color_range = color_hint->color_range;
+        }
+    }
+
     AVFrame* converted_frame = nullptr;
 
     // Convert to BGR24 format if necessary
@@ -107,9 +225,10 @@ ncnn::Mat avframe_to_ncnn_mat(AVFrame* frame) {
     return ncnn_image;
 }
 
-// Convert ncnn::Mat to AVFrame with a specified pixel format (this part is unchanged)
+// Convert ncnn::Mat to AVFrame with a specified pixel format
 [[gnu::target_clones("arch=x86-64-v4", "arch=x86-64-v3", "default")]]
-AVFrame* ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt) {
+AVFrame*
+ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt, const AVFrame* color_hint) {
     int ret;
 
     // Step 1: Allocate a destination AVFrame for the specified pixel format
@@ -167,7 +286,7 @@ AVFrame* ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt) {
         dst_frame->width,
         dst_frame->height,
         pix_fmt,
-        SWS_BILINEAR,
+        SWS_FILTER,
         nullptr,
         nullptr,
         nullptr
@@ -180,6 +299,25 @@ AVFrame* ncnn_mat_to_avframe(const ncnn::Mat& mat, AVPixelFormat pix_fmt) {
         return nullptr;
     }
 
+    // Override sws's default BT.601 matrix using the reference frame's colorspace.
+    // ncnn::Mat pixel data is always full-range (0-255), so src_range=1.
+    // The output range and color metadata are taken from the color_hint reference frame.
+    if (color_hint != nullptr) {
+        int sws_cs = avcol_spc_to_sws_cs(color_hint->colorspace);
+        int dst_range = (color_hint->color_range == AVCOL_RANGE_JPEG) ? 1 : 0;
+        // The same coefficient table is passed for both source and destination;
+        // the source range is fixed at 1 (full) and dst_range comes from color_hint.
+        apply_sws_colorspace(sws_ctx, sws_getCoefficients(sws_cs), 1, dst_range);
+
+        // ncnn::Mat carries no color metadata, so the output frame would have
+        // all AVCOL_*_UNSPECIFIED fields without this. Copy the source video's
+        // properties so the player can correctly interpret the output frame.
+        dst_frame->colorspace = color_hint->colorspace;
+        dst_frame->color_range = color_hint->color_range;
+        dst_frame->color_primaries = color_hint->color_primaries;
+        dst_frame->color_trc = color_hint->color_trc;
+    }
+
     // Perform the conversion
     ret = sws_scale(
         sws_ctx,