From 2d7701e01a723721d688304ed094b2bd9c8c00db Mon Sep 17 00:00:00 2001 From: Salem874 Date: Fri, 19 Jun 2026 07:37:30 +0100 Subject: [PATCH] fix(noise): suppress recurring ffprobe demuxing-error lines in non-verbose activity log (#847) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `Error during demuxing: Invalid data found when processing input` fires from ffprobe stderr on virtually every track during enrichment — modern ffmpeg's partial-moov-atom warning at byte 0. Downloads complete; ffprobe falls through to a valid result on retry. But the noise produces ~20 entries per album in the activity log, reported in #847 as the most-aggravating recurring noise. Implementation modelled exactly on the shipped #660 pattern (`is_python_traceback_noise` + non-verbose suppression in `download_queue.rs::emit_subprocess_line` gate): - New `process::is_ffprobe_demux_noise(line: &str) -> bool` in `utils/process.rs`. Match keys on the structural `[in#/ @ 0x]` prefix plus both required tail substrings (`Error during demuxing:` AND `Invalid data found when processing input`). Tight enough that genuine ffmpeg demuxer errors that share words pass through; loose enough to survive future demuxer-list shifts (the comma-separated demuxer names vary by ffmpeg version). - Wired into both stdout (line 10958) and stderr (line 11155) reader gates in `download_queue.rs` — `is_known_noise` now combines both `is_python_traceback_noise` and `is_ffprobe_demux_noise`. The 5th arg to `emit_subprocess_line` (`show_in_ui`) stays `verbose || !is_known_noise` so the on-disk activity-log writer still records every line — forensic record intact for support requests. 6 new unit tests pin the contract: - Canonical Apple Music shape (verbatim from the #847 issue report) - Leading-whitespace + wider-hex-pointer tolerance - Alternate demuxer lists (aac, wav,mp3) — future-proof against ffmpeg auto-selection changes - Genuine ffmpeg errors pass through (muxer errors, parameter errors, generic "Invalid data" without the demuxing prefix) - Empty / unrelated / traceback lines pass through - Wrapped GAMDL `[INFO HH:MM:SS]` prefix doesn't false-positive - Requires `0x` pointer form — gate fails open (noise resumes) if upstream switches format rather than over-suppressing Closes #847 --- src-tauri/src/services/download_queue.rs | 27 +++-- src-tauri/src/utils/process.rs | 141 +++++++++++++++++++++++ 2 files changed, 157 insertions(+), 11 deletions(-) diff --git a/src-tauri/src/services/download_queue.rs b/src-tauri/src/services/download_queue.rs index dd07bced..26f5a0af 100644 --- a/src-tauri/src/services/download_queue.rs +++ b/src-tauri/src/services/download_queue.rs @@ -10952,10 +10952,12 @@ async fn run_download_with_events( if should_emit { // Suppress Python traceback noise from the user- // facing activity-log feed when verbose is off - // (#660). The helper unconditionally writes to + // (#660). Same gate for ffprobe demuxing noise + // (#847). The helper unconditionally writes to // disk so support requests stay debuggable. - let is_traceback_noise = - process::is_python_traceback_noise(&clean_line); + let is_known_noise = + process::is_python_traceback_noise(&clean_line) + || process::is_ffprobe_demux_noise(&clean_line); // Phase 3.5h: humanise GAMDL "codec skip" lines // — strip "(media ID: NNN)" and the Python-repr // codec list. Idempotent + safe on non-matching @@ -10966,7 +10968,7 @@ async fn run_download_with_events( &download_id, "stdout", humanised, - verbose || !is_traceback_noise, + verbose || !is_known_noise, ); } } @@ -11146,12 +11148,15 @@ async fn run_download_with_events( set.insert(clean_line.clone()) }; if should_emit { - // Suppress Python traceback noise from the - // user-facing activity-log feed in non-verbose - // mode (#660). The helper unconditionally writes - // to disk so support requests stay debuggable. - let is_traceback_noise = - process::is_python_traceback_noise(&clean_line); + // Suppress Python traceback noise (#660) and + // recurring ffprobe demuxing-error lines (#847) + // from the user-facing activity-log feed in + // non-verbose mode. The helper unconditionally + // writes to disk so support requests stay + // debuggable. + let is_known_noise = + process::is_python_traceback_noise(&clean_line) + || process::is_ffprobe_demux_noise(&clean_line); // Phase 3.5h: humanise GAMDL "codec skip" lines // (strips "(media ID: NNN)" + Python-repr codec // list). @@ -11161,7 +11166,7 @@ async fn run_download_with_events( &download_id, "stderr", humanised, - verbose || !is_traceback_noise, + verbose || !is_known_noise, ); } } diff --git a/src-tauri/src/utils/process.rs b/src-tauri/src/utils/process.rs index e915006e..422722d9 100644 --- a/src-tauri/src/utils/process.rs +++ b/src-tauri/src/utils/process.rs @@ -265,6 +265,57 @@ pub fn is_python_traceback_noise(line: &str) -> bool { trimmed.chars().all(|c| c == '^') } +/// Reports whether `line` is a recurring ffprobe demuxing-error noise +/// line that should be suppressed from the user-facing activity log +/// when verbose mode is off (#847). +/// +/// During enrichment, MeedyaDL runs ffprobe several times per track +/// (codec detection, ReplayGain, mediainfo, …). Each invocation against +/// a freshly-written M4A occasionally trips an +/// "Invalid data found when processing input" warning that ffmpeg's +/// stderr emits for the partial-moov-atom case at byte 0. Downloads +/// complete fine; ffprobe falls through to a valid result on retry. +/// But the noise produces ~20 entries per album in the activity log — +/// reported in #847 as the most-aggravating recurring log noise. +/// +/// Modelled exactly on [`is_python_traceback_noise`] (#660): the +/// on-disk activity-log writer still records the line regardless so +/// the forensic record stays complete; this helper just gates the +/// per-line `activity-log` Tauri event when verbose is off. +/// +/// The match is intentionally tight on the recognisable prefix — +/// `[in#0/ @ 0x…] Error during demuxing: ` — to avoid +/// suppressing genuine ffmpeg errors that happen to share the +/// "demuxing" / "invalid data" wording. The hex pointer in the +/// bracket varies per invocation so we match by structure +/// (`[in#0/…@ 0x…]` substring) rather than literal. +#[must_use] +pub fn is_ffprobe_demux_noise(line: &str) -> bool { + let trimmed = line.trim(); + if trimmed.is_empty() { + return false; + } + // Required prefix shape — `[in#/ @ 0x]`. + // `in#0/mov,mp4,m4a,3gp,3g2,mj2` is the typical demuxer-list ffmpeg + // selects for Apple-Music-shipped M4A files, but we keep this + // generic so future demuxer-list shifts don't slip past the gate. + let Some(after_in_marker) = trimmed.strip_prefix("[in#") else { + return false; + }; + let Some(after_at_sign) = after_in_marker.split_once(" @ 0x").map(|(_, r)| r) else { + return false; + }; + let Some(after_close_bracket) = after_at_sign.split_once("] ").map(|(_, r)| r) else { + return false; + }; + // Required tail — both substrings present (order-flexible). Apple's + // ffmpeg has been stable on this exact wording since 5.x; if it + // changes upstream this gate fails open and noise resumes — which + // is the safe default vs accidentally suppressing genuine errors. + after_close_bracket.starts_with("Error during demuxing: ") + && after_close_bracket.contains("Invalid data found when processing input") +} + /// Reports whether `line` matches the Python exception summary pattern /// (e.g. `TypeError: …`, `httpx.ConnectError: Connection refused`). /// @@ -1480,6 +1531,96 @@ mod tests { )); } + // ---------------------------------------------------------- + // ffprobe demuxing-noise detector tests (#847) + // ---------------------------------------------------------- + + #[test] + fn is_ffprobe_demux_noise_recognises_canonical_apple_music_shape() { + // Verbatim from the #847 issue report — fires every track during + // enrichment. + assert!(is_ffprobe_demux_noise( + "[in#0/mov,mp4,m4a,3gp,3g2,mj2 @ 0x954c14000] Error during demuxing: Invalid data found when processing input" + )); + } + + #[test] + fn is_ffprobe_demux_noise_tolerates_leading_whitespace_and_alt_hex_widths() { + // Some ffmpeg builds prefix the line with a single space; some + // hex pointers are wider on 64-bit platforms. + assert!(is_ffprobe_demux_noise( + " [in#0/mov,mp4,m4a,3gp,3g2,mj2 @ 0x7b8c1c000] Error during demuxing: Invalid data found when processing input" + )); + assert!(is_ffprobe_demux_noise( + "[in#0/mov,mp4,m4a,3gp,3g2,mj2 @ 0xff00ff00ff00ff00] Error during demuxing: Invalid data found when processing input" + )); + } + + #[test] + fn is_ffprobe_demux_noise_recognises_alternate_demuxer_lists() { + // Future-proofing — ffmpeg's auto-selected demuxer list may + // change as more formats are added. The match keys on the + // structural `[in#/ @ 0x]` shape, not the + // exact comma-separated demuxer names. + assert!(is_ffprobe_demux_noise( + "[in#0/aac @ 0xabc123] Error during demuxing: Invalid data found when processing input" + )); + assert!(is_ffprobe_demux_noise( + "[in#1/wav,mp3 @ 0x1234] Error during demuxing: Invalid data found when processing input" + )); + } + + #[test] + fn is_ffprobe_demux_noise_rejects_genuine_ffmpeg_errors() { + // Other ffmpeg errors that happen to share words must NOT be + // suppressed — they're rare but actionable when they fire. + // Pattern requires BOTH "Error during demuxing:" prefix AND + // "Invalid data found when processing input" — so e.g. a + // muxer error or a generic "Invalid data" without the demuxing + // prefix passes through. + assert!(!is_ffprobe_demux_noise( + "[in#0/mov,mp4,m4a,3gp,3g2,mj2 @ 0x954c14000] Error muxing packet (track 2)" + )); + assert!(!is_ffprobe_demux_noise( + "[in#0/mov @ 0x1234] Could not find codec parameters for stream 0" + )); + assert!(!is_ffprobe_demux_noise( + "Invalid data found when processing input" + )); + // No `[in#…]` prefix → not the noise we're after. + assert!(!is_ffprobe_demux_noise( + "Error during demuxing: Invalid data found when processing input" + )); + } + + #[test] + fn is_ffprobe_demux_noise_rejects_unrelated_lines() { + assert!(!is_ffprobe_demux_noise("")); + assert!(!is_ffprobe_demux_noise(" ")); + assert!(!is_ffprobe_demux_noise( + "[INFO 12:34:56] [Track 1/12] Downloading \"Hello\"" + )); + assert!(!is_ffprobe_demux_noise( + "Traceback (most recent call last):" + )); + // Line that LOOKS like a bracketed prefix but uses a different + // shape (e.g. structlog's `[level HH:MM:SS]` prefix on + // GAMDL v3.0+ wrapped lines). + assert!(!is_ffprobe_demux_noise( + "[INFO 12:34:56] [in#0/mov,mp4 @ 0x1234] Error during demuxing: Invalid data found when processing input" + )); + } + + #[test] + fn is_ffprobe_demux_noise_requires_zero_x_hex_pointer_form() { + // ffmpeg always prints the pointer as `0x`; if upstream + // ever switches format the gate fails open (noise resumes) + // rather than over-suppressing. + assert!(!is_ffprobe_demux_noise( + "[in#0/mov,mp4,m4a,3gp,3g2,mj2 @ 12345] Error during demuxing: Invalid data found when processing input" + )); + } + // ---------------------------------------------------------- // Storefront-mismatch detector tests (#666) // ----------------------------------------------------------