Skip to content
Merged
7 changes: 5 additions & 2 deletions cmd/jivetalking/main.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package main

import (
"errors"
"fmt"
"os"
"time"
Expand All @@ -20,6 +21,8 @@ import (
// Release builds: git tag (e.g. "0.1.0")
var version = "dev"

// errCancelledByUser is the sentinel error returned by runAnalysisWithTUI when
// the TUI exits before the analysis model reports Done. Callers detect it with
// errors.Is rather than comparing error strings.
var errCancelledByUser = errors.New("cancelled by user")

// CLI defines the command-line interface
type CLI struct {
Version bool `short:"v" help:"Show version information"`
Expand Down Expand Up @@ -256,7 +259,7 @@ func runAnalysisOnly(files []string, log func(string, ...any)) {
}

if analysisErr != nil {
if analysisErr.Error() == "cancelled by user" {
if errors.Is(analysisErr, errCancelledByUser) {
// User pressed Ctrl+C - exit immediately, don't process remaining files
return
}
Expand Down Expand Up @@ -335,7 +338,7 @@ func runAnalysisWithTUI(inputPath string, config *processor.FilterChainConfig, l

// Check for user cancellation (TUI exited without completing analysis)
if !analysisModel.Done {
return nil, nil, fmt.Errorf("cancelled by user")
return nil, nil, errCancelledByUser
}

return analysisModel.Measurements, analysisModel.Config, nil
Expand Down
11 changes: 4 additions & 7 deletions internal/processor/adaptive.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,10 +269,7 @@ const (
la2aNoiseFloorNoisy = -45.0 // dBFS - above: noisy, reduce wet

// LA-2A High-crest override constants
// These mirror calculateLimiterCeiling() in normalise.go - keep in sync.
la2aHighCrestMaxDeficit = 6.0 // dB, deficit at which severity reaches 1.0 (beyond ~6 dB, pre-gain handles it)
la2aHighCrestSafetyMargin = 1.5 // dB, matches calculateLimiterCeiling safetyMargin
la2aHighCrestMinLimiterCeiling = -24.0 // dBTP, alimiter hardware floor
la2aHighCrestMaxDeficit = 6.0 // dB, deficit at which severity reaches 1.0 (beyond ~6 dB, pre-gain handles it)

// Default fallback values for sanitization
ds201DefaultHPFreq = 80.0
Expand Down Expand Up @@ -1316,11 +1313,11 @@ func applyHighCrestOverrides(config *FilterChainConfig, measurements *AudioMeasu
debugLog("high-crest: SpeechProfile is nil, using full-file InputI/InputTP for deficit calculation")
}

// Deficit calculation - mirrors calculateLimiterCeiling() in normalise.go
// Deficit calculation
gainRequired := NormTargetLUFS - measurements.InputI
projectedTP := measurements.InputTP + gainRequired
idealCeiling := config.LoudnormTargetTP - gainRequired - la2aHighCrestSafetyMargin
deficit := la2aHighCrestMinLimiterCeiling - idealCeiling
idealCeiling := config.LoudnormTargetTP - gainRequired - safetyMarginDB
deficit := minLimiterCeilingDB - idealCeiling

// Always populate diagnostic fields
config.LA2AHighCrestDeficit = deficit
Expand Down
32 changes: 14 additions & 18 deletions internal/processor/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,6 @@ func AnalyzeAudio(filename string, config *FilterChainConfig, progressCallback f
var intervalAcc intervalAccumulator
intervalAcc.reset() // Initialize with proper defaults
var intervalStartTime time.Duration
var lastFrameTime time.Duration // Track for end-of-file handling

// Track input frame time (before filter graph, which upsamples to 192kHz)
var inputSamplesProcessed int64
Expand All @@ -352,8 +351,6 @@ func AnalyzeAudio(filename string, config *FilterChainConfig, progressCallback f
// Calculate input frame time based on samples processed (before filter graph upsampling)
inputFrameTime := time.Duration(float64(inputSamplesProcessed) / inputSampleRate * float64(time.Second))
inputSamplesProcessed += int64(inputFrame.NbSamples())
lastFrameTime = inputFrameTime

// Accumulate RMS and peak from INPUT frame (before filter graph which upsamples to 192kHz)
// This gives accurate RMS and peak values matching the original audio levels
intervalAcc.addFrameRMSAndPeak(inputFrame)
Expand Down Expand Up @@ -400,7 +397,6 @@ func AnalyzeAudio(filename string, config *FilterChainConfig, progressCallback f
}

// Note: We intentionally discard partial intervals with no data
_ = lastFrameTime // Silence unused variable warning (used for debugging if needed)

// Free the filter graph
ffmpeg.AVFilterGraphFree(&filterGraph)
Expand Down Expand Up @@ -442,20 +438,20 @@ func AnalyzeAudio(filename string, config *FilterChainConfig, progressCallback f

// Calculate average spectral statistics from aspectralstats
if acc.spectralFrameCount > 0 {
frameCount := float64(acc.spectralFrameCount)
measurements.SpectralMean = acc.spectralMeanSum / frameCount
measurements.SpectralVariance = acc.spectralVarianceSum / frameCount
measurements.SpectralCentroid = acc.spectralCentroidSum / frameCount
measurements.SpectralSpread = acc.spectralSpreadSum / frameCount
measurements.SpectralSkewness = acc.spectralSkewnessSum / frameCount
measurements.SpectralKurtosis = acc.spectralKurtosisSum / frameCount
measurements.SpectralEntropy = acc.spectralEntropySum / frameCount
measurements.SpectralFlatness = acc.spectralFlatnessSum / frameCount
measurements.SpectralCrest = acc.spectralCrestSum / frameCount
measurements.SpectralFlux = acc.spectralFluxSum / frameCount
measurements.SpectralSlope = acc.spectralSlopeSum / frameCount
measurements.SpectralDecrease = acc.spectralDecreaseSum / frameCount
measurements.SpectralRolloff = acc.spectralRolloffSum / frameCount
spectralFrameCountF := float64(acc.spectralFrameCount)
measurements.SpectralMean = acc.spectralMeanSum / spectralFrameCountF
measurements.SpectralVariance = acc.spectralVarianceSum / spectralFrameCountF
measurements.SpectralCentroid = acc.spectralCentroidSum / spectralFrameCountF
measurements.SpectralSpread = acc.spectralSpreadSum / spectralFrameCountF
measurements.SpectralSkewness = acc.spectralSkewnessSum / spectralFrameCountF
measurements.SpectralKurtosis = acc.spectralKurtosisSum / spectralFrameCountF
measurements.SpectralEntropy = acc.spectralEntropySum / spectralFrameCountF
measurements.SpectralFlatness = acc.spectralFlatnessSum / spectralFrameCountF
measurements.SpectralCrest = acc.spectralCrestSum / spectralFrameCountF
measurements.SpectralFlux = acc.spectralFluxSum / spectralFrameCountF
measurements.SpectralSlope = acc.spectralSlopeSum / spectralFrameCountF
measurements.SpectralDecrease = acc.spectralDecreaseSum / spectralFrameCountF
measurements.SpectralRolloff = acc.spectralRolloffSum / spectralFrameCountF
}

// Store astats measurements (if captured)
Expand Down
75 changes: 0 additions & 75 deletions internal/processor/analyzer_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,6 @@ type intervalAccumulator struct {
rawSampleCount int64 // Total sample count for this interval
rawPeakAbs float64 // Maximum absolute sample value (linear, 0.0-1.0) for this interval

// ─── Peak tracking (max per interval, from astats metadata) ─────────────────
peakMax float64 // Maximum peak level from astats (dBFS) - cumulative, less accurate

// ─── aspectralstats accumulators (valid per-window from FFmpeg) ─────────────
spectralMeanSum float64
spectralVarianceSum float64
Expand Down Expand Up @@ -109,9 +106,6 @@ type intervalFrameMetrics struct {
// add accumulates a frame's metrics into the interval.
func (a *intervalAccumulator) add(m intervalFrameMetrics) {
// Peak levels: keep maximum
if a.frameCount == 0 || m.PeakLevel > a.peakMax {
a.peakMax = m.PeakLevel
}
if a.frameCount == 0 || m.TruePeak > a.truePeakMax {
a.truePeakMax = m.TruePeak
}
Expand Down Expand Up @@ -314,9 +308,6 @@ func (a *intervalAccumulator) reset() {
a.rawSampleCount = 0
a.rawPeakAbs = 0

// Peak tracking (astats metadata)
a.peakMax = -120.0

// aspectralstats
a.spectralMeanSum = 0
a.spectralVarianceSum = 0
Expand Down Expand Up @@ -392,15 +383,6 @@ var (
metaKeyEbur128SamplePeak = ffmpeg.GlobalCStr("lavfi.r128.sample_peak")
metaKeyEbur128LRA = ffmpeg.GlobalCStr("lavfi.r128.LRA")
metaKeyEbur128TargetThresh = ffmpeg.GlobalCStr("lavfi.r128.target_threshold")

// Silence detection metadata keys (from silencedetect filter)
// For mono audio these are lavfi.silence_start.1, lavfi.silence_end.1, lavfi.silence_duration.1
metaKeySilenceStart = ffmpeg.GlobalCStr("lavfi.silence_start")
metaKeySilenceStart1 = ffmpeg.GlobalCStr("lavfi.silence_start.1")
metaKeySilenceEnd = ffmpeg.GlobalCStr("lavfi.silence_end")
metaKeySilenceEnd1 = ffmpeg.GlobalCStr("lavfi.silence_end.1")
metaKeySilenceDuration = ffmpeg.GlobalCStr("lavfi.silence_duration")
metaKeySilenceDur1 = ffmpeg.GlobalCStr("lavfi.silence_duration.1")
)

// metadataAccumulators holds all accumulator variables for frame metadata extraction.
Expand Down Expand Up @@ -557,13 +539,6 @@ type metadataAccumulators struct {
ebur128InputSP float64 // Sample peak
ebur128InputLRA float64
ebur128Found bool

// Silence detection (collected across frames)
// silencedetect sets lavfi.silence_start on first frame of silence,
// then lavfi.silence_end and lavfi.silence_duration on first frame after silence ends
silenceRegions []SilenceRegion
pendingSilenceStart float64 // Pending silence start timestamp (seconds)
hasPendingSilence bool // Whether we have a pending silence start
}

// getFloatMetadata extracts a float value from the metadata dictionary
Expand Down Expand Up @@ -828,56 +803,6 @@ func extractFrameMetadata(metadata *ffmpeg.AVDictionary, acc *metadataAccumulato
if value, ok := getFloatMetadata(metadata, metaKeyEbur128LRA); ok {
acc.ebur128InputLRA = value
}

// Extract silence detection metadata
// silencedetect sets lavfi.silence_start on the first frame of a silence region,
// then lavfi.silence_end and lavfi.silence_duration on the first frame after silence ends.
// For mono audio, these may be suffixed with .1
var silenceStart float64
var hasSilenceStart bool
if value, ok := getFloatMetadata(metadata, metaKeySilenceStart); ok {
silenceStart = value
hasSilenceStart = true
} else if value, ok := getFloatMetadata(metadata, metaKeySilenceStart1); ok {
silenceStart = value
hasSilenceStart = true
}

if hasSilenceStart {
acc.pendingSilenceStart = silenceStart
acc.hasPendingSilence = true
}

// Check for silence end - this completes a silence region
var silenceEnd, silenceDuration float64
var hasSilenceEnd bool
if value, ok := getFloatMetadata(metadata, metaKeySilenceEnd); ok {
silenceEnd = value
hasSilenceEnd = true
} else if value, ok := getFloatMetadata(metadata, metaKeySilenceEnd1); ok {
silenceEnd = value
hasSilenceEnd = true
}

if hasSilenceEnd {
// Get duration - try both keys
if value, ok := getFloatMetadata(metadata, metaKeySilenceDuration); ok {
silenceDuration = value
} else if value, ok := getFloatMetadata(metadata, metaKeySilenceDur1); ok {
silenceDuration = value
}

// Record the completed silence region
if acc.hasPendingSilence {
region := SilenceRegion{
Start: time.Duration(acc.pendingSilenceStart * float64(time.Second)),
End: time.Duration(silenceEnd * float64(time.Second)),
Duration: time.Duration(silenceDuration * float64(time.Second)),
}
acc.silenceRegions = append(acc.silenceRegions, region)
acc.hasPendingSilence = false
}
}
}

// outputMetadataAccumulators holds accumulator variables for Pass 2 output measurement extraction.
Expand Down
48 changes: 6 additions & 42 deletions internal/processor/analyzer_output.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,24 +24,6 @@ type regionMeasurements struct {
FramesProcessed int64
}

// MeasureOutputSilenceRegion measures the elected silence region of the
// processed output file so its metrics can be used for before/after
// comparison and adaptive tuning.
//
// region should carry the same Start/Duration as the NoiseProfile from Pass 1
// analysis. The file at outputPath is opened with audio.OpenAudioFile and the
// reader is closed before this function returns.
//
// On an open failure it returns nil metrics and a wrapped error; otherwise it
// returns whatever measureOutputSilenceRegionFromReader produces, i.e. full
// SilenceCandidateMetrics with amplitude, spectral, and loudness measurements.
func MeasureOutputSilenceRegion(outputPath string, region SilenceRegion) (*SilenceCandidateMetrics, error) {
	processed, _, openErr := audio.OpenAudioFile(outputPath)
	if openErr != nil {
		return nil, fmt.Errorf("failed to open output file: %w", openErr)
	}
	// Release the reader once the measurement has been taken.
	defer processed.Close()

	metrics, measureErr := measureOutputSilenceRegionFromReader(processed, region)
	return metrics, measureErr
}

// measureOutputRegionFromReader measures amplitude, spectral, and loudness
// metrics for a time region in an already-opened audio file. This is the
// shared implementation behind measureOutputSilenceRegionFromReader and
Expand Down Expand Up @@ -177,15 +159,15 @@ func measureOutputRegionFromReader(reader *audio.Reader, start, duration time.Du
// measureOutputSilenceRegionFromReader measures a silence region and maps
// the result to SilenceCandidateMetrics.
func measureOutputSilenceRegionFromReader(reader *audio.Reader, region SilenceRegion) (*SilenceCandidateMetrics, error) {
debugLog("=== MeasureOutputSilenceRegion: start=%.3fs, duration=%.3fs ===",
debugLog("=== measureOutputSilenceRegion: start=%.3fs, duration=%.3fs ===",
region.Start.Seconds(), region.Duration.Seconds())

result, err := measureOutputRegionFromReader(reader, region.Start, region.Duration)
if err != nil {
return nil, err
}

debugLog("=== MeasureOutputSilenceRegion SUMMARY ===")
debugLog("=== measureOutputSilenceRegion SUMMARY ===")

return &SilenceCandidateMetrics{
Region: region,
Expand All @@ -202,8 +184,8 @@ func measureOutputSilenceRegionFromReader(reader *audio.Reader, region SilenceRe

// MeasureOutputRegions measures both silence and speech regions from the same
// output file in a single open/close cycle. This avoids redundant file opens,
// demuxing, and decoding that would occur when calling MeasureOutputSilenceRegion
// and MeasureOutputSpeechRegion independently.
// demuxing, and decoding that would occur if silence and speech regions were
// measured in separate passes.
//
// Either region parameter may be nil to skip that measurement. Returns nil for
// any skipped or failed measurement (non-fatal - matches existing behaviour).
Expand Down Expand Up @@ -251,36 +233,18 @@ func MeasureOutputRegions(outputPath string, silenceRegion *SilenceRegion, speec
return silenceMetrics, nil
}

// MeasureOutputSpeechRegion measures a speech region of the processed output
// file so its metrics can be used for adaptive filter tuning and validation.
//
// region should identify a representative speech section of the processed
// audio. The file at outputPath is opened with audio.OpenAudioFile and the
// reader is closed before this function returns.
//
// On an open failure it returns nil metrics and a wrapped error; otherwise it
// returns whatever measureOutputSpeechRegionFromReader produces, i.e. full
// SpeechCandidateMetrics with amplitude, spectral, and loudness measurements.
func MeasureOutputSpeechRegion(outputPath string, region SpeechRegion) (*SpeechCandidateMetrics, error) {
	processed, _, openErr := audio.OpenAudioFile(outputPath)
	if openErr != nil {
		return nil, fmt.Errorf("failed to open output file: %w", openErr)
	}
	// Release the reader once the measurement has been taken.
	defer processed.Close()

	metrics, measureErr := measureOutputSpeechRegionFromReader(processed, region)
	return metrics, measureErr
}

// measureOutputSpeechRegionFromReader measures a speech region and maps
// the result to SpeechCandidateMetrics.
func measureOutputSpeechRegionFromReader(reader *audio.Reader, region SpeechRegion) (*SpeechCandidateMetrics, error) {
debugLog("=== MeasureOutputSpeechRegion: start=%.3fs, duration=%.3fs ===",
debugLog("=== measureOutputSpeechRegion: start=%.3fs, duration=%.3fs ===",
region.Start.Seconds(), region.Duration.Seconds())

result, err := measureOutputRegionFromReader(reader, region.Start, region.Duration)
if err != nil {
return nil, err
}

debugLog("=== MeasureOutputSpeechRegion SUMMARY ===")
debugLog("=== measureOutputSpeechRegion SUMMARY ===")

return &SpeechCandidateMetrics{
Region: region,
Expand Down
39 changes: 39 additions & 0 deletions internal/processor/analyzer_output_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package processor

import (
"fmt"

"github.com/linuxmatters/jivetalking/internal/audio"
)

// measureOutputSilenceRegion measures the elected silence region of the
// processed output file so its metrics can be used for before/after
// comparison and adaptive tuning.
//
// region should carry the same Start/Duration as the NoiseProfile from Pass 1
// analysis. The file at outputPath is opened with audio.OpenAudioFile and the
// reader is closed before this function returns.
//
// On an open failure it returns nil metrics and a wrapped error; otherwise it
// returns the result of measureOutputSilenceRegionFromReader unchanged.
func measureOutputSilenceRegion(outputPath string, region SilenceRegion) (*SilenceCandidateMetrics, error) {
	src, _, openErr := audio.OpenAudioFile(outputPath)
	if openErr != nil {
		return nil, fmt.Errorf("failed to open output file: %w", openErr)
	}
	// Release the reader once the measurement has been taken.
	defer src.Close()

	metrics, measureErr := measureOutputSilenceRegionFromReader(src, region)
	return metrics, measureErr
}

// measureOutputSpeechRegion measures a speech region of the processed output
// file so its metrics can be used for adaptive filter tuning and validation.
//
// region should identify a representative speech section of the processed
// audio. The file at outputPath is opened with audio.OpenAudioFile and the
// reader is closed before this function returns.
//
// On an open failure it returns nil metrics and a wrapped error; otherwise it
// returns the result of measureOutputSpeechRegionFromReader unchanged.
func measureOutputSpeechRegion(outputPath string, region SpeechRegion) (*SpeechCandidateMetrics, error) {
	src, _, openErr := audio.OpenAudioFile(outputPath)
	if openErr != nil {
		return nil, fmt.Errorf("failed to open output file: %w", openErr)
	}
	// Release the reader once the measurement has been taken.
	defer src.Close()

	metrics, measureErr := measureOutputSpeechRegionFromReader(src, region)
	return metrics, measureErr
}
Loading