From 8ac6d23768130edcd552291b496e335d86e26374 Mon Sep 17 00:00:00 2001 From: Kevin Burke Date: Fri, 12 Dec 2025 13:37:51 -0800 Subject: [PATCH] engine: fix stats reporting - Ensure that any user provided tags are included with the go-version and stats version reporting tags, this ensures better correlation on the Datadog side. - Don't report a timestamp with the go/stats version reporting, to avoid problems where Prometheus says that the metric is too old (we only report it one time). --- engine.go | 20 ++++---- engine_test.go | 137 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+), 10 deletions(-) diff --git a/engine.go b/engine.go index fb2d3d2e..af8ba00d 100644 --- a/engine.go +++ b/engine.go @@ -159,7 +159,7 @@ var truthyValues = map[string]bool{ var GoVersionReportingEnabled = !truthyValues[os.Getenv("STATS_DISABLE_GO_VERSION_REPORTING")] -func (e *Engine) reportVersionOnce(t time.Time) { +func (e *Engine) reportVersionOnce() { if !GoVersionReportingEnabled { return } @@ -167,6 +167,8 @@ func (e *Engine) reportVersionOnce(t time.Time) { // configure it after creation time with e.g. the Register function. So // instead we try to do it at the moment you try to send your first metric. e.once.Do(func() { + // Include engine tags so version metrics can be correlated with + // service-identifying tags (e.g. service:myapp). measures := []Measure{ { Name: "stats_version", @@ -174,9 +176,7 @@ func (e *Engine) reportVersionOnce(t time.Time) { Name: "value", Value: intValue(1), }}, - Tags: []Tag{ - {"stats_version", version.Version}, - }, + Tags: mergeTags(e.Tags, []Tag{{"stats_version", version.Version}}), }, } // We don't want to report weird compiled Go versions like "devel" with @@ -189,17 +189,17 @@ func (e *Engine) reportVersionOnce(t time.Time) { Name: "value", Value: intValue(1), }}, - Tags: []Tag{ - {"go_version", version.GoVersion()}, - }, + Tags: mergeTags(e.Tags, []Tag{{"go_version", version.GoVersion()}}), }) } - e.Handler.HandleMeasures(t, measures...) + // Use zero time so Prometheus doesn't include a stale timestamp. + // Prometheus will use scrape time instead. Datadog ignores timestamps. + e.Handler.HandleMeasures(time.Time{}, measures...) }) } func (e *Engine) measure(t time.Time, name string, value interface{}, ftype FieldType, tags ...Tag) { - e.reportVersionOnce(t) + e.reportVersionOnce() e.measureOne(t, name, value, ftype, tags...) } @@ -248,7 +248,7 @@ func (e *Engine) Report(metrics interface{}, tags ...Tag) { // type struct, pointer to struct, or a slice or array to one of those. See // MakeMeasures for details about how to make struct types exposing metrics. func (e *Engine) ReportAt(t time.Time, metrics interface{}, tags ...Tag) { - e.reportVersionOnce(t) + e.reportVersionOnce() var tb *tagsBuffer if len(tags) == 0 { diff --git a/engine_test.go b/engine_test.go index 6c28d2e5..f263cf15 100644 --- a/engine_test.go +++ b/engine_test.go @@ -561,3 +561,140 @@ func (t *discardTransport) RoundTrip(req *http.Request) (*http.Response, error) Request: req, }, nil } + +// TestVersionMetricsIncludeEngineTags verifies that version metrics include +// the engine's configured tags for service correlation. +func TestVersionMetricsIncludeEngineTags(t *testing.T) { + initValue := stats.GoVersionReportingEnabled + stats.GoVersionReportingEnabled = true + defer func() { stats.GoVersionReportingEnabled = initValue }() + + h := &statstest.Handler{} + e := stats.NewEngine("test", h, stats.T("service", "my-app"), stats.T("env", "prod")) + + // Trigger version reporting by sending a metric + e.Incr("trigger") + + measures := h.Measures() + + // Find version metrics + var statsVersionMeasure, goVersionMeasure *stats.Measure + for i := range measures { + switch measures[i].Name { + case "stats_version": + statsVersionMeasure = &measures[i] + case "go_version": + goVersionMeasure = &measures[i] + } + } + + if statsVersionMeasure == nil { + t.Fatal("stats_version metric not found") + } + + // Check that engine tags are present in stats_version + hasServiceTag := false + hasEnvTag := false + hasVersionTag := false + for _, tag := range statsVersionMeasure.Tags { + switch tag.Name { + case "service": + if tag.Value == "my-app" { + hasServiceTag = true + } + case "env": + if tag.Value == "prod" { + hasEnvTag = true + } + case "stats_version": + hasVersionTag = true + } + } + + if !hasServiceTag { + t.Errorf("stats_version missing service tag, got tags: %v", statsVersionMeasure.Tags) + } + if !hasEnvTag { + t.Errorf("stats_version missing env tag, got tags: %v", statsVersionMeasure.Tags) + } + if !hasVersionTag { + t.Errorf("stats_version missing stats_version tag, got tags: %v", statsVersionMeasure.Tags) + } + + // Check go_version if present (may be skipped for devel versions) + if goVersionMeasure != nil { + hasServiceTag = false + hasEnvTag = false + hasGoVersionTag := false + for _, tag := range goVersionMeasure.Tags { + switch tag.Name { + case "service": + if tag.Value == "my-app" { + hasServiceTag = true + } + case "env": + if tag.Value == "prod" { + hasEnvTag = true + } + case "go_version": + hasGoVersionTag = true + } + } + + if !hasServiceTag { + t.Errorf("go_version missing service tag, got tags: %v", goVersionMeasure.Tags) + } + if !hasEnvTag { + t.Errorf("go_version missing env tag, got tags: %v", goVersionMeasure.Tags) + } + if !hasGoVersionTag { + t.Errorf("go_version missing go_version tag, got tags: %v", goVersionMeasure.Tags) + } + } +} + +// TestVersionMetricsPrometheusNoTimestamp verifies that version metrics +// are reported with zero time so Prometheus doesn't include a stale timestamp. +func TestVersionMetricsPrometheusNoTimestamp(t *testing.T) { + initValue := stats.GoVersionReportingEnabled + stats.GoVersionReportingEnabled = true + defer func() { stats.GoVersionReportingEnabled = initValue }() + + h := &prometheus.Handler{} + e := stats.NewEngine("", h) + + // Trigger version reporting + e.Incr("trigger") + + // Get prometheus output + var buf strings.Builder + h.WriteStats(&buf) + output := buf.String() + + // The prometheus output should NOT contain timestamps for version metrics. + // Timestamps in Prometheus format appear as a number after the value, e.g.: + // metric_name 1 1496614320000 + // If there's no timestamp, it's just: + // metric_name 1 + + lines := strings.Split(output, "\n") + for _, line := range lines { + // Skip empty lines, comments, and TYPE/HELP lines + if line == "" || strings.HasPrefix(line, "#") { + continue + } + + // Check version metric lines + if strings.HasPrefix(line, "stats_version") || strings.HasPrefix(line, "go_version") { + // Count space-separated parts. With timestamp: "name{labels} value timestamp" + // Without timestamp: "name{labels} value" + parts := strings.Fields(line) + // After removing the metric name (possibly with labels), we should have just the value + // The metric line format is: name{label="value"} value [timestamp] + // So we expect 2 parts without timestamp, 3 with timestamp + if len(parts) > 2 { + t.Errorf("version metric appears to have timestamp: %q", line) + } + } + } +}