From 48f252a9fe56d117df8f8aaef1e5b930dd5ecdc0 Mon Sep 17 00:00:00 2001 From: Daphne Pareas Date: Wed, 29 Apr 2026 14:13:28 -0400 Subject: [PATCH] Rename Hostname label to hostname All the other labels are entirely uppercase or lowercase. Joining it with slurm-derived metrics (whose hostname label is identical except for case) triggers weird Grafana bugs such as duplicate time series. Tested: DISABLE_STARTUP_VALIDATE=true go test ./internal/pkg/rendermetrics/... --- internal/pkg/integration_test/collector_test.go | 2 +- internal/pkg/rendermetrics/render_metrics.go | 8 ++++---- internal/pkg/rendermetrics/render_metrics_test.go | 8 ++++---- internal/pkg/server/server_test.go | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/internal/pkg/integration_test/collector_test.go b/internal/pkg/integration_test/collector_test.go index 7a2099e8..5c186575 100644 --- a/internal/pkg/integration_test/collector_test.go +++ b/internal/pkg/integration_test/collector_test.go @@ -769,7 +769,7 @@ func TestXIDCollector_Gather_Encode(t *testing.T) { assert.NotEmpty(t, *mv.Label[2].Value) assert.Equal(t, "device", *mv.Label[3].Name) assert.Equal(t, "modelName", *mv.Label[4].Name) - assert.Equal(t, "Hostname", *mv.Label[5].Name) + assert.Equal(t, "hostname", *mv.Label[5].Name) assert.Equal(t, "window_size_in_ms", *mv.Label[6].Name) assert.Equal(t, "xid", *mv.Label[7].Name) assert.NotEmpty(t, *mv.Label[7].Value) diff --git a/internal/pkg/rendermetrics/render_metrics.go b/internal/pkg/rendermetrics/render_metrics.go index 712fc750..7b171115 100644 --- a/internal/pkg/rendermetrics/render_metrics.go +++ b/internal/pkg/rendermetrics/render_metrics.go @@ -44,7 +44,7 @@ var ( # HELP {{ $counter.FieldName }} {{ $counter.Help }} # TYPE {{ $counter.FieldName }} {{ $counter.PromType }} {{- range $metric := $metrics }} -{{ $counter.FieldName }}{gpu="{{ $metric.GPU }}",{{ $metric.UUID }}="{{ $metric.GPUUUID }}",pci_bus_id="{{ $metric.GPUPCIBusID }}",device="{{ $metric.GPUDevice }}",modelName="{{ 
$metric.GPUModelName }}"{{if $metric.MigProfile}},GPU_I_PROFILE="{{ $metric.MigProfile }}",GPU_I_ID="{{ $metric.GPUInstanceID }}"{{end}}{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}} +{{ $counter.FieldName }}{gpu="{{ $metric.GPU }}",{{ $metric.UUID }}="{{ $metric.GPUUUID }}",pci_bus_id="{{ $metric.GPUPCIBusID }}",device="{{ $metric.GPUDevice }}",modelName="{{ $metric.GPUModelName }}"{{if $metric.MigProfile}},GPU_I_PROFILE="{{ $metric.MigProfile }}",GPU_I_ID="{{ $metric.GPUInstanceID }}"{{end}}{{if $metric.Hostname }},hostname="{{ $metric.Hostname }}"{{end}} {{- range $k, $v := $metric.Labels -}} ,{{ $k }}="{{ $v }}" @@ -80,7 +80,7 @@ var ( # HELP {{ $counter.FieldName }} {{ $counter.Help }} # TYPE {{ $counter.FieldName }} {{ $counter.PromType }} {{- range $metric := $metrics }} -{{ $counter.FieldName }}{nvswitch="{{ $metric.NvSwitch }}"{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}} +{{ $counter.FieldName }}{nvswitch="{{ $metric.NvSwitch }}"{{if $metric.Hostname }},hostname="{{ $metric.Hostname }}"{{end}} {{- range $k, $v := $metric.Labels -}} ,{{ $k }}="{{ $v }}" @@ -94,7 +94,7 @@ var ( # HELP {{ $counter.FieldName }} {{ $counter.Help }} # TYPE {{ $counter.FieldName }} {{ $counter.PromType }} {{- range $metric := $metrics }} -{{ $counter.FieldName }}{cpu="{{ $metric.GPU }}"{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}} +{{ $counter.FieldName }}{cpu="{{ $metric.GPU }}"{{if $metric.Hostname }},hostname="{{ $metric.Hostname }}"{{end}} {{- range $k, $v := $metric.Labels -}} ,{{ $k }}="{{ $v }}" @@ -108,7 +108,7 @@ var ( # HELP {{ $counter.FieldName }} {{ $counter.Help }} # TYPE {{ $counter.FieldName }} {{ $counter.PromType }} {{- range $metric := $metrics }} -{{ $counter.FieldName }}{cpucore="{{ $metric.GPU }}",cpu="{{ $metric.GPUDevice }}"{{if $metric.Hostname }},Hostname="{{ $metric.Hostname }}"{{end}} +{{ $counter.FieldName }}{cpucore="{{ $metric.GPU }}",cpu="{{ $metric.GPUDevice }}"{{if $metric.Hostname 
}},hostname="{{ $metric.Hostname }}"{{end}} {{- range $k, $v := $metric.Labels -}} ,{{ $k }}="{{ $v }}" diff --git a/internal/pkg/rendermetrics/render_metrics_test.go b/internal/pkg/rendermetrics/render_metrics_test.go index 1947ca91..d632eacc 100644 --- a/internal/pkg/rendermetrics/render_metrics_test.go +++ b/internal/pkg/rendermetrics/render_metrics_test.go @@ -73,7 +73,7 @@ func Test_render(t *testing.T) { metrics: metrics, want: `# HELP TEST_METRIC # TYPE TEST_METRIC gauge -TEST_METRIC{gpu="0",test-uuid="GPU-test-uuid-0000-0000-0000-000000000000",pci_bus_id="",device="testdevice",modelName="Test GPU Model",Hostname="testhost"} 42 +TEST_METRIC{gpu="0",test-uuid="GPU-test-uuid-0000-0000-0000-000000000000",pci_bus_id="",device="testdevice",modelName="Test GPU Model",hostname="testhost"} 42 `, }, { @@ -82,7 +82,7 @@ TEST_METRIC{gpu="0",test-uuid="GPU-test-uuid-0000-0000-0000-000000000000",pci_bu metrics: metrics, want: `# HELP TEST_METRIC # TYPE TEST_METRIC gauge -TEST_METRIC{nvswitch="0",Hostname="testhost"} 42 +TEST_METRIC{nvswitch="0",hostname="testhost"} 42 `, }, { @@ -100,7 +100,7 @@ TEST_METRIC{nvlink="0",nvswitch="0",gpu="0",gpu_uuid="GPU-test-uuid-0000-0000-00 metrics: metrics, want: `# HELP TEST_METRIC # TYPE TEST_METRIC gauge -TEST_METRIC{cpu="0",Hostname="testhost"} 42 +TEST_METRIC{cpu="0",hostname="testhost"} 42 `, }, { @@ -109,7 +109,7 @@ TEST_METRIC{cpu="0",Hostname="testhost"} 42 metrics: metrics, want: `# HELP TEST_METRIC # TYPE TEST_METRIC gauge -TEST_METRIC{cpucore="0",cpu="testdevice",Hostname="testhost"} 42 +TEST_METRIC{cpucore="0",cpu="testdevice",hostname="testhost"} 42 `, }, { diff --git a/internal/pkg/server/server_test.go b/internal/pkg/server/server_test.go index 07fdad61..8f61c0ef 100644 --- a/internal/pkg/server/server_test.go +++ b/internal/pkg/server/server_test.go @@ -44,7 +44,7 @@ import ( const expectedResponse = `# HELP TEST_METRIC # TYPE TEST_METRIC gauge 
-TEST_METRIC{gpu="0",UUID="GPU-00000000-0000-0000-0000-000000000000",pci_bus_id="",device="nvidia0",modelName="NVIDIA T400 4GB",Hostname="testhost"} 42 +TEST_METRIC{gpu="0",UUID="GPU-00000000-0000-0000-0000-000000000000",pci_bus_id="",device="nvidia0",modelName="NVIDIA T400 4GB",hostname="testhost"} 42 ` var deviceWatcher = devicewatcher.NewDeviceWatcher()