diff --git a/cmd/optiqor/golden_test.go b/cmd/optiqor/golden_test.go index 3f93bc8..d2331ff 100644 --- a/cmd/optiqor/golden_test.go +++ b/cmd/optiqor/golden_test.go @@ -67,11 +67,19 @@ func TestCmd_Golden_Stable(t *testing.T) { // normalize strips the test's cwd and the repo root from filepath.Abs // output so goldens stay bit-identical across laptops and CI runners. func normalize(s string) string { + s = strings.ReplaceAll(s, `\`, `/`) + s = strings.ReplaceAll(s, `//`, `/`) + s = strings.ReplaceAll(s, `https:/`, `https://`) + s = strings.ReplaceAll(s, `http:/`, `http://`) + if cwd, err := os.Getwd(); err == nil { - s = strings.ReplaceAll(s, cwd, "") + cwdNorm := strings.ReplaceAll(cwd, `\`, `/`) + if repo, err := filepath.Abs(filepath.Join(cwd, "..", "..")); err == nil { - s = strings.ReplaceAll(s, repo, "") + repoNorm := strings.ReplaceAll(repo, `\`, `/`) + s = strings.ReplaceAll(s, repoNorm, "") } + s = strings.ReplaceAll(s, cwdNorm, "") } return s } diff --git a/pkg/rules/excessive_replicas.go b/pkg/rules/excessive_replicas.go index 0f88349..af0b9cd 100644 --- a/pkg/rules/excessive_replicas.go +++ b/pkg/rules/excessive_replicas.go @@ -36,5 +36,13 @@ func (excessiveReplicaCount) Run(w parser.Workload) []Finding { Detail: fmt.Sprintf("Replicas set to %d. Past ~20 replicas the cost grows linearly while the marginal availability gain approaches zero — most cloud zones can't fit that many across enough fault domains. Cap the HPA's maxReplicas or split the workload across multiple deployments.", w.Replicas), Severity: SeverityMed, Confidence: ConfidenceMed, + Signal: &Signal{ + Label: "replicas", + Have: float64(excessiveReplicaThreshold), + Want: float64(w.Replicas), + HaveDisplay: fmt.Sprintf("%d", excessiveReplicaThreshold), + WantDisplay: fmt.Sprintf("%d", w.Replicas), + Note: fmt.Sprintf("%d replicas", w.Replicas), + }, }} } diff --git a/pkg/rules/incomplete_request.go b/pkg/rules/incomplete_request.go index b4528ee..d14db0b 100644 --- a/pkg/rules/incomplete_request.go +++ b/pkg/rules/incomplete_request.go @@ -33,6 +33,14 @@ func (cpuWithoutMemoryRequest) Run(w parser.Workload) []Finding { Detail: "requests.cpu is set but requests.memory is not. The scheduler can't reserve memory accurately, so the pod is BestEffort for memory — first to evict under pressure. Add a memory request matching observed P95.", Severity: SeverityLow, Confidence: ConfidenceHigh, + Signal: &Signal{ + Label: "memory", + Have: 0, + Want: 0, + HaveDisplay: "unset", + WantDisplay: "required", + Note: "memory request missing", + }, }} } @@ -54,5 +62,13 @@ func (memoryWithoutCPURequest) Run(w parser.Workload) []Finding { Detail: "requests.memory is set but requests.cpu is not. The scheduler can't reserve CPU, so bin-packing assumes zero — pods can stack onto a single node and starve each other. Add a CPU request matching observed P95.", Severity: SeverityLow, Confidence: ConfidenceHigh, + Signal: &Signal{ + Label: "CPU", + Have: 0, + Want: 0, + HaveDisplay: "unset", + WantDisplay: "required", + Note: "CPU request missing", + }, }} } diff --git a/pkg/rules/oversized_limits.go b/pkg/rules/oversized_limits.go index 2bdda32..ca31b0f 100644 --- a/pkg/rules/oversized_limits.go +++ b/pkg/rules/oversized_limits.go @@ -38,6 +38,14 @@ func (oversizedCPULimit) Run(w parser.Workload) []Finding { Detail: fmt.Sprintf("CPU limit is %s. Above 4 vCPU, the pod can only land on large nodes; smaller / Spot instance types are excluded from bin-packing. Either split the workload or confirm the high limit is justified by P99.", w.Limits.CPU), Severity: SeverityMed, Confidence: ConfidenceMed, + Signal: &Signal{ + Label: "CPU", + Have: float64(w.Requests.CPU.Value), + Want: float64(w.Limits.CPU.Value), + HaveDisplay: w.Requests.CPU.String(), + WantDisplay: w.Limits.CPU.String(), + Note: "limit > 4 vCPU", + }, }} } @@ -59,5 +67,13 @@ func (oversizedMemoryLimit) Run(w parser.Workload) []Finding { Detail: fmt.Sprintf("Memory limit is %s. Above 16 GiB, the pod can only land on memory-class nodes; balanced / Spot bin-packing is excluded. Confirm the workload genuinely uses this much, or split the workload.", w.Limits.Memory), Severity: SeverityMed, Confidence: ConfidenceMed, + Signal: &Signal{ + Label: "memory", + Have: float64(w.Requests.Memory.Value), + Want: float64(w.Limits.Memory.Value), + HaveDisplay: w.Requests.Memory.String(), + WantDisplay: w.Limits.Memory.String(), + Note: "limit > 16 GiB", + }, }} } diff --git a/pkg/rules/tiny_request.go b/pkg/rules/tiny_request.go index 89b5060..39b0dee 100644 --- a/pkg/rules/tiny_request.go +++ b/pkg/rules/tiny_request.go @@ -38,6 +38,14 @@ func (tinyCPURequest) Run(w parser.Workload) []Finding { Detail: fmt.Sprintf("requests.cpu is %s — below the 10m threshold most charts use as a sentinel. Probably a placeholder from a Helm scaffold. Set it to your observed P95 (or remove the limit-without-request asymmetry the scheduler is currently dealing with).", w.Requests.CPU), Severity: SeverityLow, Confidence: ConfidenceHigh, + Signal: &Signal{ + Label: "CPU", + Have: float64(w.Requests.CPU.Value), + Want: float64(tinyCPUMillicores), + HaveDisplay: w.Requests.CPU.String(), + WantDisplay: "10m", + Note: "below 10m sentinel", + }, }} } @@ -62,5 +70,13 @@ func (tinyMemoryRequest) Run(w parser.Workload) []Finding { Detail: fmt.Sprintf("requests.memory is %s — below 32 MiB. A workload that genuinely needs less memory is a rarity; most charts setting tiny memory requests are using a placeholder. Set it to your observed P95.", w.Requests.Memory), Severity: SeverityLow, Confidence: ConfidenceHigh, + Signal: &Signal{ + Label: "memory", + Have: float64(w.Requests.Memory.Value), + Want: float64(tinyMemoryBytes), + HaveDisplay: w.Requests.Memory.String(), + WantDisplay: "32Mi", + Note: "below 32Mi sentinel", + }, }} } diff --git a/testdata/golden/analyze_fixture_plain.txt b/testdata/golden/analyze_fixture_plain.txt index 15c9d01..337e71c 100644 --- a/testdata/golden/analyze_fixture_plain.txt +++ b/testdata/golden/analyze_fixture_plain.txt @@ -156,6 +156,8 @@ │ │ │ Replica count past the HA inflection point │ │ │ + │ replicas 20 ███████████████████░░░░░ 25 25 replicas │ + │ │ │ Replicas set to 25. Past ~20 replicas the cost grows linearly while the │ │ marginal availability gain approaches zero — most cloud zones can't fit │ │ that many across enough fault domains. Cap the HPA's maxReplicas or split │ @@ -168,6 +170,8 @@ │ │ │ CPU limit forces large-node scheduling │ │ │ + │ CPU 2 ██████░░░░░░░░░░░░░░░░░░ 8 limit > 4 vCPU │ + │ │ │ CPU limit is 8. Above 4 vCPU, the pod can only land on large nodes; │ │ smaller / Spot instance types are excluded from bin-packing. Either split │ │ the workload or confirm the high limit is justified by P99. │ @@ -179,6 +183,8 @@ │ │ │ Memory limit forces large-node scheduling │ │ │ + │ memory 8Gi ██████░░░░░░░░░░░░░░░░░░ 32Gi limit > 16 GiB │ + │ │ │ Memory limit is 32Gi. Above 16 GiB, the pod can only land on memory-class │ │ nodes; balanced / Spot bin-packing is excluded. Confirm the workload │ │ genuinely uses this much, or split the workload. │ @@ -286,6 +292,8 @@ │ │ │ CPU request set without memory request │ │ │ + │ memory unset ░░░░░░░░░░░░░░░░░░░░░░░░ required memory request missing │ + │ │ │ requests.cpu is set but requests.memory is not. The scheduler can't │ │ reserve memory accurately, so the pod is BestEffort for memory — first │ │ to evict under pressure. Add a memory request matching observed P95. │ @@ -297,6 +305,8 @@ │ │ │ CPU request below the placeholder threshold │ │ │ + │ CPU 5m ████████████░░░░░░░░░░░░ 10m below 10m sentinel │ + │ │ │ requests.cpu is 5m — below the 10m threshold most charts use as a │ │ sentinel. Probably a placeholder from a Helm scaffold. Set it to your │ │ observed P95 (or remove the limit-without-request asymmetry the scheduler │ @@ -309,6 +319,8 @@ │ │ │ Memory request set without CPU request │ │ │ + │ CPU unset ░░░░░░░░░░░░░░░░░░░░░░░░ required CPU request missing │ + │ │ │ requests.memory is set but requests.cpu is not. The scheduler can't │ │ reserve CPU, so bin-packing assumes zero — pods can stack onto a single │ │ node and starve each other. Add a CPU request matching observed P95. │ @@ -320,6 +332,8 @@ │ │ │ Memory request below the placeholder threshold │ │ │ + │ memory 16Mi ████████████░░░░░░░░░░░░ 32Mi below 32Mi sentinel │ + │ │ │ requests.memory is 16Mi — below 32 MiB. A workload that genuinely needs │ │ less memory is a rarity; most charts setting tiny memory requests are │ │ using a placeholder. Set it to your observed P95. │ diff --git a/testdata/golden/demo_json.txt b/testdata/golden/demo_json.txt index 09191db..662a7a0 100644 --- a/testdata/golden/demo_json.txt +++ b/testdata/golden/demo_json.txt @@ -470,7 +470,14 @@ "Severity": "LOW", "Confidence": "high", "Category": "cost", - "Signal": null + "Signal": { + "label": "memory", + "have": 0, + "want": 0, + "have_display": "unset", + "want_display": "required", + "note": "memory request missing" + } }, { "DetectorID": "tiny-cpu-request", @@ -481,7 +488,14 @@ "Severity": "LOW", "Confidence": "high", "Category": "cost", - "Signal": null + "Signal": { + "label": "CPU", + "have": 5, + "want": 10, + "have_display": "5m", + "want_display": "10m", + "note": "below 10m sentinel" + } }, { "DetectorID": "allow-privilege-escalation", @@ -565,7 +579,14 @@ "Severity": "MED", "Confidence": "medium", "Category": "cost", - "Signal": null + "Signal": { + "label": "replicas", + "have": 20, + "want": 25, + "have_display": "20", + "want_display": "25", + "note": "25 replicas" + } }, { "DetectorID": "oversized-cpu-limit", @@ -576,7 +597,14 @@ "Severity": "MED", "Confidence": "medium", "Category": "cost", - "Signal": null + "Signal": { + "label": "CPU", + "have": 2000, + "want": 8000, + "have_display": "2", + "want_display": "8", + "note": "limit /u003e 4 vCPU" + } }, { "DetectorID": "oversized-memory-limit", @@ -587,7 +615,14 @@ "Severity": "MED", "Confidence": "medium", "Category": "cost", - "Signal": null + "Signal": { + "label": "memory", + "have": 8589934592, + "want": 34359738368, + "have_display": "8Gi", + "want_display": "32Gi", + "note": "limit /u003e 16 GiB" + } }, { "DetectorID": "replicas-too-high", @@ -638,7 +673,14 @@ "Severity": "LOW", "Confidence": "high", "Category": "cost", - "Signal": null + "Signal": { + "label": "CPU", + "have": 0, + "want": 0, + "have_display": "unset", + "want_display": "required", + "note": "CPU request missing" + } }, { "DetectorID": "tiny-memory-request", @@ -649,7 +691,14 @@ "Severity": "LOW", "Confidence": "high", "Category": "cost", - "Signal": null + "Signal": { + "label": "memory", + "have": 16777216, + "want": 33554432, + "have_display": "16Mi", + "want_display": "32Mi", + "note": "below 32Mi sentinel" + } }, { "DetectorID": "dangerous-capability-added", @@ -1174,7 +1223,14 @@ "Severity": "LOW", "Confidence": "high", "Category": "cost", - "Signal": null + "Signal": { + "label": "memory", + "have": 0, + "want": 0, + "have_display": "unset", + "want_display": "required", + "note": "memory request missing" + } }, { "DetectorID": "tiny-cpu-request", @@ -1185,7 +1241,14 @@ "Severity": "LOW", "Confidence": "high", "Category": "cost", - "Signal": null + "Signal": { + "label": "CPU", + "have": 5, + "want": 10, + "have_display": "5m", + "want_display": "10m", + "note": "below 10m sentinel" + } }, { "DetectorID": "cpu-limit-far-above-request", @@ -1214,7 +1277,14 @@ "Severity": "MED", "Confidence": "medium", "Category": "cost", - "Signal": null + "Signal": { + "label": "replicas", + "have": 20, + "want": 25, + "have_display": "20", + "want_display": "25", + "note": "25 replicas" + } }, { "DetectorID": "oversized-cpu-limit", @@ -1225,7 +1295,14 @@ "Severity": "MED", "Confidence": "medium", "Category": "cost", - "Signal": null + "Signal": { + "label": "CPU", + "have": 2000, + "want": 8000, + "have_display": "2", + "want_display": "8", + "note": "limit /u003e 4 vCPU" + } }, { "DetectorID": "oversized-memory-limit", @@ -1236,7 +1313,14 @@ "Severity": "MED", "Confidence": "medium", "Category": "cost", - "Signal": null + "Signal": { + "label": "memory", + "have": 8589934592, + "want": 34359738368, + "have_display": "8Gi", + "want_display": "32Gi", + "note": "limit /u003e 16 GiB" + } }, { "DetectorID": "replicas-too-high", @@ -1265,7 +1349,14 @@ "Severity": "LOW", "Confidence": "high", "Category": "cost", - "Signal": null + "Signal": { + "label": "CPU", + "have": 0, + "want": 0, + "have_display": "unset", + "want_display": "required", + "note": "CPU request missing" + } }, { "DetectorID": "tiny-memory-request", @@ -1276,7 +1367,14 @@ "Severity": "LOW", "Confidence": "high", "Category": "cost", - "Signal": null + "Signal": { + "label": "memory", + "have": 16777216, + "want": 33554432, + "have_display": "16Mi", + "want_display": "32Mi", + "note": "below 32Mi sentinel" + } }, { "DetectorID": "cpu-limit-far-above-request", diff --git a/testdata/golden/demo_plain.txt b/testdata/golden/demo_plain.txt index 4b7a54e..17a6f5c 100644 --- a/testdata/golden/demo_plain.txt +++ b/testdata/golden/demo_plain.txt @@ -156,6 +156,8 @@ │ │ │ Replica count past the HA inflection point │ │ │ + │ replicas 20 ███████████████████░░░░░ 25 25 replicas │ + │ │ │ Replicas set to 25. Past ~20 replicas the cost grows linearly while the │ │ marginal availability gain approaches zero — most cloud zones can't fit │ │ that many across enough fault domains. Cap the HPA's maxReplicas or split │ @@ -168,6 +170,8 @@ │ │ │ CPU limit forces large-node scheduling │ │ │ + │ CPU 2 ██████░░░░░░░░░░░░░░░░░░ 8 limit > 4 vCPU │ + │ │ │ CPU limit is 8. Above 4 vCPU, the pod can only land on large nodes; │ │ smaller / Spot instance types are excluded from bin-packing. Either split │ │ the workload or confirm the high limit is justified by P99. │ @@ -179,6 +183,8 @@ │ │ │ Memory limit forces large-node scheduling │ │ │ + │ memory 8Gi ██████░░░░░░░░░░░░░░░░░░ 32Gi limit > 16 GiB │ + │ │ │ Memory limit is 32Gi. Above 16 GiB, the pod can only land on memory-class │ │ nodes; balanced / Spot bin-packing is excluded. Confirm the workload │ │ genuinely uses this much, or split the workload. │ @@ -286,6 +292,8 @@ │ │ │ CPU request set without memory request │ │ │ + │ memory unset ░░░░░░░░░░░░░░░░░░░░░░░░ required memory request missing │ + │ │ │ requests.cpu is set but requests.memory is not. The scheduler can't │ │ reserve memory accurately, so the pod is BestEffort for memory — first │ │ to evict under pressure. Add a memory request matching observed P95. │ @@ -297,6 +305,8 @@ │ │ │ CPU request below the placeholder threshold │ │ │ + │ CPU 5m ████████████░░░░░░░░░░░░ 10m below 10m sentinel │ + │ │ │ requests.cpu is 5m — below the 10m threshold most charts use as a │ │ sentinel. Probably a placeholder from a Helm scaffold. Set it to your │ │ observed P95 (or remove the limit-without-request asymmetry the scheduler │ @@ -309,6 +319,8 @@ │ │ │ Memory request set without CPU request │ │ │ + │ CPU unset ░░░░░░░░░░░░░░░░░░░░░░░░ required CPU request missing │ + │ │ │ requests.memory is set but requests.cpu is not. The scheduler can't │ │ reserve CPU, so bin-packing assumes zero — pods can stack onto a single │ │ node and starve each other. Add a CPU request matching observed P95. │ @@ -320,6 +332,8 @@ │ │ │ Memory request below the placeholder threshold │ │ │ + │ memory 16Mi ████████████░░░░░░░░░░░░ 32Mi below 32Mi sentinel │ + │ │ │ requests.memory is 16Mi — below 32 MiB. A workload that genuinely needs │ │ less memory is a rarity; most charts setting tiny memory requests are │ │ using a placeholder. Set it to your observed P95. │ diff --git a/testdata/golden/score_fixture_json.txt b/testdata/golden/score_fixture_json.txt index bf22df8..90e620d 100644 --- a/testdata/golden/score_fixture_json.txt +++ b/testdata/golden/score_fixture_json.txt @@ -510,7 +510,14 @@ "Severity": "LOW", "Confidence": "high", "Category": "cost", - "Signal": null + "Signal": { + "label": "memory", + "have": 0, + "want": 0, + "have_display": "unset", + "want_display": "required", + "note": "memory request missing" + } }, { "DetectorID": "tiny-cpu-request", @@ -521,7 +528,14 @@ "Severity": "LOW", "Confidence": "high", "Category": "cost", - "Signal": null + "Signal": { + "label": "CPU", + "have": 5, + "want": 10, + "have_display": "5m", + "want_display": "10m", + "note": "below 10m sentinel" + } }, { "DetectorID": "allow-privilege-escalation", @@ -605,7 +619,14 @@ "Severity": "MED", "Confidence": "medium", "Category": "cost", - "Signal": null + "Signal": { + "label": "replicas", + "have": 20, + "want": 25, + "have_display": "20", + "want_display": "25", + "note": "25 replicas" + } }, { "DetectorID": "oversized-cpu-limit", @@ -616,7 +637,14 @@ "Severity": "MED", "Confidence": "medium", "Category": "cost", - "Signal": null + "Signal": { + "label": "CPU", + "have": 2000, + "want": 8000, + "have_display": "2", + "want_display": "8", + "note": "limit /u003e 4 vCPU" + } }, { "DetectorID": "oversized-memory-limit", @@ -627,7 +655,14 @@ "Severity": "MED", "Confidence": "medium", "Category": "cost", - "Signal": null + "Signal": { + "label": "memory", + "have": 8589934592, + "want": 34359738368, + "have_display": "8Gi", + "want_display": "32Gi", + "note": "limit /u003e 16 GiB" + } }, { "DetectorID": "replicas-too-high", @@ -678,7 +713,14 @@ "Severity": "LOW", "Confidence": "high", "Category": "cost", - "Signal": null + "Signal": { + "label": "CPU", + "have": 0, + "want": 0, + "have_display": "unset", + "want_display": "required", + "note": "CPU request missing" + } }, { "DetectorID": "tiny-memory-request", @@ -689,7 +731,14 @@ "Severity": "LOW", "Confidence": "high", "Category": "cost", - "Signal": null + "Signal": { + "label": "memory", + "have": 16777216, + "want": 33554432, + "have_display": "16Mi", + "want_display": "32Mi", + "note": "below 32Mi sentinel" + } }, { "DetectorID": "dangerous-capability-added",