diff --git a/src/lib/groomer/llm.test.ts b/src/lib/groomer/llm.test.ts index 7dae9e6a..b4545b03 100644 --- a/src/lib/groomer/llm.test.ts +++ b/src/lib/groomer/llm.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it, vi, beforeEach } from "vitest"; import { callGroomerLLM, buildGroomerResponseSchema } from "./llm"; +import { ALLOWED_GROOMER_LABELS } from "./schema"; import { getLaneIds } from "@/lib/lane-config"; describe("callGroomerLLM", () => { @@ -198,6 +199,25 @@ describe("buildGroomerResponseSchema", () => { expect(laneId.enum).toEqual(getLaneIds()); expect(laneId.enum.length).toBeGreaterThan(0); }); + + it("enum-constrains label arrays to the validator's allowlist", () => { + const schema = buildGroomerResponseSchema() as any; + expect(schema.properties.labelsToAdd.items.enum).toEqual([...ALLOWED_GROOMER_LABELS]); + expect(schema.properties.labelsToRemove.items.enum).toEqual([...ALLOWED_GROOMER_LABELS]); + // The exact failure seen in prod: a 4B inventing "type/refactor". + expect(schema.properties.labelsToAdd.items.enum).not.toContain("type/refactor"); + }); + + it("bounds proposedTitle to the validator's 10-200 chars (or null)", () => { + const schema = buildGroomerResponseSchema() as any; + const title = schema.properties.proposedTitle; + expect(title.anyOf).toEqual([ + { type: "null" }, + { type: "string", minLength: 10, maxLength: 200 }, + ]); + const body = schema.properties.proposedBody; + expect(body.anyOf).toEqual([{ type: "null" }, { type: "string", maxLength: 9999 }]); + }); }); describe("callGroomerLLM response_format", () => { diff --git a/src/lib/groomer/llm.ts b/src/lib/groomer/llm.ts index d0f48e0a..aab596fe 100644 --- a/src/lib/groomer/llm.ts +++ b/src/lib/groomer/llm.ts @@ -1,4 +1,4 @@ -import type { GroomerOutput } from "./schema"; +import { ALLOWED_GROOMER_LABELS, type GroomerOutput } from "./schema"; import { getConfiguredLanes, getClaimableLanes, getBacklogLane, getLaneIds } from "@/lib/lane-config"; import { STATUS_LABELS, PRIORITY_LABELS } from "@/types"; @@ -82,8 +82,10 @@ export function buildGroomerResponseSchema(): Record { properties: { actionability: { type: "string", enum: ["ready", "needs_info", "blocked", "backlog", "already_done"] }, confidence, - labelsToAdd: { type: "array", items: { type: "string" } }, - labelsToRemove: { type: "array", items: { type: "string" } }, + // Enum-constrained to the validator's allowlist so the model cannot + // invent labels (a 4B happily emits "type/refactor" otherwise). + labelsToAdd: { type: "array", items: { type: "string", enum: [...ALLOWED_GROOMER_LABELS] } }, + labelsToRemove: { type: "array", items: { type: "string", enum: [...ALLOWED_GROOMER_LABELS] } }, lane: { type: "object", additionalProperties: false, @@ -102,8 +104,10 @@ export function buildGroomerResponseSchema(): Record { type: "string", enum: ["promote_to_ready", "escalate", "mark_not_ready", "mark_needs_info", "mark_blocked"], }, - proposedTitle: { type: "string" }, - proposedBody: { type: "string" }, + // The validator requires 10-200 chars (or omitted/null). Without the + // bounds in the grammar a small model emits "" instead of omitting. + proposedTitle: { anyOf: [{ type: "null" }, { type: "string", minLength: 10, maxLength: 200 }] }, + proposedBody: { anyOf: [{ type: "null" }, { type: "string", maxLength: 9999 }] }, }, }; } diff --git a/src/lib/groomer/schema.ts b/src/lib/groomer/schema.ts index c3e92e6b..013fd0dd 100644 --- a/src/lib/groomer/schema.ts +++ b/src/lib/groomer/schema.ts @@ -33,11 +33,20 @@ export interface ValidationResult { const validTypeLabels = ["type/bug", "type/feature", "type/chore", "type/research", "type/security"]; +/** + * Every label the groomer may add or remove. Exported so the LLM response + * schema (`buildGroomerResponseSchema`) can enum-constrain label output to + * exactly this set — a grammar-constrained small model then cannot invent + * labels (e.g. "type/refactor") that this validator would reject. + */ +export const ALLOWED_GROOMER_LABELS: readonly string[] = [ + ...STATUS_LABELS, + ...PRIORITY_LABELS, + ...validTypeLabels, +]; + function isAllowedLabel(label: string): boolean { - if (STATUS_LABELS.some((s) => s === label)) return true; - if (PRIORITY_LABELS.some((s) => s === label)) return true; - if (validTypeLabels.includes(label)) return true; - return false; + return ALLOWED_GROOMER_LABELS.includes(label); } function validateLabelList(list: unknown[], kind: string): string[] {