From 709b0f5db71bd1706a36db4ae421eac5b68d3c91 Mon Sep 17 00:00:00 2001
From: Jory Irving <jory.irving@users.noreply.github.com>
Date: Wed, 1 Jul 2026 15:22:59 -0600
Subject: [PATCH] feat(groomer): schema-constrained LLM output (json_schema)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The groomer used response_format {type:"json_object"}, which guarantees
syntactically valid JSON but not conformance to any schema, so a model could
emit wrong or missing fields. Switch to {type:"json_schema"} carrying a JSON
Schema for the groomer output, so that a llama.cpp backend (via litellm)
grammar-constrains decoding to the exact shape. This is what lets a small
model (e.g. the 4B vision model) produce reliably structured output.

- buildGroomerResponseSchema() constrains the structure and the fixed enums,
  and builds lane.id as a dynamic enum from the configured lanes (getLaneIds)
  so the model can't emit a hallucinated lane. If no lanes are configured,
  lane.id is left as an unconstrained string.
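- callGroomerLLM retries once with json_object when the json_schema request
  returns HTTP 400 (taken to mean the backend lacks json_schema support), so
  grooming keeps working on such backends. Note that any 400 triggers the
  retry, not only an unsupported response_format. validateGroomerOutput still
  runs as the safety net and handles enum alias canonicalization — belt and
  suspenders.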

Related: #498 (dispatch audit). Enables running the groomer on a small local model.
---
 src/lib/groomer/llm.test.ts |  45 +++++++++++++++-
 src/lib/groomer/llm.ts      | 103 +++++++++++++++++++++++++++++-------
 2 files changed, 128 insertions(+), 20 deletions(-)

diff --git a/src/lib/groomer/llm.test.ts b/src/lib/groomer/llm.test.ts
index 142dddee..7dae9e6a 100644
--- a/src/lib/groomer/llm.test.ts
+++ b/src/lib/groomer/llm.test.ts
@@ -1,5 +1,6 @@
 import { describe, expect, it, vi, beforeEach } from "vitest";
-import { callGroomerLLM } from "./llm";
+import { callGroomerLLM, buildGroomerResponseSchema } from "./llm";
+import { getLaneIds } from "@/lib/lane-config";
 
 describe("callGroomerLLM", () => {
   const originalFetch = global.fetch;
@@ -30,7 +31,7 @@ describe("callGroomerLLM", () => {
     capturedBody = JSON.parse((global.fetch as any).mock.calls[0][1].body);
     expect(capturedUrl).toBe("https://llm.example.com/chat/completions");
     expect(capturedBody.model).toBe("gpt-4o-mini");
-    expect(capturedBody.response_format?.type).toBe("json_object");
+    expect(capturedBody.response_format?.type).toBe("json_schema");
   });
 
   it("returns parsed JSON from LLM response", async () => {
@@ -186,3 +187,43 @@ describe("callGroomerLLM", () => {
     expect(result.lane.id).toBe("local");
   });
 });
+
+describe("buildGroomerResponseSchema", () => {
+  it("requires lane/labels, forbids extra props, and constrains lane.id to configured lanes", () => {
+    const schema = buildGroomerResponseSchema() as any; // cast: the declared return type is Record<string, unknown>
+    expect(schema.required).toEqual(expect.arrayContaining(["labelsToAdd", "labelsToRemove", "lane"]));
+    expect(schema.additionalProperties).toBe(false);
+    expect(schema.properties.lane.required).toEqual(["id", "confidence", "reason"]);
+    const laneId = schema.properties.lane.properties.id;
+    expect(laneId.enum).toEqual(getLaneIds()); // same ids in the same order as the lane config
+    expect(laneId.enum.length).toBeGreaterThan(0); // the test environment must provide at least one configured lane
+  });
+});
+
+describe("callGroomerLLM response_format", () => {
+  beforeEach(() => vi.restoreAllMocks()); // NOTE(review): global.fetch is assigned directly below and not restored in this block — confirm cleanup
+
+  const okContent = () => // minimal model reply that uses the first configured lane id
+    `{"labelsToAdd":[],"labelsToRemove":[],"lane":{"id":"${getLaneIds()[0]}","confidence":"high","reason":"r"}}`;
+
+  it("sends json_schema (name + dynamic lane enum) on the first attempt", async () => {
+    global.fetch = vi.fn().mockResolvedValue({ ok: true, json: async () => ({ choices: [{ message: { content: okContent() } }] }) });
+    await callGroomerLLM({ baseUrl: "https://llm.example.com", apiKey: "k", model: "vision", prompt: "p", timeoutMs: 1000 });
+    const body = JSON.parse((global.fetch as any).mock.calls[0][1].body); // request body of the first fetch call
+    expect(body.response_format.type).toBe("json_schema");
+    expect(body.response_format.json_schema.name).toBe("groomer_output");
+    expect(body.response_format.json_schema.schema.properties.lane.properties.id.enum).toEqual(getLaneIds());
+  });
+
+  it("falls back to json_object when the backend rejects json_schema (400)", async () => {
+    const fetchMock = vi.fn()
+      .mockResolvedValueOnce({ ok: false, status: 400, text: async () => "unsupported response_format" }) // 1st call: rejected with 400
+      .mockResolvedValueOnce({ ok: true, status: 200, json: async () => ({ choices: [{ message: { content: okContent() } }] }) }); // 2nd call: succeeds
+    global.fetch = fetchMock as any;
+    const result = await callGroomerLLM({ baseUrl: "https://llm.example.com", apiKey: "k", model: "vision", prompt: "p", timeoutMs: 1000 });
+    expect(fetchMock).toHaveBeenCalledTimes(2); // exactly one retry
+    expect(JSON.parse(fetchMock.mock.calls[0][1].body).response_format.type).toBe("json_schema");
+    expect(JSON.parse(fetchMock.mock.calls[1][1].body).response_format.type).toBe("json_object");
+    expect(result.lane.confidence).toBe("high"); // the result comes from the second (fallback) response
+  });
+});
diff --git a/src/lib/groomer/llm.ts b/src/lib/groomer/llm.ts
index 22d77c27..d0f48e0a 100644
--- a/src/lib/groomer/llm.ts
+++ b/src/lib/groomer/llm.ts
@@ -1,5 +1,5 @@
 import type { GroomerOutput } from "./schema";
-import { getConfiguredLanes, getClaimableLanes, getBacklogLane } from "@/lib/lane-config";
+import { getConfiguredLanes, getClaimableLanes, getBacklogLane, getLaneIds } from "@/lib/lane-config";
 import { STATUS_LABELS, PRIORITY_LABELS } from "@/types";
 
 export interface CallLlmOptions {
@@ -61,6 +61,78 @@ Body enrichment rules:
 - Keep enriched body under 10000 characters`;
 }
 
+const CONFIDENCE_ENUM = ["high", "medium", "low"] as const; // allowed values for the schema's confidence fields
+
+/**
+ * Builds the JSON Schema sent as the `schema` of an OpenAI-style `json_schema`
+ * response_format, so a schema-enforcing backend (e.g. llama.cpp via litellm)
+ * constrains decoding to this shape. Only `labelsToAdd`, `labelsToRemove` and
+ * `lane` are required; unknown properties are rejected at both object levels.
+ * `lane.id` is an enum of `getLaneIds()` (plain string when that list is empty).
+ * `validateGroomerOutput` still runs afterward as the safety net (and handles
+ * enum alias canonicalization), so this is belt-and-suspenders.
+ */
+export function buildGroomerResponseSchema(): Record<string, unknown> {
+  const laneIds = getLaneIds();
+  const confidence = { type: "string", enum: [...CONFIDENCE_ENUM] }; // one object, reused for both confidence fields
+  return {
+    type: "object",
+    additionalProperties: false,
+    required: ["labelsToAdd", "labelsToRemove", "lane"], // every other top-level property is optional
+    properties: {
+      actionability: { type: "string", enum: ["ready", "needs_info", "blocked", "backlog", "already_done"] },
+      confidence,
+      labelsToAdd: { type: "array", items: { type: "string" } },
+      labelsToRemove: { type: "array", items: { type: "string" } },
+      lane: {
+        type: "object",
+        additionalProperties: false,
+        required: ["id", "confidence", "reason"],
+        properties: {
+          id: laneIds.length > 0 ? { type: "string", enum: laneIds } : { type: "string" }, // no lanes -> id left unconstrained
+          confidence,
+          reason: { type: "string" },
+        },
+      },
+      summary: { type: "string" },
+      githubComment: { type: "string" },
+      needsInfoReason: { type: "string" },
+      blockedReason: { type: "string" },
+      nextGroomingAction: {
+        type: "string",
+        enum: ["promote_to_ready", "escalate", "mark_not_ready", "mark_needs_info", "mark_blocked"],
+      },
+      proposedTitle: { type: "string" },
+      proposedBody: { type: "string" },
+    },
+  };
+}
+
+function postChatCompletion(
+  url: string,
+  options: CallLlmOptions,
+  responseFormat: unknown, // sent verbatim as the request's response_format
+  signal: AbortSignal, // forwarded to fetch so the caller can abort the request
+): Promise<Response> { // one chat-completions POST; resolves with the raw Response, status is not checked here
+  return fetch(url, {
+    method: "POST",
+    headers: {
+      Authorization: `Bearer ${options.apiKey}`,
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      model: options.model,
+      messages: [
+        { role: "system", content: buildSystemPrompt() }, // system prompt is rebuilt on every call
+        { role: "user", content: options.prompt },
+      ],
+      response_format: responseFormat,
+      temperature: 0.1,
+    }),
+    signal,
+  });
+}
+
 export async function callGroomerLLM(options: CallLlmOptions): Promise<GroomerOutput> {
   const url = `${options.baseUrl}/chat/completions`;
 
@@ -68,23 +140,18 @@ export async function callGroomerLLM(options: CallLlmOptions): Promise<GroomerOu
   const timeoutId = setTimeout(() => controller.abort(), options.timeoutMs);
 
   try {
-    const response = await fetch(url, {
-      method: "POST",
-      headers: {
-        Authorization: `Bearer ${options.apiKey}`,
-        "Content-Type": "application/json",
-      },
-      body: JSON.stringify({
-        model: options.model,
-        messages: [
-          { role: "system", content: buildSystemPrompt() },
-          { role: "user", content: options.prompt },
-        ],
-        response_format: { type: "json_object" },
-        temperature: 0.1,
-      }),
-      signal: controller.signal,
-    });
+    // Prefer schema-constrained decoding. Fall back to plain JSON mode if the
+    // backend rejects json_schema (400), so grooming never breaks on a serving
+    // stack that doesn't support it; validateGroomerOutput repairs content either way.
+    let response = await postChatCompletion(
+      url,
+      options,
+      { type: "json_schema", json_schema: { name: "groomer_output", schema: buildGroomerResponseSchema() } },
+      controller.signal,
+    );
+    if (response.status === 400) {
+      response = await postChatCompletion(url, options, { type: "json_object" }, controller.signal);
+    }
 
     if (!response.ok) {
       const text = await response.text();