From c5db21498497957130305028987f4cc9b22e6d4d Mon Sep 17 00:00:00 2001 From: Ziyang Guo <121015044+RerankerGuo@users.noreply.github.com> Date: Thu, 11 Jun 2026 01:31:09 +0800 Subject: [PATCH] fix(record): persist L1 extraction failure diagnostics Signed-off-by: Ziyang Guo <121015044+RerankerGuo@users.noreply.github.com> --- src/core/record/l1-extractor.test.ts | 61 +++++++++++++++ src/core/record/l1-extractor.ts | 111 ++++++++++++++++++++++++--- 2 files changed, 162 insertions(+), 10 deletions(-) create mode 100644 src/core/record/l1-extractor.test.ts diff --git a/src/core/record/l1-extractor.test.ts b/src/core/record/l1-extractor.test.ts new file mode 100644 index 0000000..4764ef4 --- /dev/null +++ b/src/core/record/l1-extractor.test.ts @@ -0,0 +1,61 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { extractL1Memories } from "./l1-extractor.js"; +import type { ConversationMessage } from "../conversation/l0-recorder.js"; +import type { LLMRunner } from "../types.js"; + +const tempDirs: string[] = []; + +describe("extractL1Memories diagnostics", () => { + afterEach(async () => { + await Promise.all(tempDirs.splice(0).map((dir) => fs.rm(dir, { recursive: true, force: true }))); + }); + + it("persists malformed LLM extraction responses for troubleshooting", async () => { + const dataDir = await fs.mkdtemp(path.join(os.tmpdir(), "tdai-l1-diagnostics-")); + tempDirs.push(dataDir); + + const messages: ConversationMessage[] = [ + { + id: "msg_1", + role: "user", + content: "Remember that I prefer short status updates for engineering work.", + timestamp: Date.now(), + }, + { + id: "msg_2", + role: "assistant", + content: "Got it. 
I will keep status updates concise.", + timestamp: Date.now() + 1, + }, + ]; + const llmRunner: LLMRunner = { + async run() { + return '[{"scene_name":"work","message_ids":["msg_1"],"memories":[{"content":"broken quote}]}]'; + }, + }; + + const result = await extractL1Memories({ + messages, + sessionKey: "diagnostic-session", + sessionId: "session-1", + baseDir: dataDir, + config: {}, + options: { llmRunner }, + }); + + expect(result.success).toBe(true); + expect(result.extractedCount).toBe(0); + + const diagnosticPath = path.join(dataDir, ".metadata", "l1-extraction-failures.jsonl"); + const lines = (await fs.readFile(diagnosticPath, "utf-8")).trim().split("\n"); + expect(lines).toHaveLength(1); + const entry = JSON.parse(lines[0]!); + expect(entry.reason).toBe("parse_error"); + expect(entry.sessionKey).toBe("diagnostic-session"); + expect(entry.newMessageIds).toEqual(["msg_1", "msg_2"]); + expect(entry.rawResponse).toContain("broken quote"); + }); +}); diff --git a/src/core/record/l1-extractor.ts b/src/core/record/l1-extractor.ts index d38fd4b..c400375 100644 --- a/src/core/record/l1-extractor.ts +++ b/src/core/record/l1-extractor.ts @@ -12,6 +12,9 @@ * 4. 
Write to L1 JSONL files */ +import fs from "node:fs/promises"; +import path from "node:path"; +import crypto from "node:crypto"; import type { ConversationMessage } from "../conversation/l0-recorder.js"; import { EXTRACT_MEMORIES_SYSTEM_PROMPT, formatExtractionPrompt } from "../prompts/l1-extraction.js"; import { batchDedup } from "./l1-dedup.js"; @@ -43,6 +46,14 @@ interface SceneSegment { }>; } +interface ParsedExtractionResult { + scenes: SceneSegment[]; + failure?: { + reason: "empty_response" | "no_json_array" | "not_array" | "parse_error"; + message: string; + }; +} + export interface L1ExtractionResult { /** Whether extraction succeeded */ success: boolean; @@ -156,6 +167,9 @@ export async function extractL1Memories(params: { backgroundMessages, previousSceneName: options.previousSceneName, config, + baseDir, + sessionKey, + sessionId, logger, model: options.model, llmRunner: options.llmRunner, @@ -298,12 +312,15 @@ async function callLlmExtraction(params: { backgroundMessages: ConversationMessage[]; previousSceneName?: string; config: unknown; + baseDir: string; + sessionKey: string; + sessionId?: string; logger?: Logger; model?: string; /** Host-neutral LLM runner — when provided, used instead of CleanContextRunner. 
*/ llmRunner?: LLMRunner; }): Promise { - const { newMessages, backgroundMessages, previousSceneName, config, logger, model, llmRunner } = params; + const { newMessages, backgroundMessages, previousSceneName, config, baseDir, sessionKey, sessionId, logger, model, llmRunner } = params; const userPrompt = formatExtractionPrompt({ newMessages, @@ -343,17 +360,40 @@ async function callLlmExtraction(params: { }); } - return parseExtractionResult(result, logger); + const parsed = parseExtractionResult(result, logger); + if (parsed.failure) { + await writeExtractionFailureDiagnostic({ + baseDir, + sessionKey, + sessionId, + reason: parsed.failure.reason, + message: parsed.failure.message, + rawResponse: result, + newMessages, + backgroundMessages, + model, + previousSceneName, + logger, + }); + } + + return parsed.scenes; } /** * Parse the LLM's JSON response into SceneSegment array. * Expected format: [{scene_name, message_ids, memories: [...]}] */ -function parseExtractionResult(raw: string, logger?: Logger): SceneSegment[] { +function parseExtractionResult(raw: string, logger?: Logger): ParsedExtractionResult { try { // Strip markdown code block wrappers if present let cleaned = raw.trim(); + if (!cleaned) { + const message = "LLM returned an empty extraction response"; + logger?.warn?.(`${TAG} ${message}`); + return { scenes: [], failure: { reason: "empty_response", message } }; + } + if (cleaned.startsWith("```")) { cleaned = cleaned.replace(/^```(?:json)?\s*\n?/, "").replace(/\n?```\s*$/, ""); } @@ -361,13 +401,14 @@ function parseExtractionResult(raw: string, logger?: Logger): SceneSegment[] { // Try to extract JSON array const arrayMatch = cleaned.match(/\[[\s\S]*\]/); if (!arrayMatch) { - logger?.warn?.(`${TAG} No JSON array found in extraction response`); + const message = "No JSON array found in extraction response"; + logger?.warn?.(`${TAG} ${message}`); // [l1-debug] NO_JSON — dump the full raw so we can see what the LLM actually said const rawPreview = 
raw.slice(0, 2048); logger?.warn?.( `${TAG} [l1-debug] NO_JSON taskId=l1-extraction, rawLen=${raw.length}, cleanedLen=${cleaned.length}, rawFull=${JSON.stringify(rawPreview)}${raw.length > 2048 ? `…(+${raw.length - 2048})` : ""}`, ); - return []; + return { scenes: [], failure: { reason: "no_json_array", message } }; } // Sanitize control characters inside JSON string literals that LLM may produce @@ -375,8 +416,9 @@ function parseExtractionResult(raw: string, logger?: Logger): SceneSegment[] { const parsed = JSON.parse(sanitized) as unknown[]; if (!Array.isArray(parsed)) { - logger?.warn?.(`${TAG} Extraction response is not an array`); - return []; + const message = "Extraction response is not an array"; + logger?.warn?.(`${TAG} ${message}`); + return { scenes: [], failure: { reason: "not_array", message } }; } const scenes: SceneSegment[] = []; @@ -401,10 +443,59 @@ function parseExtractionResult(raw: string, logger?: Logger): SceneSegment[] { }); } - return scenes; + return { scenes }; } catch (err) { - logger?.warn?.(`${TAG} Failed to parse extraction result: ${err instanceof Error ? err.message : String(err)}`); - return []; + const message = `Failed to parse extraction result: ${err instanceof Error ? 
err.message : String(err)}`;
+    logger?.warn?.(`${TAG} ${message}`);
+    return { scenes: [], failure: { reason: "parse_error", message } };
+  }
+}
+
+/**
+ * Append one JSON line describing a failed L1 extraction parse to
+ * `<baseDir>/.metadata/l1-extraction-failures.jsonl`.
+ *
+ * The `.metadata` directory is created on demand. Errors thrown by `mkdir` or
+ * `appendFile` are logged as warnings and swallowed, so a failure to persist
+ * the diagnostic does not reject the returned promise.
+ *
+ * @param params.baseDir - Directory under which `.metadata` is created.
+ * @param params.sessionKey - Session key recorded verbatim in the entry.
+ * @param params.sessionId - Optional session id; stored as `""` when absent.
+ * @param params.reason - Failure category taken from `ParsedExtractionResult.failure`.
+ * @param params.message - Human-readable description of the failure.
+ * @param params.rawResponse - Raw LLM output; only the first 20,000 UTF-16 code
+ *   units are stored, alongside the full length and a truncation flag.
+ * @param params.newMessages - Messages being extracted; only their ids are stored.
+ * @param params.backgroundMessages - Context messages; only their ids are stored.
+ * @param params.model - Optional model name; stored as `""` when absent.
+ * @param params.previousSceneName - Optional scene name; stored as `""` when absent.
+ * @param params.logger - Optional logger used for the success/failure warnings.
+ * @returns A promise that resolves once the write has been attempted.
+ */
+async function writeExtractionFailureDiagnostic(params: {
+  baseDir: string;
+  sessionKey: string;
+  sessionId?: string;
+  // `failure` is optional, so strip `undefined` before indexing into it;
+  // indexing the optional property type directly does not compile in strict mode.
+  reason: NonNullable<ParsedExtractionResult["failure"]>["reason"];
+  message: string;
+  rawResponse: string;
+  newMessages: ConversationMessage[];
+  backgroundMessages: ConversationMessage[];
+  model?: string;
+  previousSceneName?: string;
+  logger?: Logger;
+}): Promise<void> {
+  const now = new Date().toISOString();
+  const diagnosticsDir = path.join(params.baseDir, ".metadata");
+  const filePath = path.join(diagnosticsDir, "l1-extraction-failures.jsonl");
+  // Cap the stored response so a runaway LLM output cannot bloat the JSONL file.
+  const rawResponseLimit = 20_000;
+  const rawResponseTruncated = params.rawResponse.length > rawResponseLimit;
+  const entry = {
+    // Millisecond timestamp plus 4 random bytes, so entries written in the same millisecond still get distinct ids.
+    id: `l1fail_${Date.now()}_${crypto.randomBytes(4).toString("hex")}`,
+    timestamp: now,
+    reason: params.reason,
+    message: params.message,
+    sessionKey: params.sessionKey,
+    sessionId: params.sessionId ?? "",
+    model: params.model ?? "",
+    previousSceneName: params.previousSceneName ?? "",
+    newMessageIds: params.newMessages.map((m) => m.id),
+    backgroundMessageIds: params.backgroundMessages.map((m) => m.id),
+    rawResponse: params.rawResponse.slice(0, rawResponseLimit),
+    rawResponseLength: params.rawResponse.length,
+    rawResponseTruncated,
+  };
+
+  try {
+    await fs.mkdir(diagnosticsDir, { recursive: true });
+    await fs.appendFile(filePath, JSON.stringify(entry) + "\n", "utf-8");
+    params.logger?.warn?.(
+      `${TAG} Wrote L1 extraction failure diagnostic: ${filePath} ` +
+        `(reason=${params.reason}, rawLen=${params.rawResponse.length})`,
+    );
+  } catch (err) {
+    // Diagnostics are best-effort: report the write failure but do not rethrow.
+    params.logger?.warn?.(
+      `${TAG} Failed to write L1 extraction failure diagnostic: ${err instanceof Error ? err.message : String(err)}`,
+    );
   }
 }