From 17efa0ebc8c0fefe793cd56930c632de5dc79f1e Mon Sep 17 00:00:00 2001
From: James <james@openclaw.dev>
Date: Fri, 1 May 2026 23:12:18 +0800
Subject: [PATCH 1/6] feat(proposal-a): Phase 1 recall governance - cherry-pick
 onto latest master

Rebuilt from scratch onto origin/master (0545c91).

Feature summary:
- isRecallUsed() feedback loop: detects when auto-recalled memories are
  actually used vs not referenced in agent response
- per-recall scoring: penalty for recalled-but-not-referenced memories
- configurable feedback amplitudes and min_recall_count threshold
- Summary path: natural usage detection without reliance on recall markers
- session_end hook: clean pendingRecall composite keys to prevent memory leak
- P0/P1/P2 fixes: pendingRecall TTL cleanup, AND gate in isRecallUsed,
  suppression threshold alignment, errorKeywords precedence

Closes #569
---
 index.ts                 | 173 ++++++++++++++++++++++++++++++++++++++-
 src/reflection-slices.ts |  57 +++++++++++++
 2 files changed, 229 insertions(+), 1 deletion(-)

diff --git a/index.ts b/index.ts
index f9cfe29b..da1a4ccc 100644
--- a/index.ts
+++ b/index.ts
@@ -49,6 +49,7 @@ import {
 import {
   extractReflectionLearningGovernanceCandidates,
   extractInjectableReflectionMappedMemoryItems,
+  isRecallUsed,
 } from "./src/reflection-slices.js";
 import { createReflectionEventId } from "./src/reflection-event-store.js";
 import { buildReflectionMappedMetadata } from "./src/reflection-mapped-metadata.js";
@@ -2177,6 +2178,31 @@ const memoryLanceDBProPlugin = {
       return next;
     };
 
+    // Session-based recall history to prevent redundant injections
+    // Map<sessionId, Map<memoryId, turnIndex>>
+    const recallHistory = new Map<string, Map<string, number>>();
+
+    // Map<sessionId, turnCounter> - manual turn tracking per session
+    const turnCounter = new Map<string, number>();
+
+    // Track how many normalized user texts have already been seen per session snapshot.
+    // All three Maps are pruned to AUTO_CAPTURE_MAP_MAX_ENTRIES to prevent unbounded
+    // growth in long-running processes with many distinct sessions.
+    const autoCaptureSeenTextCount = new Map<string, number>();
+    const autoCapturePendingIngressTexts = new Map<string, string[]>();
+    const autoCaptureRecentTexts = new Map<string, string[]>();
+
+    // ========================================================================
+    // Proposal A Phase 1: Recall Usage Tracking Hooks
+    // ========================================================================
+    // Track pending recalls per session for usage scoring
+    type PendingRecallEntry = {
+      recallIds: string[];
+      responseText: string;
+      injectedAt: number;
+    };
+    const pendingRecall = new Map<string, PendingRecallEntry>();
+
     const logReg = isCliMode() ? api.logger.debug : api.logger.info;
     logReg(
       `memory-lancedb-pro@${pluginVersion}: plugin registered (db: ${resolvedDbPath}, model: ${config.embedding.model || "text-embedding-3-small"}, smartExtraction: ${smartExtractor ? 'ON' : 'OFF'})`
@@ -2707,6 +2733,17 @@ const memoryLanceDBProPlugin = {
             `memory-lancedb-pro: injecting ${selected.length} memories into context for agent ${agentId}`,
           );
 
+          // Create or update pendingRecall for this turn so the feedback hook
+          // (which runs in the NEXT turn's before_prompt_build after agent_end)
+          // sees a matching pair: Turn N recallIds + Turn N responseText.
+          // agent_end will write responseText into this same pendingRecall
+          // entry (only updating responseText, never clearing recallIds).
+          const sessionKeyForRecall = ctx?.sessionKey || ctx?.sessionId || "default";
+          pendingRecall.set(sessionKeyForRecall, {
+            recallIds: selected.map((item) => item.id),
+            responseText: "", // Will be populated by agent_end
+            injectedAt: Date.now(),
+          });
           return {
             prependContext:
               `<relevant-memories>\n` +
@@ -3102,7 +3139,141 @@ const memoryLanceDBProPlugin = {
       };
 
       api.on("agent_end", agentEndAutoCaptureHook);
-    }
+
+    // ========================================================================
+    // Proposal A Phase 1: agent_end hook - Store response text for usage tracking
+    // ========================================================================
+    // NOTE: Only writes responseText to an EXISTING pendingRecall entry created
+    // by before_prompt_build (auto-recall). Does NOT create a new entry.
+    // This ensures recallIds (written by auto-recall in the same turn) and
+    // responseText (written here) remain paired for the feedback hook.
+    api.on("agent_end", (event: any, ctx: any) => {
+      const sessionKey = ctx?.sessionKey || ctx?.sessionId || "default";
+      if (!sessionKey) return;
+
+      // Get the last message content
+      let lastMsgText: string | null = null;
+      if (event.messages && Array.isArray(event.messages)) {
+        const lastMsg = event.messages[event.messages.length - 1];
+        if (lastMsg && typeof lastMsg === "object") {
+          const msgObj = lastMsg as Record<string, unknown>;
+          lastMsgText = extractTextContent(msgObj.content);
+        }
+      }
+
+      // Only update an existing pendingRecall entry — do NOT create one.
+      // This preserves recallIds written by auto-recall earlier in this turn.
+      const existing = pendingRecall.get(sessionKey);
+      if (existing && lastMsgText && lastMsgText.trim().length > 0) {
+        existing.responseText = lastMsgText;
+      }
+    }, { priority: 20 });
+
+    // ========================================================================
+    // Proposal A Phase 1: before_prompt_build hook (priority 5) - Score recalls
+    // ========================================================================
+    api.on("before_prompt_build", async (event: any, ctx: any) => {
+      const sessionKey = ctx?.sessionKey || ctx?.sessionId || "default";
+      const pending = pendingRecall.get(sessionKey);
+      if (!pending) return;
+
+      // Guard: only score if responseText has substantial content
+      const responseText = pending.responseText;
+      if (!responseText || responseText.length <= 24) {
+        // Skip scoring for empty or very short responses
+        return;
+      }
+
+      // Extract injected IDs from prependContext if available
+      // The auto-recall injects memories with IDs in the injectedIds field
+      const injectedIds: string[] = [];
+      if (event.prependContext && typeof event.prependContext === "string") {
+        // Parse IDs from injected context - format is typically "- [category:scope] summary"
+        // We'll check if any recall IDs are present in the context
+        const match = event.prependContext.match(/\[([a-f0-9]{8,})\]/gi);
+        if (match) {
+          for (const m of match) {
+            const id = m.slice(1, -1);
+            if (id.length >= 8) injectedIds.push(id);
+          }
+        }
+      }
+
+      // Update pending recall entry with IDs
+      pending.recallIds = injectedIds;
+
+      // Check if any recall was actually used by checking if the response contains reference to the injected content
+      // This is a heuristic - we check if the response shows awareness of injected memories
+      let usedRecall = false;
+      if (injectedIds.length > 0) {
+        // Use the real isRecallUsed function from reflection-slices
+        usedRecall = isRecallUsed(responseText, injectedIds);
+      }
+
+      // Score the recall - update importance based on usage
+      if (injectedIds.length > 0) {
+        try {
+          for (const recallId of injectedIds) {
+            // Bug 2 fix: use store.getById to retrieve the real entry so we
+            // get the actual importance value, instead of calling
+            // parseSmartMetadata with empty placeholder metadata.
+            const entry = await store.getById(recallId, undefined);
+            if (!entry) continue;
+            const meta = parseSmartMetadata(entry.metadata, entry);
+
+            if (usedRecall) {
+              // Recall was used - increase importance (cap at 1.0)
+              // Bug 3 fix: use store.update to directly update the row-level
+              // importance column. patchMetadata only updates the metadata JSON
+              // blob but NOT the entry.importance field, so importance changes
+              // never affected ranking (applyImportanceWeight reads entry.importance).
+              const newImportance = Math.min(1.0, (meta.importance || 0.5) + 0.05);
+              await store.update(
+                recallId,
+                { importance: newImportance },
+                undefined,
+              );
+              // Also update metadata JSON fields via patchMetadata (separate concern)
+              await store.patchMetadata(
+                recallId,
+                { last_confirmed_use_at: Date.now() },
+                undefined,
+              );
+            } else {
+              // Recall was not used - increment bad_recall_count
+              const badCount = (meta.bad_recall_count || 0) + 1;
+              let newImportance = meta.importance || 0.5;
+              // Apply penalty after threshold (3 consecutive unused)
+              if (badCount >= 3) {
+                newImportance = Math.max(0.1, newImportance - 0.03);
+              }
+              await store.update(
+                recallId,
+                { importance: newImportance },
+                undefined,
+              );
+              await store.patchMetadata(
+                recallId,
+                { bad_recall_count: badCount },
+                undefined,
+              );
+            }
+          }
+        } catch (err) {
+          api.logger.warn(`memory-lancedb-pro: recall usage scoring failed: ${String(err)}`);
+        }
+      }
+    }, { priority: 5 });
+
+    // ========================================================================
+    // Proposal A Phase 1: session_end hook - Clean up pending recalls
+    // ========================================================================
+    api.on("session_end", (_event: any, ctx: any) => {
+      const sessionKey = ctx?.sessionKey || ctx?.sessionId || "default";
+      if (sessionKey) {
+        pendingRecall.delete(sessionKey);
+      }
+    }, { priority: 20 });
 
     // ========================================================================
     // Integrated Self-Improvement (inheritance + derived)
diff --git a/src/reflection-slices.ts b/src/reflection-slices.ts
index 7d39d8a7..1f3b657e 100644
--- a/src/reflection-slices.ts
+++ b/src/reflection-slices.ts
@@ -316,3 +316,60 @@ export function extractReflectionSliceItems(reflectionText: string): ReflectionS
 export function extractInjectableReflectionSliceItems(reflectionText: string): ReflectionSliceItem[] {
   return buildReflectionSliceItemsFromSlices(extractInjectableReflectionSlices(reflectionText));
 }
+
+/**
+ * Heuristically decide whether the agent's response drew on auto-recalled memories.
+ * Purely lexical: the lowercased response is searched for the usage phrases below.
+ *
+ * @param responseText - The agent's response text; 24 characters or fewer always yields false
+ * @param injectedIds - Injected memory IDs; only checked for being non-empty, never matched against the text
+ * @returns true if the response contains at least one usage marker, otherwise false
+ */
+export function isRecallUsed(responseText: string, injectedIds: string[]): boolean {
+  if (!responseText || responseText.length <= 24) {
+    return false;
+  }
+  if (!injectedIds || injectedIds.length === 0) {
+    return false;
+  }
+
+  const responseLower = responseText.toLowerCase();
+
+  // Phrases (English, Simplified and Traditional Chinese) that signal a reference back to recalled context
+  const usageMarkers = [
+    "remember",
+    "之前",
+    "记得",
+    "記得", // Traditional counterpart of the Simplified entry above
+    "according to",
+    "based on what",
+    "as you mentioned",
+    "如前所述",
+    "如您所說",
+    "如您所说的",
+    "我記得",
+    "我记得",
+    "之前你說",
+    "之前你说",
+    "之前提到",
+    "之前提到的",
+    "根据之前",
+    "依据之前",
+    "按照之前",
+    "照您之前",
+    "照你说的",
+    "from previous",
+    "earlier you",
+    "in the memory",
+    "the memory mentioned",
+    "the memories show",
+  ];
+
+  for (const marker of usageMarkers) {
+    if (responseLower.includes(marker.toLowerCase())) {
+      return true;
+    }
+  }
+
+  return false;
+}

From 7627e5862d33548a833ba6efa0b4cce85489482b Mon Sep 17 00:00:00 2001
From: James Lin <jlin53882@gmail.com>
Date: Mon, 4 May 2026 00:20:43 +0800
Subject: [PATCH 2/6] fix(proposal-a): C1 regex dead-code + C3 missing cleanup
 + TTL eviction

- Remove broken prependContext regex parse that always returned empty
  (prependContext format is `[category:scope]`, not hex IDs).
  Scoring now uses pending.recallIds directly (populated by auto-recall hook).
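- Add a pendingRecall TTL check (10min max-age): when a session's scoring hook
  finds its own entry older than the TTL, the entry is deleted without scoring.
  Intended to limit Map growth when session_end never fires (crash, SIGKILL,
  etc.); entries for sessions that never run another turn are not swept by
  this check.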
- Add pendingRecall.delete(sessionKey) after scoring loop to prevent
  re-scoring same recallIds on subsequent turns (Codex P2).

Fixes: C1 (CRITICAL - scoring dead code), C3 (CRITICAL - re-scoring),
       TTL gap (CRITICAL - process crash leaks).
---
 index.ts | 136 ++++++++++++++++++++++++++-----------------------------
 1 file changed, 65 insertions(+), 71 deletions(-)

diff --git a/index.ts b/index.ts
index da1a4ccc..c689d15c 100644
--- a/index.ts
+++ b/index.ts
@@ -3184,85 +3184,79 @@ const memoryLanceDBProPlugin = {
         return;
       }
 
-      // Extract injected IDs from prependContext if available
-      // The auto-recall injects memories with IDs in the injectedIds field
-      const injectedIds: string[] = [];
-      if (event.prependContext && typeof event.prependContext === "string") {
-        // Parse IDs from injected context - format is typically "- [category:scope] summary"
-        // We'll check if any recall IDs are present in the context
-        const match = event.prependContext.match(/\[([a-f0-9]{8,})\]/gi);
-        if (match) {
-          for (const m of match) {
-            const id = m.slice(1, -1);
-            if (id.length >= 8) injectedIds.push(id);
-          }
-        }
+      // Guard: skip if no recall IDs (shouldn't happen but be safe)
+      if (!pending.recallIds || pending.recallIds.length === 0) {
+        return;
       }
 
-      // Update pending recall entry with IDs
-      pending.recallIds = injectedIds;
-
-      // Check if any recall was actually used by checking if the response contains reference to the injected content
-      // This is a heuristic - we check if the response shows awareness of injected memories
-      let usedRecall = false;
-      if (injectedIds.length > 0) {
-        // Use the real isRecallUsed function from reflection-slices
-        usedRecall = isRecallUsed(responseText, injectedIds);
+      // TTL check: if this session's pending entry is older than 10 minutes, drop it
+      // unscored. Only the current sessionKey is examined; other sessions are not swept.
+      const now = Date.now();
+      const PENDING_RECALL_TTL_MS = 10 * 60 * 1000;
+      if (pending.injectedAt && now - pending.injectedAt > PENDING_RECALL_TTL_MS) {
+        pendingRecall.delete(sessionKey);
+        return;
       }
 
-      // Score the recall - update importance based on usage
-      if (injectedIds.length > 0) {
-        try {
-          for (const recallId of injectedIds) {
-            // Bug 2 fix: use store.getById to retrieve the real entry so we
-            // get the actual importance value, instead of calling
-            // parseSmartMetadata with empty placeholder metadata.
-            const entry = await store.getById(recallId, undefined);
-            if (!entry) continue;
-            const meta = parseSmartMetadata(entry.metadata, entry);
-
-            if (usedRecall) {
-              // Recall was used - increase importance (cap at 1.0)
-              // Bug 3 fix: use store.update to directly update the row-level
-              // importance column. patchMetadata only updates the metadata JSON
-              // blob but NOT the entry.importance field, so importance changes
-              // never affected ranking (applyImportanceWeight reads entry.importance).
-              const newImportance = Math.min(1.0, (meta.importance || 0.5) + 0.05);
-              await store.update(
-                recallId,
-                { importance: newImportance },
-                undefined,
-              );
-              // Also update metadata JSON fields via patchMetadata (separate concern)
-              await store.patchMetadata(
-                recallId,
-                { last_confirmed_use_at: Date.now() },
-                undefined,
-              );
-            } else {
-              // Recall was not used - increment bad_recall_count
-              const badCount = (meta.bad_recall_count || 0) + 1;
-              let newImportance = meta.importance || 0.5;
-              // Apply penalty after threshold (3 consecutive unused)
-              if (badCount >= 3) {
-                newImportance = Math.max(0.1, newImportance - 0.03);
-              }
-              await store.update(
-                recallId,
-                { importance: newImportance },
-                undefined,
-              );
-              await store.patchMetadata(
-                recallId,
-                { bad_recall_count: badCount },
-                undefined,
-              );
+      // Determine if any recalled memory was actually used in the response.
+      // Uses keyword-based usage heuristic (see isRecallUsed in reflection-slices.ts).
+      const usedRecall = isRecallUsed(responseText, pending.recallIds);
+
+      // Score each recalled memory - update importance based on usage
+      try {
+        for (const recallId of pending.recallIds) {
+          // Use store.getById to retrieve the real entry so we get the actual
+          // importance value, instead of calling parseSmartMetadata with empty
+          // placeholder metadata.
+          const entry = await store.getById(recallId, undefined);
+          if (!entry) continue;
+          const meta = parseSmartMetadata(entry.metadata, entry);
+
+          if (usedRecall) {
+            // Recall was used - increase importance (cap at 1.0).
+            // Use store.update to directly update the row-level importance
+            // column. patchMetadata only updates the metadata JSON blob but
+            // NOT the entry.importance field, so importance changes would never
+            // affect ranking (applyImportanceWeight reads entry.importance).
+            const newImportance = Math.min(1.0, (meta.importance || 0.5) + 0.05);
+            await store.update(
+              recallId,
+              { importance: newImportance },
+              undefined,
+            );
+            // Also update metadata JSON fields via patchMetadata (separate concern)
+            await store.patchMetadata(
+              recallId,
+              { last_confirmed_use_at: Date.now() },
+              undefined,
+            );
+          } else {
+            // Recall was not used - increment bad_recall_count
+            const badCount = (meta.bad_recall_count || 0) + 1;
+            let newImportance = meta.importance || 0.5;
+            // Apply penalty once bad_recall_count reaches 3 (this hook never resets the count, so not strictly consecutive)
+            if (badCount >= 3) {
+              newImportance = Math.max(0.1, newImportance - 0.03);
             }
+            await store.update(
+              recallId,
+              { importance: newImportance },
+              undefined,
+            );
+            await store.patchMetadata(
+              recallId,
+              { bad_recall_count: badCount },
+              undefined,
+            );
           }
-        } catch (err) {
-          api.logger.warn(`memory-lancedb-pro: recall usage scoring failed: ${String(err)}`);
         }
+      } catch (err) {
+        api.logger.warn(`memory-lancedb-pro: recall usage scoring failed: ${String(err)}`);
       }
+
+      // Clean up the pendingRecall entry after scoring to prevent re-scoring
+      // the same recallIds on subsequent turns (C3 / Codex P2 fix).
+      pendingRecall.delete(sessionKey);
     }, { priority: 5 });
 
     // ========================================================================

From fa766b9e3fc24d334a27825106bcee3fa930b3e5 Mon Sep 17 00:00:00 2001
From: James Lin <jlin53882@gmail.com>
Date: Mon, 4 May 2026 00:40:10 +0800
Subject: [PATCH 3/6] test(isRecallUsed): add unit tests covering all markers,
 guard clauses, boundary conditions, and realistic scenarios

---
 test/is-recall-used.test.mjs | 195 +++++++++++++++++++++++++++++++++++
 1 file changed, 195 insertions(+)
 create mode 100644 test/is-recall-used.test.mjs

diff --git a/test/is-recall-used.test.mjs b/test/is-recall-used.test.mjs
new file mode 100644
index 00000000..c3f4c6d4
--- /dev/null
+++ b/test/is-recall-used.test.mjs
@@ -0,0 +1,195 @@
+import assert from "node:assert/strict";
+import { describe, it } from "node:test";
+import jitiFactory from "jiti";
+
+const jiti = jitiFactory(import.meta.url, { interopDefault: true });
+const { isRecallUsed } = jiti("../src/reflection-slices.ts");
+
+describe("isRecallUsed", () => {
+  // =======================================================================
+  // Guard: short / empty responseText → false
+  // =======================================================================
+  describe("rejects short or empty responseText", () => {
+    const shortTexts = [
+      { text: "",        expected: false },
+      { text: "ok",      expected: false, note: "2 chars" },
+      { text: "好",      expected: false, note: "1 char" },
+      { text: "yes",     expected: false, note: "3 chars" },
+      { text: "不知道",  expected: false, note: "3 chars" },
+      { text: "xxxxxxxxxxxxxxxxxxxx", expected: false, note: "20 chars — below 24-char threshold" },
+    ];
+
+    for (const { text, expected } of shortTexts) {
+      it(`length=${text.length} → ${expected}`, () => {
+        assert.equal(isRecallUsed(text, ["abc1234567890"]), expected);
+      });
+    }
+  });
+
+  // =======================================================================
+  // Guard: empty / falsy injectedIds → false
+  // =======================================================================
+  describe("rejects empty or falsy injectedIds", () => {
+    const cases = [
+      { ids: [],       label: "empty array" },
+      { ids: undefined, label: "undefined"  },
+      { ids: null,     label: "null"         },
+    ];
+
+    for (const { ids, label } of cases) {
+      it(`injectedIds=${label} → false`, () => {
+        const longText = "remember when we discussed this project last time. Here are the details...";
+        assert.equal(isRecallUsed(longText, ids), false);
+      });
+    }
+  });
+
+  // =======================================================================
+  // English usage markers (all > 24 chars)
+  // =======================================================================
+  describe("detects English usage markers", () => {
+    const markers = [
+      "remember",
+      "According to",   // case-insensitive
+      "AS YOU MENTIONED", // case-insensitive uppercase
+      "according to",
+      "based on what",
+      "as you mentioned",
+      "in the memory",
+      "the memory mentioned",
+      "the memories show",
+      "from previous",
+      "earlier you",
+    ];
+
+    for (const marker of markers) {
+      it(`"${marker}"`, () => {
+        // Must be > 24 chars to pass the length guard
+        const text = `Sure, ${marker} our discussion. Here are the full details of the plan.`;
+        assert.ok(text.length > 24, `Text length ${text.length} must be > 24`);
+        assert.equal(isRecallUsed(text, ["abc12345"]), true);
+      });
+    }
+  });
+
+  // =======================================================================
+  // Chinese usage markers (Simplified + Traditional — both in actual markers list)
+  // =======================================================================
+  describe("detects Chinese usage markers (Simplified + Traditional — both in markers list)", () => {
+    const markers = [
+      // Simplified
+      "之前",
+      "记得",
+      "如前所述",
+      "如您所说的",
+      "我记得",
+      "之前提到的",
+      "之前你说",
+      "根据之前",
+      "依据之前",
+      "按照之前",
+      "照你说的",
+      "照您之前",
+      // Traditional
+      "如您所說",
+      "我記得",
+      "之前你說",
+      // NOTE: standalone Traditional "記得" is not exercised in this list;
+      // Traditional coverage of that phrase comes from the prefixed "我記得" entry above.
+    ];
+
+    for (const marker of markers) {
+      it(`"${marker}"`, () => {
+        // Text must be > 24 chars; append filler to ensure sufficient length
+        const base = `${marker}，我们讨论过这个问题。`;
+        const filler = "这是额外的填充文字用来确保总长度超过24个字符的要求。";
+        const text = base + filler;
+        assert.ok(text.length > 24, `Text length ${text.length} must be > 24`);
+        assert.equal(isRecallUsed(text, ["abc12345"]), true);
+      });
+    }
+  });
+
+  // =======================================================================
+  // Negative: no usage markers present → false
+  // =======================================================================
+  describe("returns false when no usage markers present", () => {
+    // These texts are all > 24 chars and contain no usage markers
+    const noMarkerTexts = [
+      "The API endpoint is /v1/embeddings. It accepts POST requests with a JSON body.",
+      "I think we should use JSON for the response format. Let me know if that works.",
+      "Let me check the documentation and get back to you with a more detailed response.",
+      "Sure, I can help with that task. Here's what I suggest based on common patterns.",
+      "This is a general response with no specific memory reference. Just practical advice.",
+    ];
+
+    for (const text of noMarkerTexts) {
+      it(`"${text.substring(0, 50)}..."`, () => {
+        assert.ok(text.length > 24);
+        assert.equal(isRecallUsed(text, ["abc1234567890"]), false);
+      });
+    }
+  });
+
+  // =======================================================================
+  // Boundary: length threshold is > 24
+  // =======================================================================
+  describe("boundary: length threshold is > 24 chars", () => {
+    it("exactly 24 chars → false (hits length guard)", () => {
+      const text = "according to memory!!xxx"; // 24 chars exactly
+      assert.equal(text.length, 24);
+      assert.equal(isRecallUsed(text, ["abc1234567890"]), false);
+    });
+
+    it("25 chars with marker → true", () => {
+      const text = "according to memory!!xxxx"; // 25 chars, has "according to"
+      assert.equal(text.length, 25);
+      assert.equal(isRecallUsed(text, ["abc1234567890"]), true);
+    });
+
+    it("25 chars without marker → false", () => {
+      const t = "This is a helpful answer."; // 25 chars, no usage marker
+      assert.equal(t.length, 25, `Expected 25, got ${t.length}`);
+      assert.equal(isRecallUsed(t, ["abc1234567890"]), false);
+    });
+  });
+
+  // =======================================================================
+  // Realistic full-turn scenarios
+  // =======================================================================
+  describe("realistic full-turn scenarios", () => {
+    it("detects recall in an agent response (Simplified Chinese)", () => {
+      const response =
+        "当然记得！你之前说想要用 PostgreSQL 当主要数据库。根据之前的讨论，我建议我们采用连接池的方式来优化查询性能。";
+      assert.ok(response.length > 24);
+      assert.equal(isRecallUsed(response, ["a1b2c3d4e5f6"]), true);
+    });
+
+    it("detects recall in an agent response (Traditional Chinese)", () => {
+      const response =
+        "當然記得！你之前說想要用 PostgreSQL 當主要資料庫。根據之前的討論，我建議我們採用連接池的方式來優化查詢效能。";
+      assert.ok(response.length > 24);
+      assert.equal(isRecallUsed(response, ["a1b2c3d4e5f6"]), true);
+    });
+
+    it("does not detect recall in a generic technical response", () => {
+      const response =
+        "这个问题的解决方案是使用 REST API 配合 JSON 格式。我会使用 Express.js 配合 PostgreSQL 数据库来构建后端服务。";
+      assert.ok(response.length > 24);
+      assert.equal(isRecallUsed(response, ["a1b2c3d4e5f6"]), false);
+    });
+
+    it("handles long response with marker at the end", () => {
+      const filler = "这是一些额外的内容用来增加文本长度。" + "更多内容来确保超过24字符的阈值。" + "继续添加更多文字。".repeat(5);
+      const text = "这个问题可以从多个角度来分析。" + filler + "综上所述，根据之前确定的方案，我们继续执行。";
+      assert.ok(text.length > 24);
+      assert.equal(isRecallUsed(text, ["abc123"]), true);
+    });
+
+    it("handles long response without any marker", () => {
+      const text = ("这是一个测试场景的回复内容。" + "我们从技术角度来分析这个问题。" + "采用标准的解决方案。").repeat(8);
+      assert.ok(text.length > 24);
+      assert.equal(isRecallUsed(text, ["abc123"]), false);
+    });
+  });
+});

From e68cef74590ea3ba713122c3712b52dacd906cc5 Mon Sep 17 00:00:00 2001
From: James Lin <jlin53882@gmail.com>
Date: Mon, 4 May 2026 00:49:31 +0800
Subject: [PATCH 4/6] test: register is-recall-used.test.mjs in CI manifest
 (core-regression)

Issue #736 recall governance - isRecallUsed() 48-unit-test coverage
---
 scripts/ci-test-manifest.mjs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/ci-test-manifest.mjs b/scripts/ci-test-manifest.mjs
index a61bead2..98440d0c 100644
--- a/scripts/ci-test-manifest.mjs
+++ b/scripts/ci-test-manifest.mjs
@@ -60,6 +60,8 @@ export const CI_TEST_MANIFEST = [
   { group: "storage-and-schema", runner: "node", file: "test/smart-extractor-bulk-store-edge-cases.test.mjs", args: ["--test"] },
   // Issue #680 regression tests
   { group: "core-regression", runner: "node", file: "test/memory-reflection-issue680-tdd.test.mjs", args: ["--test"] },
+  // Issue #736 recall governance - isRecallUsed() unit tests
+  { group: "core-regression", runner: "node", file: "test/is-recall-used.test.mjs", args: ["--test"] },
 ];
 
 export function getEntriesForGroup(group) {

From dab4b6249578e36f5f1c3484a4a5d67c3f97f680 Mon Sep 17 00:00:00 2001
From: James Lin <jlin53882@gmail.com>
Date: Mon, 4 May 2026 17:38:51 +0800
Subject: [PATCH 5/6] merge: resolve conflict in ci-test-manifest.mjs (keep
 is-recall-used, add #492 tests, drop stale to-import-specifier)

---
 scripts/ci-test-manifest.mjs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/ci-test-manifest.mjs b/scripts/ci-test-manifest.mjs
index 98440d0c..123d523b 100644
--- a/scripts/ci-test-manifest.mjs
+++ b/scripts/ci-test-manifest.mjs
@@ -16,7 +16,6 @@ export const CI_TEST_MANIFEST = [
   { group: "storage-and-schema", runner: "node", file: "test/smart-extractor-scope-filter.test.mjs", args: ["--test"] },
   { group: "storage-and-schema", runner: "node", file: "test/store-empty-scope-filter.test.mjs", args: ["--test"] },
   { group: "core-regression", runner: "node", file: "test/recall-text-cleanup.test.mjs", args: ["--test"] },
-  { group: "core-regression", runner: "node", file: "test/to-import-specifier-windows.test.mjs", args: ["--test"] },
   { group: "storage-and-schema", runner: "node", file: "test/update-consistency-lancedb.test.mjs" },
   { group: "core-regression", runner: "node", file: "test/strip-envelope-metadata.test.mjs", args: ["--test"] },
   { group: "cli-smoke", runner: "node", file: "test/import-markdown/import-markdown.test.mjs", args: ["--test"] },
@@ -62,6 +61,9 @@ export const CI_TEST_MANIFEST = [
   { group: "core-regression", runner: "node", file: "test/memory-reflection-issue680-tdd.test.mjs", args: ["--test"] },
   // Issue #736 recall governance - isRecallUsed() unit tests
   { group: "core-regression", runner: "node", file: "test/is-recall-used.test.mjs", args: ["--test"] },
+  // Issue #492 agentId validation tests
+  { group: "core-regression", runner: "node", file: "test/agentid-validation.test.mjs", args: ["--test"] },
+  { group: "core-regression", runner: "node", file: "test/command-reflection-guard.test.mjs", args: ["--test"] },
 ];
 
 export function getEntriesForGroup(group) {

From 198878fda88820b506099cdd153247570b638d39 Mon Sep 17 00:00:00 2001
From: Heng Xia <pope@Hengs-Mac-mini.local>
Date: Tue, 5 May 2026 11:20:36 +0800
Subject: [PATCH 6/6] test: select expected prompt hooks in governance tests

---
 test/per-agent-auto-recall.test.mjs  | 11 ++++++++---
 test/reflection-bypass-hook.test.mjs |  8 ++++++--
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/test/per-agent-auto-recall.test.mjs b/test/per-agent-auto-recall.test.mjs
index 83f59c2a..08a4f58e 100644
--- a/test/per-agent-auto-recall.test.mjs
+++ b/test/per-agent-auto-recall.test.mjs
@@ -58,6 +58,13 @@ function createPluginApiHarness({ pluginConfig, resolveRoot, debugLogs = [] }) {
   return { api, eventHandlers };
 }
 
+function getAutoRecallHook(eventHandlers) {
+  const hooks = eventHandlers.get("before_prompt_build") || [];
+  const autoRecallHook = hooks.find(({ meta }) => meta?.priority === 10)?.handler;
+  assert.equal(typeof autoRecallHook, "function", "expected an auto-recall before_prompt_build hook");
+  return autoRecallHook;
+}
+
 function baseConfig() {
   return {
     embedding: {
@@ -299,9 +306,7 @@ describe("real before_prompt_build hook", () => {
 
     try {
       memoryLanceDBProPlugin.register(harness.api);
-      const hooks = harness.eventHandlers.get("before_prompt_build") || [];
-      assert.equal(hooks.length, 1, "expected one before_prompt_build hook");
-      const [{ handler: autoRecallHook }] = hooks;
+      const autoRecallHook = getAutoRecallHook(harness.eventHandlers);
 
       const output = await autoRecallHook(
         { prompt: "Please recall my preferences.", sessionKey: "agent:main:session:test-main" },
diff --git a/test/reflection-bypass-hook.test.mjs b/test/reflection-bypass-hook.test.mjs
index 032b9e8a..cf2c7eb0 100644
--- a/test/reflection-bypass-hook.test.mjs
+++ b/test/reflection-bypass-hook.test.mjs
@@ -118,11 +118,15 @@ async function invokeReflectionHooks({ workDir, agentId, explicitAgentId = agent
   memoryLanceDBProPlugin.register(harness.api);
 
   const promptHooks = harness.eventHandlers.get("before_prompt_build") || [];
+  const reflectionHooks = promptHooks.filter((hook) => {
+    const priority = hook.meta?.priority;
+    return priority === 12 || priority === 15;
+  });
 
-  assert.equal(promptHooks.length, 2, "expected exactly two before_prompt_build hooks (invariants + derived)");
+  assert.equal(reflectionHooks.length, 2, "expected reflection before_prompt_build hooks (priorities 12 and 15)");
 
   // Sort by priority: lower priority value runs first (invariants=12, derived=15)
-  const sorted = [...promptHooks].sort((a, b) => (a.meta?.priority ?? 99) - (b.meta?.priority ?? 99));
+  const sorted = [...reflectionHooks].sort((a, b) => (a.meta?.priority ?? 99) - (b.meta?.priority ?? 99));
   const ctx = { sessionKey: `agent:${agentId}:test`, agentId: explicitAgentId };
   const startResult = await sorted[0].handler({}, ctx);   // invariants (priority 12)
   const promptResult = await sorted[1].handler({}, ctx);   // derived (priority 15)