// How many user turns at the tail keep their tool-result bodies intact.
// Older toolResults beyond this window are replaced with a 1-line stub.
// This is the biggest in-session lever: a single gh-diff or browser-scrape
// otherwise re-bills itself on every subsequent agent iteration.
// An unparsable MAX_FRESH_TURNS falls back to 4 instead of becoming NaN
// (NaN would silently disable pruning); an explicit 0 still disables it.
const envFreshTurns = Math.floor(Number(process.env.MAX_FRESH_TURNS || 4));
const FRESH_TURNS = Number.isFinite(envFreshTurns) ? envFreshTurns : 4;
const STALE_STUB_CHARS = 200; // keep a tiny prefix for continuity

// Fixed fragment of every stub; shared by the builder and the detector so the
// two cannot drift apart.
const STALE_STUB_MARKER = " result — body pruned from context (";

/** True when `content` is exactly one text block that already holds a prune stub. */
function isPrunedStub(content: unknown[]): boolean {
  if (content.length !== 1) return false;
  const only = content[0] as { type?: unknown; text?: unknown } | null | undefined;
  if (only?.type !== "text" || typeof only.text !== "string") return false;
  const text = only.text;
  return text.startsWith("[") && text.includes(STALE_STUB_MARKER) && text.endsWith("]");
}

/**
 * Replace tool-result bodies older than the last `freshTurns` user turns with
 * a short stub.
 *
 * The spread copy keeps every other field of the message (role, toolCallId,
 * isError, ...) so tool_use ↔ tool_result pairing remains valid; only
 * `content` is swapped for a single text block. Any non-text content blocks of
 * a pruned message are therefore dropped as well.
 *
 * Messages are left untouched when they are inside the fresh window, are not
 * `toolResult` messages, carry at most `2 * STALE_STUB_CHARS` characters of
 * text, or already contain a stub.
 *
 * Idempotent: an existing stub is recognised by its marker, not merely by its
 * length, so a second pass returns the same array reference.
 *
 * @param messages - Conversation history, oldest first. Never mutated.
 * @param freshTurns - Number of trailing user turns to protect. Fractions are
 *   floored; values below 1 (or NaN) disable pruning.
 * @returns The original array when nothing changed, otherwise a new array in
 *   which only the pruned messages are new objects.
 */
export function pruneStaleToolResults(
  messages: AgentMessage[],
  freshTurns = FRESH_TURNS
): AgentMessage[] {
  const turnWindow = Math.floor(freshTurns);
  // `!(x >= 1)` also rejects NaN, which a plain `x <= 0` check lets through.
  if (!(turnWindow >= 1) || messages.length === 0) return messages;

  // Cut index: position of the `turnWindow`-th user message counted from the end.
  let seen = 0;
  let cutIdx = 0;
  for (let i = messages.length - 1; i >= 0; i--) {
    if ((messages[i] as Message).role === "user" && ++seen === turnWindow) {
      cutIdx = i;
      break;
    }
  }
  if (cutIdx <= 0) return messages; // nothing stale

  let changed = false;
  const out = messages.map((msg, idx) => {
    if (idx >= cutIdx) return msg;
    // The concrete toolResult shape is declared outside this block, so it is
    // inspected structurally here.
    const m = msg as any;
    if (m.role !== "toolResult" || !Array.isArray(m.content)) return msg;
    if (isPrunedStub(m.content)) return msg; // already pruned on an earlier pass

    // Total text length and the first usable text block, in one pass.
    let totalLen = 0;
    let firstText: string | undefined;
    for (const c of m.content) {
      if (c?.type === "text" && typeof c.text === "string") {
        totalLen += c.text.length;
        if (firstText === undefined) firstText = c.text;
      }
    }
    if (totalLen <= STALE_STUB_CHARS * 2) return msg; // already small

    const name = m.toolName || "tool";
    // Keep a short head of the first text block (often contains path /
    // status / counts) as a breadcrumb for the model.
    const source = firstText ?? "";
    let cut = Math.min(source.length, STALE_STUB_CHARS);
    if (cut > 0 && cut < source.length) {
      // Do not end the head on the first half of a surrogate pair.
      const code = source.charCodeAt(cut - 1);
      if (code >= 0xd800 && code <= 0xdbff) cut--;
    }
    const head = source.slice(0, cut).replace(/\s+/g, " ").trim();
    const ellipsis = cut < source.length ? "…" : "";
    const stubText = `[${name}${STALE_STUB_MARKER}${totalLen} chars). head: ${head}${ellipsis}]`;
    changed = true;
    return { ...m, content: [{ type: "text", text: stubText }] };
  });

  return changed ? out : messages;
}
+ messages = pruneStaleToolResults(messages); + const totalTokens = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0); if (totalTokens <= TOKEN_LIMIT) { diff --git a/src/tools/claude-subagent.ts b/src/tools/claude-subagent.ts index 80b91d6..3b57624 100644 --- a/src/tools/claude-subagent.ts +++ b/src/tools/claude-subagent.ts @@ -6,6 +6,7 @@ import { join } from "path"; import { log } from "../logger.js"; import { getAgentWeaveSession } from "../agentweave-context.js"; import { relayJobCompletionToTelegram } from "../telegram-notify.js"; +import { headAndTail } from "./truncate.js"; type DelegateJobStatus = "running" | "completed" | "failed" | "timed_out"; @@ -82,9 +83,8 @@ export function evictOldJobs(): void { } export function truncateOutput(text: string): string { - if (text.length <= MAX_OUTPUT_CHARS) return text; - // Keep the tail — most recent output is most useful - return text.slice(text.length - MAX_OUTPUT_CHARS); + // Keep head (initial plan/progress) and tail (final answer/errors). + return headAndTail(text, MAX_OUTPUT_CHARS); } function makeCustomHeaders(headers: Record): string { diff --git a/src/tools/fs.ts b/src/tools/fs.ts index a9aa4eb..125af3a 100644 --- a/src/tools/fs.ts +++ b/src/tools/fs.ts @@ -12,12 +12,18 @@ function resolvePath(p: string): string { return path.join(MAX_HOME, p); } +const MAX_READ_CHARS = 20_000; +const DEFAULT_LIMIT_LINES = 500; + export const readFileTool: AgentTool = { name: "read_file", label: "Read File", - description: "Read the contents of a file. Paths are relative to ~/max/ by default.", + description: + "Read the contents of a file. Paths are relative to ~/max/ by default. Large files are paginated: default returns the first 500 lines (capped at ~20K chars). Use `offset` + `limit` to page through larger files.", parameters: Type.Object({ path: Type.String({ description: "File path (relative to ~/max/ or absolute)" }), + offset: Type.Optional(Type.Number({ description: "Starting line (0-indexed). 
Default: 0" })), + limit: Type.Optional(Type.Number({ description: "Max lines to return. Default: 500" })), }), execute: async (_id, params: any) => { try { @@ -28,8 +34,34 @@ export const readFileTool: AgentTool = { details: { path: resolved, ignored: true }, }; } - const content = await readFile(resolved, "utf-8"); - return { content: [{ type: "text", text: content }], details: { path: resolved, size: content.length } }; + const offset = Math.max(0, Math.floor(params.offset ?? 0)); + const limit = Math.max(1, Math.floor(params.limit ?? DEFAULT_LIMIT_LINES)); + + const full = await readFile(resolved, "utf-8"); + const allLines = full.split("\n"); + const totalLines = allLines.length; + const slice = allLines.slice(offset, offset + limit); + let text = slice.join("\n"); + let charTruncated = false; + if (text.length > MAX_READ_CHARS) { + text = text.slice(0, MAX_READ_CHARS); + charTruncated = true; + } + + const lastReturnedLine = offset + slice.length; + const notes: string[] = []; + if (lastReturnedLine < totalLines) { + notes.push(`...[${totalLines - lastReturnedLine} more lines — call with offset=${lastReturnedLine}]`); + } + if (charTruncated) { + notes.push(`...[truncated at ${MAX_READ_CHARS} chars — call with a smaller limit]`); + } + if (notes.length) text += "\n" + notes.join("\n"); + + return { + content: [{ type: "text", text }], + details: { path: resolved, size: full.length, totalLines, offset, returnedLines: slice.length }, + }; } catch (e: any) { return { content: [{ type: "text", text: `Error reading file: ${e.message}` }], details: { error: e.message } }; } diff --git a/src/tools/shell.ts b/src/tools/shell.ts index a180c4a..2e09340 100644 --- a/src/tools/shell.ts +++ b/src/tools/shell.ts @@ -2,6 +2,7 @@ import { Type } from "@mariozechner/pi-ai"; import type { AgentTool } from "@mariozechner/pi-agent-core"; import { exec } from "child_process"; import { promisify } from "util"; +import { headAndTail } from "./truncate.js"; const execAsync = 
/**
 * Keep the beginning and end of a large text, dropping the middle.
 *
 * Error tails and success markers often live at the bottom of a shell or
 * subagent output; a pure head truncation silently loses them.
 *
 * The budget applies to the kept text only: the `...[truncated N chars]...`
 * marker is added on top, so a truncated result is slightly longer than `max`.
 * Cut points are nudged by at most one code unit each so that no surrogate
 * pair is split at either edge of the dropped region.
 *
 * @param text - Text to shorten.
 * @param max - Budget in UTF-16 code units for head + tail combined. Fractions
 *   are floored and negative values are treated as 0. NaN means "no usable
 *   limit" and returns `text` unchanged.
 * @param headRatio - Share of the budget given to the head; clamped to
 *   `[0, 1]`, and a non-finite value falls back to the default 0.7
 *   (~70% head / 30% tail).
 * @returns `text` itself when it fits the budget, otherwise
 *   `head + "\n...[truncated N chars]...\n" + tail`, where N is the number of
 *   code units actually removed.
 */
export function headAndTail(text: string, max: number, headRatio = 0.7): string {
  if (Number.isNaN(max)) return text;
  const budget = Math.max(0, Math.floor(max));
  if (text.length <= budget) return text;

  // Out-of-range ratios would otherwise produce negative slice indices, which
  // String.prototype.slice interprets as offsets from the end of the string.
  const ratio = Number.isFinite(headRatio) ? Math.min(1, Math.max(0, headRatio)) : 0.7;

  let headEnd = Math.floor(budget * ratio);
  let tailStart = text.length - (budget - headEnd);

  // Head must not end on a high surrogate (first half of a pair).
  if (headEnd > 0) {
    const last = text.charCodeAt(headEnd - 1);
    if (last >= 0xd800 && last <= 0xdbff) headEnd--;
  }
  // Tail must not start on a low surrogate (second half of a pair).
  if (tailStart < text.length) {
    const first = text.charCodeAt(tailStart);
    if (first >= 0xdc00 && first <= 0xdfff) tailStart++;
  }

  const dropped = tailStart - headEnd;
  return text.slice(0, headEnd) + `\n...[truncated ${dropped} chars]...\n` + text.slice(tailStart);
}
file mode 100644 index 0000000..2fa26cc --- /dev/null +++ b/tests/context.test.ts @@ -0,0 +1,118 @@ +import { describe, it, expect } from "@jest/globals"; +import { pruneStaleToolResults } from "../src/context.js"; + +function mkUser(text: string): any { + return { role: "user", content: [{ type: "text", text }], timestamp: Date.now() }; +} +function mkAssistantToolCall(name: string, id: string): any { + return { + role: "assistant", + content: [{ type: "toolCall", name, toolCallId: id, input: {} }], + timestamp: Date.now(), + }; +} +function mkToolResult(name: string, id: string, body: string, isError = false): any { + return { + role: "toolResult", + toolName: name, + toolCallId: id, + isError, + content: [{ type: "text", text: body }], + timestamp: Date.now(), + }; +} + +describe("pruneStaleToolResults", () => { + const heavyBody = "x".repeat(5000); + const tinyBody = "hello"; + + it("no-ops when under freshTurns", () => { + const msgs = [ + mkUser("first"), + mkAssistantToolCall("run_shell", "t1"), + mkToolResult("run_shell", "t1", heavyBody), + mkUser("second"), + ]; + const out = pruneStaleToolResults(msgs, 4); + expect(out).toBe(msgs); // same reference when unchanged + }); + + it("prunes tool results older than the last N user turns", () => { + const old = mkToolResult("run_shell", "t1", heavyBody); + const msgs = [ + mkUser("turn1"), + mkAssistantToolCall("run_shell", "t1"), + old, + mkUser("turn2"), + mkUser("turn3"), + mkAssistantToolCall("read_file", "t2"), + mkToolResult("read_file", "t2", heavyBody), + mkUser("turn4"), + mkUser("turn5"), + ]; + const out = pruneStaleToolResults(msgs, 2); + // Last 2 user turns = turn4 + turn5. Everything before turn4 is stale. 
+ const firstTR = out.find((m: any) => m.role === "toolResult" && m.toolCallId === "t1") as any; + const secondTR = out.find((m: any) => m.role === "toolResult" && m.toolCallId === "t2") as any; + expect(firstTR.content[0].text).toMatch(/run_shell result — body pruned/); + expect(secondTR.content[0].text).toMatch(/read_file result — body pruned/); + // Structure preserved + expect(firstTR.toolCallId).toBe("t1"); + expect(firstTR.role).toBe("toolResult"); + }); + + it("keeps fresh tool results intact", () => { + const fresh = mkToolResult("run_shell", "t1", heavyBody); + const msgs = [ + mkUser("old"), + mkUser("fresh"), + mkAssistantToolCall("run_shell", "t1"), + fresh, + ]; + const out = pruneStaleToolResults(msgs, 2); + const tr = out.find((m: any) => m.toolCallId === "t1") as any; + expect(tr.content[0].text).toBe(heavyBody); + }); + + it("leaves small tool results alone even if stale", () => { + const small = mkToolResult("gpu_status", "t1", tinyBody); + const msgs = [ + mkUser("u1"), + mkAssistantToolCall("gpu_status", "t1"), + small, + mkUser("u2"), + mkUser("u3"), + ]; + const out = pruneStaleToolResults(msgs, 1); + const tr = out.find((m: any) => m.toolCallId === "t1") as any; + expect(tr.content[0].text).toBe(tinyBody); + }); + + it("is idempotent", () => { + const msgs = [ + mkUser("u1"), + mkAssistantToolCall("run_shell", "t1"), + mkToolResult("run_shell", "t1", heavyBody), + mkUser("u2"), + mkUser("u3"), + ]; + const once = pruneStaleToolResults(msgs, 1); + const twice = pruneStaleToolResults(once, 1); + expect(twice).toBe(once); // second pass returns same reference + }); + + it("does not alter non-toolResult messages", () => { + const msgs = [ + mkUser("u1"), + mkAssistantToolCall("run_shell", "t1"), + mkToolResult("run_shell", "t1", heavyBody), + mkUser("u2"), + mkUser("u3"), + ]; + const out = pruneStaleToolResults(msgs, 1); + expect(out[0]).toBe(msgs[0]); + expect(out[1]).toBe(msgs[1]); + expect(out[3]).toBe(msgs[3]); + 
expect(out[4]).toBe(msgs[4]); + }); +});