arniesaha · arniesaha · Apr 23, 2026 · Apr 22, 2026
diff --git a/src/context.ts b/src/context.ts
@@ -35,6 +35,13 @@ function logConfigOnce(): void {
   );
 }
 
+// Trailing user turns whose tool-result bodies stay intact; older toolResults
+// become a 1-line stub so one big gh-diff or browser-scrape is not re-billed on
+// every later iteration. Env MAX_FRESH_TURNS overrides (<= 0 disables pruning; non-numeric → 4).
+const parsedFreshTurns = Number(process.env.MAX_FRESH_TURNS || 4);
+const FRESH_TURNS = Number.isNaN(parsedFreshTurns) ? 4 : Math.floor(parsedFreshTurns);
+const STALE_STUB_CHARS = 200; // keep a tiny prefix for continuity
+
 /** Rough token estimate: ~4 chars per token for text, actual usage for assistant messages */
 function estimateMessageTokens(msg: AgentMessage): number {
   const m = msg as Message;
@@ -102,8 +109,69 @@ export function getContextStats(messages: AgentMessage[]): ContextStats {
  * - Extract key information: tool calls made, results, decisions, errors
  * - Replace with a single compact user message containing the summary
  */
+/**
+ * Replace tool-result bodies older than the last `freshTurns` user turns with
+ * a short stub. The stub preserves the message structure (role, toolCallId,
+ * isError) so tool_use ↔ tool_result pairing remains valid, but strips the
+ * bulk of the content that would otherwise be re-sent to the model on every
+ * subsequent iteration within the same session.
+ *
+ * Idempotent: an existing stub or an already-small result is left alone, and
+ * the input array itself is returned when nothing was pruned.
+ * @param messages Conversation history; never mutated.
+ * @param freshTurns User turns to keep intact (floored); < 1 or NaN disables pruning.
+ * @returns `messages` itself when unchanged, otherwise a new array with stubs.
+ */
+export function pruneStaleToolResults(
+  messages: AgentMessage[],
+  freshTurns = FRESH_TURNS
+): AgentMessage[] {
+  const keep = Math.floor(freshTurns);
+  if (!(keep >= 1) || messages.length === 0) return messages;
+
+  // Cut index: position of the `keep`-th user message counting from the end.
+  let seen = 0;
+  let cutIdx = 0;
+  for (let i = messages.length - 1; i >= 0; i--) {
+    if ((messages[i] as Message).role !== "user") continue;
+    if (++seen === keep) {
+      cutIdx = i;
+      break;
+    }
+  }
+  if (cutIdx <= 0) return messages; // nothing stale
+
+  const stubMarker = " result — body pruned from context (";
+  let changed = false;
+  const out = messages.map((msg, idx) => {
+    if (idx >= cutIdx) return msg;
+    const m = msg as any;
+    if (m.role !== "toolResult" || !Array.isArray(m.content)) return msg;
+
+    const texts: string[] = m.content.flatMap((c: any) => (c?.type === "text" && typeof c.text === "string" ? [c.text] : []));
+    const totalLen = texts.reduce((sum, t) => sum + t.length, 0);
+    if (totalLen <= STALE_STUB_CHARS * 2) return msg; // already small
+    const name = m.toolName || "tool";
+    const firstText = texts[0] ?? "";
+    // A long tool name can push a stub past the size check, so also test the marker.
+    if (m.content.length === 1 && firstText.startsWith(`[${name}${stubMarker}`)) return msg;
+    // Short head breadcrumb (path / status / counts); never end on half a surrogate pair.
+    const head = firstText.slice(0, STALE_STUB_CHARS).replace(/[\uD800-\uDBFF]$/, "").replace(/\s+/g, " ").trim();
+    const stubText = `[${name}${stubMarker}${totalLen} chars). head: ${head}${head.length < firstText.length ? "…" : ""}]`;
+    changed = true;
+    return { ...m, content: [{ type: "text", text: stubText }] };
+  });
+
+  return changed ? out : messages;
+}
+
 export async function transformContext(messages: AgentMessage[]): Promise<AgentMessage[]> {
   logConfigOnce();
+
+  // Cheap in-session pruning first — runs every turn, strips old toolResult
+  // bodies so a long merge/debug session doesn't re-bill huge diffs forever.
+  messages = pruneStaleToolResults(messages);
+
   const totalTokens = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
 
   if (totalTokens <= TOKEN_LIMIT) {

diff --git a/src/tools/claude-subagent.ts b/src/tools/claude-subagent.ts
@@ -6,6 +6,7 @@ import { join } from "path";
 import { log } from "../logger.js";
 import { getAgentWeaveSession } from "../agentweave-context.js";
 import { relayJobCompletionToTelegram } from "../telegram-notify.js";
+import { headAndTail } from "./truncate.js";
 
 type DelegateJobStatus = "running" | "completed" | "failed" | "timed_out";
 
@@ -82,9 +83,8 @@ export function evictOldJobs(): void {
 }
 
 export function truncateOutput(text: string): string {
-  if (text.length <= MAX_OUTPUT_CHARS) return text;
-  // Keep the tail — most recent output is most useful
-  return text.slice(text.length - MAX_OUTPUT_CHARS);
+  // Keep head (initial plan/progress) and tail (final answer/errors); headAndTail adds a short truncation marker on top of the MAX_OUTPUT_CHARS budget.
+  return headAndTail(text, MAX_OUTPUT_CHARS);
 }
 
 function makeCustomHeaders(headers: Record<string, string>): string {

diff --git a/src/tools/fs.ts b/src/tools/fs.ts
@@ -12,12 +12,18 @@ function resolvePath(p: string): string {
   return path.join(MAX_HOME, p);
 }
 
+const MAX_READ_CHARS = 20_000;
+const DEFAULT_LIMIT_LINES = 500;
+
 export const readFileTool: AgentTool = {
   name: "read_file",
   label: "Read File",
-  description: "Read the contents of a file. Paths are relative to ~/max/ by default.",
+  description:
+    "Read the contents of a file. Paths are relative to ~/max/ by default. Large files are paginated: default returns the first 500 lines (capped at ~20K chars). Use `offset` + `limit` to page through larger files.",
   parameters: Type.Object({
     path: Type.String({ description: "File path (relative to ~/max/ or absolute)" }),
+    offset: Type.Optional(Type.Number({ description: "Starting line (0-indexed). Default: 0" })),
+    limit: Type.Optional(Type.Number({ description: "Max lines to return. Default: 500" })),
   }),
   execute: async (_id, params: any) => {
     try {
@@ -28,8 +34,34 @@ export const readFileTool: AgentTool = {
           details: { path: resolved, ignored: true },
         };
       }
-      const content = await readFile(resolved, "utf-8");
-      return { content: [{ type: "text", text: content }], details: { path: resolved, size: content.length } };
+      const offset = Math.max(0, Math.floor(params.offset ?? 0));
+      const limit = Math.max(1, Math.floor(params.limit ?? DEFAULT_LIMIT_LINES));
+
+      const full = await readFile(resolved, "utf-8");
+      const allLines = full.split("\n");
+      const totalLines = allLines.length;
+      const slice = allLines.slice(offset, offset + limit);
+      let text = slice.join("\n");
+      let returnedLines = slice.length;
+      const charTruncated = text.length > MAX_READ_CHARS;
+      if (charTruncated) {
+        text = text.slice(0, MAX_READ_CHARS);
+        // Count only lines that survived whole so the offset hint resumes at the cut
+        // line instead of skipping it; at least 1 so an over-long line cannot loop.
+        returnedLines = Math.max(1, text.split("\n").length - 1);
+      }
+
+      // Pagination hints appended for the model; `lastReturnedLine` is the next offset.
+      const lastReturnedLine = offset + returnedLines;
+      const notes: string[] = [];
+      if (lastReturnedLine < totalLines) notes.push(`...[${totalLines - lastReturnedLine} more lines — call with offset=${lastReturnedLine}]`);
+      if (charTruncated) notes.push(`...[truncated at ${MAX_READ_CHARS} chars — call with a smaller limit]`);
+      if (notes.length) text += "\n" + notes.join("\n");
+
+      return {
+        content: [{ type: "text", text }],
+        details: { path: resolved, size: full.length, totalLines, offset, returnedLines },
+      };
     } catch (e: any) {
       return { content: [{ type: "text", text: `Error reading file: ${e.message}` }], details: { error: e.message } };
     }

diff --git a/src/tools/shell.ts b/src/tools/shell.ts
@@ -2,6 +2,7 @@ import { Type } from "@mariozechner/pi-ai";
 import type { AgentTool } from "@mariozechner/pi-agent-core";
 import { exec } from "child_process";
 import { promisify } from "util";
+import { headAndTail } from "./truncate.js";
 
 const execAsync = promisify(exec);
 
@@ -31,11 +32,11 @@ export const runShell: AgentTool = {
       });
 
       const output = [stdout, stderr].filter(Boolean).join("\n").trim();
-      const truncated = output.length > MAX_OUTPUT ? output.slice(0, MAX_OUTPUT) + "\n...(truncated)" : output;
+      const truncated = headAndTail(output, MAX_OUTPUT);
       return { content: [{ type: "text", text: truncated || "(no output)" }], details: { success: true } };
     } catch (e: any) {
       const output = [e.stdout || "", e.stderr || ""].filter(Boolean).join("\n").trim();
-      const truncated = output.length > MAX_OUTPUT ? output.slice(0, MAX_OUTPUT) + "\n...(truncated)" : output;
+      const truncated = headAndTail(output, MAX_OUTPUT);
       return {
         content: [{ type: "text", text: `Command failed (exit ${e.code}): ${truncated || e.message}` }],
         details: { success: false, exitCode: e.code, error: e.message },

diff --git a/src/tools/truncate.ts b/src/tools/truncate.ts
@@ -0,0 +1,19 @@
+/**
+ * Keep the beginning and end of a large text, dropping the middle.
+ *
+ * Error tails and success markers often live at the bottom of a shell or
+ * subagent output; a pure head truncation silently loses them.
+ *
+ * `max` budgets the kept characters (the marker line is extra). `headRatio` is
+ * clamped to [0, 1]; by default splits ~70% head / 30% tail of the budget.
+ */
+export function headAndTail(text: string, max: number, headRatio = 0.7): string {
+  if (!(text.length > max)) return text; // fits, or `max` is NaN: nothing to cut
+  const budget = Math.max(0, Math.floor(max));
+  const ratio = Number.isNaN(headRatio) ? 0.7 : Math.min(1, Math.max(0, headRatio));
+  const headLen = Math.floor(budget * ratio);
+  const tailLen = budget - headLen;
+  const marker = `\n...[truncated ${text.length - budget} chars]...\n`;
+  // `slice(-0)` would return the whole string, so a zero-length tail is special-cased.
+  return text.slice(0, headLen) + marker + (tailLen > 0 ? text.slice(-tailLen) : "");
+}
diff --git a/tests/claude-subagent.test.ts b/tests/claude-subagent.test.ts
@@ -71,19 +71,19 @@ describe("output truncation", () => {
     expect(truncateOutput(text)).toBe(text);
   });
 
-  it("truncates to the last MAX_OUTPUT_CHARS characters when over limit", () => {
-    // prefix is longer than MAX_OUTPUT_CHARS — entirely dropped by truncation
-    const prefix = "OLD".repeat(6000);   // 18000 chars — older output, should be cut
-    const tail = "NEW".repeat(2000);     // 6000 chars — recent output, should be kept
-    const combined = prefix + tail;
+  it("keeps head and tail, drops the middle, when over limit", () => {
+    const head = "HEAD".repeat(3000);    // 12000 chars — start of output
+    const middle = "MID".repeat(4000);   // 12000 chars — will be dropped
+    const tail = "TAIL".repeat(1000);    // 4000 chars — end of output
+    const combined = head + middle + tail;
     expect(combined.length).toBeGreaterThan(MAX_OUTPUT_CHARS);
 
     const result = truncateOutput(combined);
-    expect(result.length).toBe(MAX_OUTPUT_CHARS);
-    // The tail must be fully present at the end of the result
+    // Head+tail split (default 70/30) keeps ~10500 head + 4500 tail from budget 15000
+    expect(result.startsWith(head.slice(0, 1000))).toBe(true);
     expect(result.endsWith(tail)).toBe(true);
-    // The very start of the result should not be the beginning of prefix
-    expect(result).not.toBe(combined);
+    expect(result).toContain("[truncated");
+    expect(result).not.toContain(middle);
   });
 });
 

diff --git a/tests/context.test.ts b/tests/context.test.ts
@@ -0,0 +1,118 @@
+import { describe, it, expect } from "@jest/globals";
+import { pruneStaleToolResults } from "../src/context.js";
+
+/** Minimal user message carrying a single text block. */
+function mkUser(text: string): any {
+  const content = [{ type: "text", text }];
+  return { role: "user", content, timestamp: Date.now() };
+}
+/**
+ * Assistant message holding one `toolCall` block for tool `name`, call id `id`.
+ */
+function mkAssistantToolCall(name: string, id: string): any {
+  const call = { type: "toolCall", name, toolCallId: id, input: {} };
+  return { role: "assistant", content: [call], timestamp: Date.now() };
+}
+/**
+ * toolResult message answering call `id`; `body` is its only text block.
+ */
+function mkToolResult(name: string, id: string, body: string, isError = false): any {
+  const content = [{ type: "text", text: body }];
+  const base = { role: "toolResult", toolName: name, toolCallId: id };
+  return { ...base, isError, content, timestamp: Date.now() };
+}
+
+describe("pruneStaleToolResults", () => {
+  const heavyBody = "x".repeat(5000); // 5000 chars: well above the 2 × STALE_STUB_CHARS pruning threshold
+  const tinyBody = "hello"; // 5 chars: below the threshold, so never replaced by a stub
+
+  it("no-ops when under freshTurns", () => {
+    const msgs = [
+      mkUser("first"),
+      mkAssistantToolCall("run_shell", "t1"),
+      mkToolResult("run_shell", "t1", heavyBody),
+      mkUser("second"),
+    ];
+    const out = pruneStaleToolResults(msgs, 4); // only 2 user turns exist, so no cut index is found
+    expect(out).toBe(msgs); // same reference when unchanged
+  });
+
+  it("prunes tool results older than the last N user turns", () => {
+    const old = mkToolResult("run_shell", "t1", heavyBody);
+    const msgs = [
+      mkUser("turn1"),
+      mkAssistantToolCall("run_shell", "t1"),
+      old,
+      mkUser("turn2"),
+      mkUser("turn3"),
+      mkAssistantToolCall("read_file", "t2"),
+      mkToolResult("read_file", "t2", heavyBody),
+      mkUser("turn4"),
+      mkUser("turn5"),
+    ];
+    const out = pruneStaleToolResults(msgs, 2);
+    // Last 2 user turns = turn4 + turn5. Everything before turn4 is stale.
+    const firstTR = out.find((m: any) => m.role === "toolResult" && m.toolCallId === "t1") as any;
+    const secondTR = out.find((m: any) => m.role === "toolResult" && m.toolCallId === "t2") as any;
+    expect(firstTR.content[0].text).toMatch(/run_shell result — body pruned/);
+    expect(secondTR.content[0].text).toMatch(/read_file result — body pruned/);
+    // Structure preserved
+    expect(firstTR.toolCallId).toBe("t1");
+    expect(firstTR.role).toBe("toolResult");
+  });
+
+  it("keeps fresh tool results intact", () => {
+    const fresh = mkToolResult("run_shell", "t1", heavyBody);
+    const msgs = [
+      mkUser("old"),
+      mkUser("fresh"),
+      mkAssistantToolCall("run_shell", "t1"),
+      fresh,
+    ];
+    const out = pruneStaleToolResults(msgs, 2); // the 2nd-from-last user turn is index 0, so nothing precedes the cut
+    const tr = out.find((m: any) => m.toolCallId === "t1") as any;
+    expect(tr.content[0].text).toBe(heavyBody);
+  });
+
+  it("leaves small tool results alone even if stale", () => {
+    const small = mkToolResult("gpu_status", "t1", tinyBody);
+    const msgs = [
+      mkUser("u1"),
+      mkAssistantToolCall("gpu_status", "t1"),
+      small,
+      mkUser("u2"),
+      mkUser("u3"),
+    ];
+    const out = pruneStaleToolResults(msgs, 1); // cut at u3: t1 is stale but under the size threshold
+    const tr = out.find((m: any) => m.toolCallId === "t1") as any;
+    expect(tr.content[0].text).toBe(tinyBody);
+  });
+
+  it("is idempotent", () => {
+    const msgs = [
+      mkUser("u1"),
+      mkAssistantToolCall("run_shell", "t1"),
+      mkToolResult("run_shell", "t1", heavyBody),
+      mkUser("u2"),
+      mkUser("u3"),
+    ];
+    const once = pruneStaleToolResults(msgs, 1); // first pass turns t1 into a stub
+    const twice = pruneStaleToolResults(once, 1); // the stub is under the size threshold, so nothing changes
+    expect(twice).toBe(once); // second pass returns same reference
+  });
+
+  it("does not alter non-toolResult messages", () => {
+    const msgs = [
+      mkUser("u1"),
+      mkAssistantToolCall("run_shell", "t1"),
+      mkToolResult("run_shell", "t1", heavyBody),
+      mkUser("u2"),
+      mkUser("u3"),
+    ];
+    const out = pruneStaleToolResults(msgs, 1); // only index 2 (the toolResult) is replaced
+    expect(out[0]).toBe(msgs[0]);
+    expect(out[1]).toBe(msgs[1]);
+    expect(out[3]).toBe(msgs[3]);
+    expect(out[4]).toBe(msgs[4]);
+  });
+});