Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions src/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ function logConfigOnce(): void {
);
}

/** Fresh-window size used when MAX_FRESH_TURNS is unset or not a number. */
const DEFAULT_FRESH_TURNS = 4;

/**
 * Turn the raw MAX_FRESH_TURNS setting into a whole, non-negative turn count.
 *
 * Unset, blank, or non-numeric values fall back to DEFAULT_FRESH_TURNS rather
 * than producing NaN (which would silently switch pruning off). Fractions are
 * floored and negative values collapse to 0.
 */
function parseFreshTurns(raw: string | undefined): number {
  if (raw === undefined || raw.trim() === "") return DEFAULT_FRESH_TURNS;
  const parsed = Math.floor(Number(raw));
  if (Number.isNaN(parsed)) return DEFAULT_FRESH_TURNS;
  return Math.max(0, parsed);
}

// How many user turns at the tail to keep tool-result bodies intact.
// Older toolResults beyond this window are replaced with a 1-line stub.
// This is the biggest in-session lever: a single gh-diff or browser-scrape
// otherwise re-bills itself on every subsequent agent iteration.
// A value of 0 turns the pruning off.
const FRESH_TURNS = parseFreshTurns(process.env.MAX_FRESH_TURNS);
const STALE_STUB_CHARS = 200; // keep a tiny prefix for continuity

/** Rough token estimate: ~4 chars per token for text, actual usage for assistant messages */
function estimateMessageTokens(msg: AgentMessage): number {
const m = msg as Message;
Expand Down Expand Up @@ -102,8 +109,69 @@ export function getContextStats(messages: AgentMessage[]): ContextStats {
* - Extract key information: tool calls made, results, decisions, errors
* - Replace with a single compact user message containing the summary
*/
/**
 * Replace tool-result bodies older than the last `freshTurns` user turns with
 * a short stub. The result preserves the message structure (role, toolCallId,
 * isError) so tool_use ↔ tool_result pairing remains valid, but strips the
 * bulk of text content that would otherwise be re-sent to the model on every
 * subsequent iteration within the same session.
 *
 * A stale result is rewritten only when its text blocks total more than
 * `STALE_STUB_CHARS * 2` characters. A rewritten result carries exactly one
 * text block, so any non-text blocks it held are dropped along with the body.
 *
 * Idempotent: a message whose content is already the stub marker is left alone.
 *
 * @param messages Conversation to scan; the array and its messages are never mutated.
 * @param freshTurns Number of trailing user turns whose tool results stay
 *   intact. Fractions are floored; zero, negative, or NaN disables pruning.
 * @returns The same array instance when nothing was pruned; otherwise a new
 *   array in which only the pruned tool results are replaced by copies.
 */
export function pruneStaleToolResults(
  messages: AgentMessage[],
  freshTurns = FRESH_TURNS
): AgentMessage[] {
  // Negated `>` so that NaN disables pruning explicitly instead of by accident;
  // flooring lets a fractional count still match the integer counter below.
  const turns = Math.floor(freshTurns);
  if (!(turns > 0) || messages.length === 0) return messages;

  // Identify the cut index: the index of the (turns)th user message from the end.
  let seen = 0;
  let cutIdx = 0;
  for (let i = messages.length - 1; i >= 0; i--) {
    if ((messages[i] as Message).role !== "user") continue;
    seen++;
    if (seen === turns) {
      cutIdx = i;
      break;
    }
  }
  if (cutIdx <= 0) return messages; // nothing stale

  const STUB_MARKER = " result — body pruned from context (";

  let changed = false;
  const out = messages.map((msg, idx) => {
    if (idx >= cutIdx) return msg;
    const m = msg as any;
    if (m.role !== "toolResult" || !Array.isArray(m.content)) return msg;

    // Collect only well-formed text blocks; malformed entries (null, or a
    // non-string `text`) are ignored instead of throwing.
    const texts: string[] = [];
    for (const c of m.content) {
      if (c && c.type === "text" && typeof c.text === "string") texts.push(c.text);
    }
    const totalLen = texts.reduce((sum, t) => sum + t.length, 0);
    if (totalLen <= STALE_STUB_CHARS * 2) return msg; // already small

    const name = m.toolName || "tool";
    const firstText = texts[0] ?? "";

    // A previous pass already stubbed this result. The size check above
    // normally catches that, but a very long tool name can push the stub past
    // the threshold, so detect the marker explicitly.
    if (m.content.length === 1 && firstText.startsWith(`[${name}${STUB_MARKER}`)) {
      return msg;
    }

    // Keep a short head prefix of the first text block (often contains path /
    // status / counts) to preserve a breadcrumb for the model. Back off by one
    // code unit if the cut would strand a high surrogate at the end.
    let cut = Math.min(STALE_STUB_CHARS, firstText.length);
    if (cut > 0 && cut < firstText.length) {
      const last = firstText.charCodeAt(cut - 1);
      if (last >= 0xd800 && last <= 0xdbff) cut--;
    }
    const head = firstText.slice(0, cut).replace(/\s+/g, " ").trim();
    // Ellipsis only when characters of the first block were actually cut off,
    // not merely because whitespace was collapsed.
    const ellipsis = cut < firstText.length ? "…" : "";
    const stubText = `[${name} result — body pruned from context (${totalLen} chars). head: ${head}${ellipsis}]`;
    changed = true;
    return { ...m, content: [{ type: "text", text: stubText }] };
  });

  return changed ? out : messages;
}

export async function transformContext(messages: AgentMessage[]): Promise<AgentMessage[]> {
logConfigOnce();

// Cheap in-session pruning first — runs every turn, strips old toolResult
// bodies so a long merge/debug session doesn't re-bill huge diffs forever.
messages = pruneStaleToolResults(messages);

const totalTokens = messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);

if (totalTokens <= TOKEN_LIMIT) {
Expand Down
6 changes: 3 additions & 3 deletions src/tools/claude-subagent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { join } from "path";
import { log } from "../logger.js";
import { getAgentWeaveSession } from "../agentweave-context.js";
import { relayJobCompletionToTelegram } from "../telegram-notify.js";
import { headAndTail } from "./truncate.js";

/** The four status strings a DelegateJobStatus value may take. */
type DelegateJobStatus =
  | "running"
  | "completed"
  | "failed"
  | "timed_out";

Expand Down Expand Up @@ -82,9 +83,8 @@ export function evictOldJobs(): void {
}

/**
 * Shorten `text` to a budget of MAX_OUTPUT_CHARS characters.
 *
 * Delegates to `headAndTail`, which returns text within the budget unchanged
 * and otherwise keeps the start and end while replacing the middle with a
 * truncation marker. The marker is added on top of the budget, so a truncated
 * result is slightly longer than MAX_OUTPUT_CHARS.
 *
 * @param text Raw output to shorten.
 * @returns `text` itself when it fits, otherwise head + marker + tail.
 */
export function truncateOutput(text: string): string {
  // Keep head (initial plan/progress) and tail (final answer/errors).
  return headAndTail(text, MAX_OUTPUT_CHARS);
}

function makeCustomHeaders(headers: Record<string, string>): string {
Expand Down
38 changes: 35 additions & 3 deletions src/tools/fs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,18 @@ function resolvePath(p: string): string {
return path.join(MAX_HOME, p);
}

/** Line count read_file returns when the caller supplies no `limit`. */
const DEFAULT_LIMIT_LINES = 500;
/** Character cap applied to the selected lines before pagination notes are appended. */
const MAX_READ_CHARS = 20_000;

export const readFileTool: AgentTool = {
name: "read_file",
label: "Read File",
description: "Read the contents of a file. Paths are relative to ~/max/ by default.",
description:
"Read the contents of a file. Paths are relative to ~/max/ by default. Large files are paginated: default returns the first 500 lines (capped at ~20K chars). Use `offset` + `limit` to page through larger files.",
parameters: Type.Object({
path: Type.String({ description: "File path (relative to ~/max/ or absolute)" }),
offset: Type.Optional(Type.Number({ description: "Starting line (0-indexed). Default: 0" })),
limit: Type.Optional(Type.Number({ description: "Max lines to return. Default: 500" })),
}),
execute: async (_id, params: any) => {
try {
Expand All @@ -28,8 +34,34 @@ export const readFileTool: AgentTool = {
details: { path: resolved, ignored: true },
};
}
const content = await readFile(resolved, "utf-8");
return { content: [{ type: "text", text: content }], details: { path: resolved, size: content.length } };
const offset = Math.max(0, Math.floor(params.offset ?? 0));
const limit = Math.max(1, Math.floor(params.limit ?? DEFAULT_LIMIT_LINES));

const full = await readFile(resolved, "utf-8");
const allLines = full.split("\n");
const totalLines = allLines.length;
const slice = allLines.slice(offset, offset + limit);
let text = slice.join("\n");
let charTruncated = false;
if (text.length > MAX_READ_CHARS) {
text = text.slice(0, MAX_READ_CHARS);
charTruncated = true;
}

const lastReturnedLine = offset + slice.length;
const notes: string[] = [];
if (lastReturnedLine < totalLines) {
notes.push(`...[${totalLines - lastReturnedLine} more lines — call with offset=${lastReturnedLine}]`);
}
if (charTruncated) {
notes.push(`...[truncated at ${MAX_READ_CHARS} chars — call with a smaller limit]`);
}
if (notes.length) text += "\n" + notes.join("\n");

return {
content: [{ type: "text", text }],
details: { path: resolved, size: full.length, totalLines, offset, returnedLines: slice.length },
};
} catch (e: any) {
return { content: [{ type: "text", text: `Error reading file: ${e.message}` }], details: { error: e.message } };
}
Expand Down
5 changes: 3 additions & 2 deletions src/tools/shell.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { Type } from "@mariozechner/pi-ai";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import { exec } from "child_process";
import { promisify } from "util";
import { headAndTail } from "./truncate.js";

// Promise-returning wrapper around child_process.exec, built with util.promisify.
const execAsync = promisify(exec);

Expand Down Expand Up @@ -31,11 +32,11 @@ export const runShell: AgentTool = {
});

const output = [stdout, stderr].filter(Boolean).join("\n").trim();
const truncated = output.length > MAX_OUTPUT ? output.slice(0, MAX_OUTPUT) + "\n...(truncated)" : output;
const truncated = headAndTail(output, MAX_OUTPUT);
return { content: [{ type: "text", text: truncated || "(no output)" }], details: { success: true } };
} catch (e: any) {
const output = [e.stdout || "", e.stderr || ""].filter(Boolean).join("\n").trim();
const truncated = output.length > MAX_OUTPUT ? output.slice(0, MAX_OUTPUT) + "\n...(truncated)" : output;
const truncated = headAndTail(output, MAX_OUTPUT);
return {
content: [{ type: "text", text: `Command failed (exit ${e.code}): ${truncated || e.message}` }],
details: { success: false, exitCode: e.code, error: e.message },
Expand Down
19 changes: 19 additions & 0 deletions src/tools/truncate.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/**
 * Keep the beginning and end of a large text, dropping the middle.
 *
 * Error tails and success markers often live at the bottom of a shell or
 * subagent output; a pure head truncation silently loses them.
 *
 * By default splits ~70% head / 30% tail of the total budget. The truncation
 * marker is added on top of the budget, so a truncated result is longer than
 * `max` by the marker's length.
 *
 * @param text Text to shorten.
 * @param max Character budget shared by head and tail. Fractions are floored;
 *   negative or NaN budgets are treated as 0 (only the marker is returned).
 * @param headRatio Share of the budget given to the head. Clamped to [0, 1];
 *   a non-finite value falls back to 0.7.
 * @returns `text` unchanged when it fits the budget, otherwise
 *   head + "\n...[truncated N chars]...\n" + tail, where N is the number of
 *   UTF-16 code units removed.
 */
export function headAndTail(text: string, max: number, headRatio = 0.7): string {
  if (text.length <= max) return text;

  // Normalise the knobs so a bad value cannot produce negative slice bounds
  // (which would return most of the text instead of truncating it).
  const budget = Number.isFinite(max) ? Math.max(0, Math.floor(max)) : 0;
  if (text.length <= budget) return text;
  const ratio = Number.isFinite(headRatio) ? Math.min(1, Math.max(0, headRatio)) : 0.7;

  let headEnd = Math.floor(budget * ratio);
  let tailStart = text.length - (budget - headEnd);

  const isHighSurrogate = (i: number): boolean => {
    const code = text.charCodeAt(i);
    return code >= 0xd800 && code <= 0xdbff;
  };
  const isLowSurrogate = (i: number): boolean => {
    const code = text.charCodeAt(i);
    return code >= 0xdc00 && code <= 0xdfff;
  };

  // Never cut through a surrogate pair: shrink the kept part by one code unit
  // on whichever side would otherwise end or begin with half a character.
  if (headEnd > 0 && isHighSurrogate(headEnd - 1) && isLowSurrogate(headEnd)) {
    headEnd--;
  }
  if (
    tailStart > 0 &&
    tailStart < text.length &&
    isLowSurrogate(tailStart) &&
    isHighSurrogate(tailStart - 1)
  ) {
    tailStart++;
  }

  const dropped = tailStart - headEnd;
  return (
    text.slice(0, headEnd) +
    `\n...[truncated ${dropped} chars]...\n` +
    text.slice(tailStart)
  );
}
18 changes: 9 additions & 9 deletions tests/claude-subagent.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,19 +71,19 @@ describe("output truncation", () => {
expect(truncateOutput(text)).toBe(text);
});

it("truncates to the last MAX_OUTPUT_CHARS characters when over limit", () => {
// prefix is longer than MAX_OUTPUT_CHARS — entirely dropped by truncation
const prefix = "OLD".repeat(6000); // 18000 chars — older output, should be cut
const tail = "NEW".repeat(2000); // 6000 chars — recent output, should be kept
const combined = prefix + tail;
it("keeps head and tail, drops the middle, when over limit", () => {
const head = "HEAD".repeat(3000); // 12000 chars — start of output
const middle = "MID".repeat(4000); // 12000 chars — will be dropped
const tail = "TAIL".repeat(1000); // 4000 chars — end of output
const combined = head + middle + tail;
expect(combined.length).toBeGreaterThan(MAX_OUTPUT_CHARS);

const result = truncateOutput(combined);
expect(result.length).toBe(MAX_OUTPUT_CHARS);
// The tail must be fully present at the end of the result
// Head+tail split (default 70/30) keeps ~10500 head + 4500 tail from budget 15000
expect(result.startsWith(head.slice(0, 1000))).toBe(true);
expect(result.endsWith(tail)).toBe(true);
// The very start of the result should not be the beginning of prefix
expect(result).not.toBe(combined);
expect(result).toContain("[truncated");
expect(result).not.toContain(middle);
});
});

Expand Down
118 changes: 118 additions & 0 deletions tests/context.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import { describe, it, expect } from "@jest/globals";
import { pruneStaleToolResults } from "../src/context.js";

/** Build a user message holding a single text block, stamped with the current time. */
function mkUser(text: string): any {
  const content = [{ type: "text", text }];
  return { role: "user", content, timestamp: Date.now() };
}
/** Build an assistant message whose only content block is a tool call with empty input. */
function mkAssistantToolCall(name: string, id: string): any {
  const call = { type: "toolCall", name, toolCallId: id, input: {} };
  return { role: "assistant", content: [call], timestamp: Date.now() };
}
/**
 * Build a toolResult message for call `id` of tool `name`, with `body` as its
 * single text block. `isError` defaults to false.
 */
function mkToolResult(name: string, id: string, body: string, isError = false): any {
  const textBlock = { type: "text", text: body };
  const createdAt = Date.now();
  return {
    role: "toolResult",
    toolName: name,
    toolCallId: id,
    isError,
    content: [textBlock],
    timestamp: createdAt,
  };
}

describe("pruneStaleToolResults", () => {
  // One body large enough to be stubbed once stale, one short enough to be kept.
  const heavyBody = "x".repeat(5000);
  const tinyBody = "hello";

  /** Locate the toolResult message answering `callId`. */
  const resultFor = (list: any[], callId: string): any =>
    list.find((m: any) => m.role === "toolResult" && m.toolCallId === callId);

  /** One stale heavy run_shell result followed by two further user turns. */
  const staleShellConversation = (): any[] => [
    mkUser("u1"),
    mkAssistantToolCall("run_shell", "t1"),
    mkToolResult("run_shell", "t1", heavyBody),
    mkUser("u2"),
    mkUser("u3"),
  ];

  it("no-ops when under freshTurns", () => {
    const conversation = [
      mkUser("first"),
      mkAssistantToolCall("run_shell", "t1"),
      mkToolResult("run_shell", "t1", heavyBody),
      mkUser("second"),
    ];
    // Only two user turns exist, so a window of four leaves nothing stale and
    // the very same array must come back.
    expect(pruneStaleToolResults(conversation, 4)).toBe(conversation);
  });

  it("prunes tool results older than the last N user turns", () => {
    const conversation = [
      mkUser("turn1"),
      mkAssistantToolCall("run_shell", "t1"),
      mkToolResult("run_shell", "t1", heavyBody),
      mkUser("turn2"),
      mkUser("turn3"),
      mkAssistantToolCall("read_file", "t2"),
      mkToolResult("read_file", "t2", heavyBody),
      mkUser("turn4"),
      mkUser("turn5"),
    ];
    // The fresh window is turn4 + turn5; both tool results sit before it.
    const pruned = pruneStaleToolResults(conversation, 2);
    const shellResult = resultFor(pruned, "t1");
    const readResult = resultFor(pruned, "t2");
    expect(shellResult.content[0].text).toMatch(/run_shell result — body pruned/);
    expect(readResult.content[0].text).toMatch(/read_file result — body pruned/);
    // Pairing fields survive the rewrite.
    expect(shellResult.toolCallId).toBe("t1");
    expect(shellResult.role).toBe("toolResult");
  });

  it("keeps fresh tool results intact", () => {
    const conversation = [
      mkUser("old"),
      mkUser("fresh"),
      mkAssistantToolCall("run_shell", "t1"),
      mkToolResult("run_shell", "t1", heavyBody),
    ];
    const pruned = pruneStaleToolResults(conversation, 2);
    expect(resultFor(pruned, "t1").content[0].text).toBe(heavyBody);
  });

  it("leaves small tool results alone even if stale", () => {
    const conversation = [
      mkUser("u1"),
      mkAssistantToolCall("gpu_status", "t1"),
      mkToolResult("gpu_status", "t1", tinyBody),
      mkUser("u2"),
      mkUser("u3"),
    ];
    const pruned = pruneStaleToolResults(conversation, 1);
    expect(resultFor(pruned, "t1").content[0].text).toBe(tinyBody);
  });

  it("is idempotent", () => {
    const firstPass = pruneStaleToolResults(staleShellConversation(), 1);
    const secondPass = pruneStaleToolResults(firstPass, 1);
    // A second pass has nothing left to do, so it hands back its input array.
    expect(secondPass).toBe(firstPass);
  });

  it("does not alter non-toolResult messages", () => {
    const conversation = staleShellConversation();
    const pruned = pruneStaleToolResults(conversation, 1);
    // Index 2 is the tool result; every other slot must be the original object.
    for (const i of [0, 1, 3, 4]) {
      expect(pruned[i]).toBe(conversation[i]);
    }
  });
});
Loading