From 5cd100521c137c9b00e0d736ea79fbc0650338fe Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 8 Jun 2026 01:32:00 +0000 Subject: [PATCH] Capture token-savings as core mission; automate usage visibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - README: replace tagline with token-efficiency focus; add "Token efficiency" section documenting what's automated (per-turn display, bounded tool output, concise prompts, effort control). - AGENTS.md: add "Token minimalism" section making automated savings a first-class design principle. - TODO.md: promote conversation compaction to top of backlog with a clear rationale (input tokens compound every turn); cross-reference getUsage() hook for threshold tracking. - AgentLoop: accumulate session-level token totals; expose via getUsage() so callers can act on them without manual tracking. - render.ts: display per-turn token counts automatically (↑ in ↓ out) — no configuration needed. - repl.ts: print cumulative session token total on exit. - systemPrompt: instruct the agent to be concise and prefer targeted reads; every output token has a cost. - Tests: cover getUsage() accumulation across single and multi-turn runs, and onUsage() rendering. https://claude.ai/code/session_017GpDkpWRcYKmmuQAkZM5y4 --- AGENTS.md | 14 ++++++++++++++ README.md | 39 +++++++++++++++++++++++++++++++++++---- TODO.md | 18 ++++++++++++++++-- src/agent/loop.ts | 10 ++++++++++ src/agent/systemPrompt.ts | 4 +++- src/repl.ts | 9 ++++++++- src/ui/render.ts | 8 ++++++-- tests/agent/loop.test.ts | 25 +++++++++++++++++++++++++ tests/ui/render.test.ts | 9 +++++++++ 9 files changed, 126 insertions(+), 10 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 02f652d..2b900ff 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,6 +8,20 @@ This file is loaded into the agent's system prompt when it runs in this repo. - Add unit tests for new tools, providers, and loop behavior (Vitest). 
- Run `npm run lint && npm run typecheck && npm test` before considering a change done. +## Token minimalism +Keeping token counts low is a core design concern, not an afterthought. Savings +should be automatic — the user should not need custom configuration to avoid +runaway costs. + +- Keep the system prompt short. Tool descriptions are generated from Zod schemas; + don't add redundant prose. +- Tool output must be bounded. Always cap result sets (grep matches, glob hits, + file lines). Prefer targeted reads over full-file slurps. +- Surface usage automatically. Token counts appear after every turn and as a + session total on exit; no opt-in required. +- Prefer features that reduce tokens structurally (output caps, compaction) over + features that merely expose knobs for users to tune manually. + ## Boundaries - No business logic. This is a general-purpose tool. - Don't add a second state paradigm or heavy dependencies without a clear reason. diff --git a/README.md b/README.md index 0f8c0ab..0aca72d 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,11 @@ # tiny-code -A small, extensible CLI coding agent. Interactive terminal REPL, interchangeable -**Anthropic** and **Gemini** models, and just the core features you actually use: -read/write/edit files, run shell commands, search code, and a custom -commands/skills system. No business logic baked in. +A small, extensible CLI coding agent built around one constraint: **keep token +usage low**. As coding-agent costs climb, tiny-code automates the savings so +you don't have to. Interactive terminal REPL, interchangeable **Anthropic** and +**Gemini** models, and just the core features you actually use: read/write/edit +files, run shell commands, search code, and a custom commands/skills system. +No business logic baked in. > Status: early (v0.x). Published as `@therr/tiny-code`; the binary is > `tiny-code`. Names may change before the first npm publish. @@ -96,6 +98,35 @@ CLI flags. 
`allow` pre-approves mutating actions so they skip the confirmation prompt: `bash` matches command prefixes, `write` matches path globs for write/edit. +## Token efficiency + +Minimizing token usage is a first-class goal — coding-agent bills grow fast, +and you shouldn't need a complex setup to control them. tiny-code automates +the savings where it can: + +- **Usage visible by default.** Every assistant turn prints `↑ in ↓ out tokens` + with no configuration. On exit you get a session total. +- **Bounded tool output.** grep caps at 200 matches, glob at 500 files, and + `read_file` supports `offset`/`limit` to pull only the lines you need — + preventing runaway context growth automatically. +- **Minimal system prompt.** The built-in persona is kept short. Tool schemas + are generated from Zod (no duplicate prose). Project context is opt-in. +- **Concise agent instructions.** The agent is explicitly told to avoid + restating the task or narrating completed steps. +- **Effort control.** For Anthropic models, `effort` tunes the adaptive + thinking budget. Drop it from the default `"high"` to `"medium"` or `"low"` + for simpler, cheaper tasks: + + ```json + { "effort": "medium" } + ``` + + Or set it per-session with `TINY_CODE_EFFORT=medium`. + +**Coming:** automatic conversation compaction once histories grow long +(see `TODO.md`), which will keep input-token counts from compounding across +many turns without any user action. + ## Development ```bash diff --git a/TODO.md b/TODO.md index b35c72c..4de30a4 100644 --- a/TODO.md +++ b/TODO.md @@ -3,6 +3,20 @@ Deferred features, roughly in priority order. Each entry notes the rationale and a rough approach so it can be picked up later. +Token efficiency is a first-class goal. Features that reduce token usage +automatically (without user configuration) are prioritized over features that +add capability at higher cost. 
+ +## Conversation compaction +Input tokens compound quickly in long sessions because the full message history +is resent every turn. Compaction trims the history automatically once it grows +past a threshold, keeping costs from ballooning without any user action. +**Approach:** track cumulative `inputTokens` from `AgentLoop.getUsage()`; when +it crosses a configurable threshold (e.g. 50k), summarize earlier messages into +a single condensed block. For Anthropic use the compaction beta; for Gemini +summarize via a lightweight call to a cheap model. Pair with conversation +persistence so compacted sessions can be resumed. + ## Sub-agents Spawn isolated agent runs for parallel exploration/research (like a lightweight Explore/Plan agent). **Approach:** a `spawn_agent` tool whose `execute` constructs @@ -34,8 +48,8 @@ exits; permission gate falls back to allowlist-only (no prompts). ## Conversation persistence / resume Save/restore `AgentLoop.getMessages()` to disk; `--resume` to continue a session. -Pair with token-budget-aware compaction once histories get long (Anthropic -compaction beta; manual summarization for Gemini). +Pair with the compaction feature above so resumed sessions don't carry a bloated +history. ## ripgrep-backed grep The `grep` tool currently walks the tree in JS. **Approach:** detect `rg` on diff --git a/src/agent/loop.ts b/src/agent/loop.ts index e482fa2..623ba0b 100644 --- a/src/agent/loop.ts +++ b/src/agent/loop.ts @@ -4,6 +4,8 @@ import type { PermissionGate } from '../permissions/gate.js'; import type { ToolResult } from '../tools/types.js'; import type { Message, ToolResultBlock, ToolUseBlock } from './types.js'; +export type { Usage }; + /** Sink for everything the loop wants to surface. The REPL provides the real one. 
*/ export interface AgentUI { onText(delta: string): void; @@ -39,6 +41,7 @@ export class AgentLoop { private readonly cwd: string; private readonly maxIterations: number; private readonly messages: Message[] = []; + private sessionUsage: Usage = { inputTokens: 0, outputTokens: 0 }; constructor(opts: AgentLoopOptions) { this.provider = opts.provider; @@ -55,6 +58,11 @@ export class AgentLoop { return this.messages; } + /** Cumulative token usage across all turns in this session. */ + getUsage(): Usage { + return { ...this.sessionUsage }; + } + /** Run one user turn to completion (through any number of tool round-trips). */ async run(userInput: string): Promise<void> { this.messages.push({ role: 'user', content: [{ type: 'text', text: userInput }] }); @@ -75,6 +83,8 @@ export class AgentLoop { } else if (event.type === 'tool_call') { toolCalls.push({ type: 'tool_use', id: event.id, name: event.name, input: event.input }); } else { + this.sessionUsage.inputTokens += event.usage.inputTokens; + this.sessionUsage.outputTokens += event.usage.outputTokens; this.ui.onUsage(event.usage); } } diff --git a/src/agent/systemPrompt.ts b/src/agent/systemPrompt.ts index 1ae17d0..2713379 100644 --- a/src/agent/systemPrompt.ts +++ b/src/agent/systemPrompt.ts @@ -13,7 +13,9 @@ Guidelines: - Read files before editing them. Prefer small, targeted edits. - When you run commands, explain briefly what you are doing only if it is non-obvious. - Match the conventions of the surrounding code. -- When the task is complete, give a short summary of what changed. Do not narrate routine steps.`; +- When the task is complete, give a short summary of what changed. Do not narrate routine steps. +- Be concise. Avoid restating the task or narrating completed steps. Every output token has a cost. +- Prefer targeted reads (offset/limit) and filtered searches (glob patterns) over reading entire files or scanning broadly.`; /** Compose the system prompt from persona, environment, tools, and project context. 
*/ export function buildSystemPrompt(params: SystemPromptParams): string { diff --git a/src/repl.ts b/src/repl.ts index c5ce62f..3e9990e 100644 --- a/src/repl.ts +++ b/src/repl.ts @@ -111,7 +111,14 @@ export async function startRepl(overrides: CliOverrides): Promise<void> { }; rl.on('close', () => { - console.log(pc.dim('\nBye.')); + const usage = agent.getUsage(); + if (usage.inputTokens > 0 || usage.outputTokens > 0) { + const fmtN = (n: number) => n.toLocaleString('en-US'); + console.log( + pc.dim(`\nSession: ↑ ${fmtN(usage.inputTokens)} ↓ ${fmtN(usage.outputTokens)} tokens total`), + ); + } + console.log(pc.dim('Bye.')); process.exit(0); }); diff --git a/src/ui/render.ts b/src/ui/render.ts index 1e86808..1364ddd 100644 --- a/src/ui/render.ts +++ b/src/ui/render.ts @@ -10,6 +10,10 @@ function preview(name: string, input: unknown): string { return JSON.stringify(obj); } +function fmtN(n: number): string { + return n.toLocaleString('en-US'); +} + function truncate(s: string, n: number): string { const oneLine = s.replace(/\s*\n\s*/g, ' ').trim(); return oneLine.length > n ? `${oneLine.slice(0, n)}…` : oneLine; @@ -46,8 +50,8 @@ export function createTerminalUI(): AgentUI { ensureNewline(); write(pc.yellow(` ⊘ ${name} denied\n`)); }, - onUsage() { - // Token usage is available here; kept silent to reduce noise in the MVP. 
+ onUsage(usage) { + write(pc.dim(` ↑ ${fmtN(usage.inputTokens)} ↓ ${fmtN(usage.outputTokens)} tokens\n`)); }, onAssistantEnd() { ensureNewline(); diff --git a/tests/agent/loop.test.ts b/tests/agent/loop.test.ts index da380d5..fc043a9 100644 --- a/tests/agent/loop.test.ts +++ b/tests/agent/loop.test.ts @@ -165,6 +165,31 @@ describe('AgentLoop', () => { } }); + it('accumulates token usage across a single turn', async () => { + const provider = new ScriptedProvider([ + [ + { type: 'text', delta: 'hi' }, + { type: 'done', usage: { inputTokens: 10, outputTokens: 5 }, stopReason: 'end_turn' }, + ], + ]); + const { ui } = recordingUI(); + const loop = makeLoop(provider, ui, gateWith('yes')); + await loop.run('hello'); + expect(loop.getUsage()).toEqual({ inputTokens: 10, outputTokens: 5 }); + }); + + it('accumulates token usage across multiple run() calls', async () => { + const provider = new ScriptedProvider([ + [{ type: 'done', usage: { inputTokens: 10, outputTokens: 5 }, stopReason: 'end_turn' }], + [{ type: 'done', usage: { inputTokens: 20, outputTokens: 8 }, stopReason: 'end_turn' }], + ]); + const { ui } = recordingUI(); + const loop = makeLoop(provider, ui, gateWith('yes')); + await loop.run('first'); + await loop.run('second'); + expect(loop.getUsage()).toEqual({ inputTokens: 30, outputTokens: 13 }); + }); + it('stops at the iteration guard when tools never stop', async () => { const looping: ProviderEvent[][] = []; for (let i = 0; i < 10; i += 1) { diff --git a/tests/ui/render.test.ts b/tests/ui/render.test.ts index 3160e9a..f443bb1 100644 --- a/tests/ui/render.test.ts +++ b/tests/ui/render.test.ts @@ -39,6 +39,15 @@ describe('createTerminalUI', () => { expect(out).toContain('max iterations'); }); + it('displays token usage per turn', () => { + const out = capture(() => { + const ui = createTerminalUI(); + ui.onUsage({ inputTokens: 1234, outputTokens: 567 }); + }); + expect(out).toContain('1,234'); + expect(out).toContain('567'); + }); + it('previews path- and 
pattern-based tools', () => { const out = capture(() => { const ui = createTerminalUI();