rili-live · zizzle6717 · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026
diff --git a/.env.example b/.env.example
@@ -5,3 +5,7 @@ GEMINI_API_KEY=
 # Optional overrides (also settable via config file / CLI flags)
 # TINY_CODE_PROVIDER=anthropic   # anthropic | gemini
 # TINY_CODE_MODEL=claude-opus-4-8
+
+# Self-improvement: reflect on sessions and propose markdown-only improvement PRs.
+# On by default; set to 0 to disable, or 1 to force it on (either value overrides the config file). Requires the `gh` CLI installed + authed.
+# TINY_CODE_IMPROVE=1            # 1 | 0
diff --git a/AGENTS.md b/AGENTS.md
@@ -26,3 +26,12 @@ runaway costs.
 - No business logic. This is a general-purpose tool.
 - Don't add a second state paradigm or heavy dependencies without a clear reason.
 - New deferred features go in `TODO.md` with a rationale and rough approach.
+
+## Self-improvement (`src/improve/`)
+- Proposals are markdown-only PRs (`improvements/<slug>.md`). The "never code"
+  guarantee is structural — the PR creator validates the slug, writes one file,
+  and stages exactly one explicit path (never `git add -A`). Preserve this; do
+  not loosen `src/improve/pr.ts` to stage arbitrary paths.
+- Reflection (`src/improve/reflect.ts`) must call the provider with `tools: []`
+  so it can never execute anything from a transcript.
+- Opening PRs shells out to the `gh` CLI (assumed installed + authenticated).
diff --git a/README.md b/README.md
@@ -45,6 +45,7 @@ In the REPL: type a request, watch it work. Mutating actions (writes, edits,
 shell commands) prompt for approval unless pre-approved in config.
 
 - `/help` — list commands
+- `/improve` — reflect on the session and propose an improvement PR (see below)
 - `/<name> [args]` — run a custom command (see below)
 - `/exit` — quit
 
@@ -127,6 +128,37 @@ the savings where it can:
 (see `TODO.md`), which will keep input-token counts from compounding across
 many turns without any user action.
 
+## Self-improvement
+
+tiny-code can learn from how it's used. When a session ends (or when you run
+`/improve`), it reflects on the conversation transcript looking for recurring
+friction — tool errors, repeated retries, denied permissions, missing
+capabilities. If it finds a concrete improvement, it asks for your permission to
+open a pull request.
+
+That PR contains **only a single markdown file** under `improvements/`
+describing the proposed change, targeting `main` for a maintainer to review and
+implement separately. **It never contains code changes** — this is enforced
+structurally (the PR creator only ever stages one regex-validated markdown path),
+so a prompt-injected session cannot smuggle code into a PR.
+
+PRs are opened via the [`gh` CLI](https://cli.github.com/), which must be installed and authenticated
+(`gh auth login`); the working tree must be clean, and the proposal branch is pushed to the `origin` remote.
+
+```json
+{
+  "improve": {
+    "enabled": true,
+    "baseBranch": "main",
+    "onSessionEnd": true
+  }
+}
+```
+
+The feature is **on by default**. Set `improve.enabled` to `false` (or export
+`TINY_CODE_IMPROVE=0`) to disable it entirely; when the variable is `0` or `1` it overrides the config file. Set
+`improve.onSessionEnd` to `false` to keep `/improve` but skip the automatic reflection at exit.
+
 ## Development
 
 ```bash

diff --git a/src/config/load.ts b/src/config/load.ts
@@ -27,6 +27,17 @@ export interface ResolvedConfig {
   maxIterations: number;
   commandDirs: string[];
   allow: AllowRules;
+  improve: ImproveConfig;
+}
+
+/** Resolved settings for the self-improvement / proposal-PR feature. */
+export interface ImproveConfig {
+  /** Master switch for the whole feature (manual and automatic); `TINY_CODE_IMPROVE=0|1` overrides the file value. */
+  enabled: boolean;
+  /** Branch that improvement PRs target; resolves to `main` when the config file omits it. */
+  baseBranch: string;
+  /** Whether to reflect automatically when the session ends; resolves to `true` when the config file omits it. */
+  onSessionEnd: boolean;
 }
 
 export interface CliOverrides {
@@ -56,6 +67,13 @@ const FileConfigSchema = z
         write: z.array(z.string()).optional(),
       })
       .optional(),
+    improve: z
+      .object({
+        enabled: z.boolean().optional(),
+        baseBranch: z.string().optional(),
+        onSessionEnd: z.boolean().optional(),
+      })
+      .optional(),
   })
   .strict();
 
@@ -118,5 +136,15 @@ export function loadConfig(overrides: CliOverrides = {}, cwd: string = process.c
       bash: file.allow?.bash ?? [],
       write: file.allow?.write ?? [],
     },
+    improve: {
+      enabled:
+        env.TINY_CODE_IMPROVE === '0'
+          ? false
+          : env.TINY_CODE_IMPROVE === '1'
+            ? true
+            : (file.improve?.enabled ?? true),
+      baseBranch: file.improve?.baseBranch ?? 'main',
+      onSessionEnd: file.improve?.onSessionEnd ?? true,
+    },
   };
 }
diff --git a/src/improve/pr.ts b/src/improve/pr.ts
@@ -0,0 +1,126 @@
+import { execFile } from 'node:child_process';
+import { promisify } from 'node:util';
+import { mkdirSync, writeFileSync } from 'node:fs';
+import { join, resolve, sep } from 'node:path';
+import { SLUG_RE } from './slug.js';
+
+const run = promisify(execFile); // Promise-based execFile: arguments are passed as an array and no shell is spawned by default.
+
+export interface CreatePrOptions {
+  cwd: string; // Repository directory; every git/gh command is run with this as its cwd.
+  /** Already-slugified identifier (validated again here, defensively). */
+  slug: string; // Also names the file (`improvements/<slug>.md`) and the branch (`improve/<slug>`).
+  title: string; // Used in the commit subject and as the PR title.
+  markdown: string; // File contents; a trailing newline is appended when missing.
+  baseBranch?: string; // PR base branch; `createImprovementPr` falls back to 'main'.
+}
+
+export interface PrResult {
+  ok: boolean; // False when a validation step or a git/gh command failed; see `reason`.
+  url?: string; // Last non-empty line of the `gh pr create` output, when there is one.
+  reason?: string; // Human-readable failure explanation; set on every `ok: false` result.
+}
+
+const IMPROVEMENTS_DIR = 'improvements'; // Repo-relative directory for proposal files; also quoted in the PR body.
+
+/**
+ * Open a PR containing exactly one markdown file (`improvements/<slug>.md`).
+ *
+ * The "markdown-only, never code" guarantee is structural, not advisory:
+ *  - filenames and branch names derive solely from a regex-validated slug;
+ *  - the file is the only thing written to disk;
+ *  - staging is a single explicit path (`git add improvements/<slug>.md`),
+ *    never `git add -A`/`.`;
+ *  - the staged set is asserted to be exactly that one path before committing.
+ * The model only ever influences the file's *contents* and the PR title.
+ *
+ * @param opts - Repository directory (`cwd`), slug, PR title, markdown contents and optional base branch (defaults to `main`).
+ * @returns `{ ok: true, url? }` on success, `{ ok: false, reason }` otherwise; git/gh command failures are reported via `reason` rather than thrown.
+ */
+export async function createImprovementPr(opts: CreatePrOptions): Promise<PrResult> {
+  const { cwd, slug, title, markdown } = opts;
+  const baseBranch = opts.baseBranch ?? 'main';
+
+  // Defense in depth: never trust the caller's slug.
+  if (!SLUG_RE.test(slug)) {
+    return { ok: false, reason: `Refusing unsafe slug: ${slug}` };
+  }
+
+  const relPath = `${IMPROVEMENTS_DIR}/${slug}.md`;
+  const dirAbs = join(cwd, IMPROVEMENTS_DIR);
+  const fileAbs = join(dirAbs, `${slug}.md`);
+  // Path-traversal guard (redundant with SLUG_RE, kept as a hard boundary).
+  if (resolve(fileAbs) !== fileAbs || !fileAbs.startsWith(dirAbs + sep)) {
+    return { ok: false, reason: 'Resolved path escaped the improvements directory.' };
+  }
+
+  const branch = `improve/${slug}`;
+
+  // --- Preflight: fail gracefully rather than throw into the exit path. ---
+  const preflight: Array<[string, string[], string]> = [
+    ['gh', ['--version'], 'gh CLI not found — install and authenticate it to open improvement PRs.'],
+    ['git', ['rev-parse', '--is-inside-work-tree'], 'Not inside a git repository.'],
+    ['gh', ['auth', 'status'], 'gh CLI is not authenticated (run `gh auth login`).'],
+  ];
+  for (const [cmd, args, reason] of preflight) {
+    try {
+      await run(cmd, args, { cwd });
+    } catch {
+      return { ok: false, reason };
+    }
+  }
+
+  // Repository state queries can fail too (e.g. `rev-parse HEAD` in a repo with no commits), so guard them as well.
+  let original: string;
+  try {
+    const dirty = (await run('git', ['status', '--porcelain'], { cwd })).stdout.trim();
+    if (dirty.length > 0) {
+      return {
+        ok: false,
+        reason: 'Working tree has uncommitted changes — commit or stash them before proposing an improvement.',
+      };
+    }
+    const head = (await run('git', ['rev-parse', '--abbrev-ref', 'HEAD'], { cwd })).stdout.trim();
+    // A detached HEAD reports the literal "HEAD"; keep the commit id instead so the restore below is not a no-op.
+    original = head === 'HEAD' ? (await run('git', ['rev-parse', 'HEAD'], { cwd })).stdout.trim() : head;
+  } catch (err) {
+    return { ok: false, reason: err instanceof Error ? err.message : String(err) };
+  }
+
+  try {
+    await run('git', ['checkout', '-b', branch], { cwd });
+    mkdirSync(dirAbs, { recursive: true });
+    writeFileSync(fileAbs, markdown.endsWith('\n') ? markdown : `${markdown}\n`);
+
+    // The single, explicit staged path — never `git add -A`/`.`.
+    await run('git', ['add', relPath], { cwd });
+
+    // Belt-and-suspenders: confirm nothing else got staged.
+    const { stdout: stagedOut } = await run('git', ['diff', '--cached', '--name-only'], { cwd });
+    const staged = stagedOut.split('\n').map((s) => s.trim()).filter(Boolean);
+    if (staged.length !== 1 || staged[0] !== relPath) {
+      throw new Error(`Unexpected staged files: ${staged.join(', ') || '(none)'}`);
+    }
+
+    await run('git', ['commit', '-m', `docs: propose improvement — ${title}`], { cwd });
+    await run('git', ['push', '-u', 'origin', branch], { cwd });
+
+    const body =
+      `Automated improvement proposal generated by tiny-code from session usage.\n\n` +
+      `This PR intentionally contains a single markdown file under \`${IMPROVEMENTS_DIR}/\` and no code changes — ` +
+      `it is for a maintainer to review and implement separately.`;
+    const prArgs = ['pr', 'create', '--base', baseBranch, '--head', branch, '--title', title, '--body', body];
+    const created = await run('gh', prArgs, { cwd });
+    const url = created.stdout.trim().split('\n').filter(Boolean).pop();
+    return url ? { ok: true, url } : { ok: true };
+  } catch (err) {
+    return { ok: false, reason: err instanceof Error ? err.message : String(err) };
+  } finally {
+    // Best-effort restore of the user's original branch.
+    try {
+      await run('git', ['checkout', original], { cwd });
+    } catch {
+      /* leave them on the improve branch rather than masking the real result */
+    }
+  }
+}
diff --git a/src/improve/reflect.ts b/src/improve/reflect.ts
@@ -0,0 +1,95 @@
+import type { ModelProvider } from '../providers/types.js';
+import type { Message } from '../agent/types.js';
+
+/** Sentinel the model emits when a session yields nothing worth proposing. */
+export const NO_IMPROVEMENT = 'NO_IMPROVEMENT';
+
+const MAX_RESULT_CHARS = 2_000; // Cap applied to each tool-result block by `serializeTranscript`.
+const MAX_TRANSCRIPT_CHARS = 60_000; // Cap on the whole transcript; only the trailing characters are kept.
+
+const REFLECTION_SYSTEM = `You are a contributor reviewing how the "tiny-code" CLI coding agent itself performed in the session below. You are NOT here to finish the user's coding task — you are looking for ways to improve the agent (its prompts, tools, ergonomics, or docs).
+
+Look for recurring friction: tool errors, repeated retries on the same file, denied permissions, confusion, hitting the iteration limit, or missing capabilities.
+
+If — and only if — you find a concrete, worthwhile improvement, respond with a SINGLE markdown document and nothing else, in exactly this structure:
+
+# <concise title>
+
+## Summary
+<one or two sentences>
+
+## Motivation
+<evidence drawn from this specific session>
+
+## Proposed change
+<what should change and why>
+
+## Affected areas
+<files, tools, or prompts likely involved>
+
+## Risks
+<trade-offs or things to watch>
+
+If there is no clear improvement worth filing, respond with exactly:
+${NO_IMPROVEMENT}
+
+Do not propose code. Do not include anything outside the document or the sentinel.`;
+
+/**
+ * Render each content block as one line; skip blank text, cap tool results, keep only the transcript's tail when too long.
+ */
+export function serializeTranscript(messages: readonly Message[]): string {
+  const out: string[] = [];
+  for (const { role, content } of messages) {
+    for (const part of content) {
+      switch (part.type) {
+        case 'text': {
+          const body = part.text.trim();
+          if (body.length > 0) out.push(`[${role}] ${body}`);
+          break;
+        }
+        case 'tool_use':
+          out.push(`[tool_use] ${part.name} ${JSON.stringify(part.input ?? {})}`);
+          break;
+        default: // every remaining block kind is rendered as a tool result
+          out.push(`[tool_result${part.isError ? ' (error)' : ''}] ${truncate(part.content, MAX_RESULT_CHARS)}`);
+      }
+    }
+  }
+  const joined = out.join('\n');
+  return joined.length > MAX_TRANSCRIPT_CHARS ? joined.slice(joined.length - MAX_TRANSCRIPT_CHARS) : joined;
+}
+
+export interface ReflectOptions {
+  provider: ModelProvider; // Model backend whose `send` stream yields the reflection text.
+  transcript: string; // Serialized session text embedded in the user message.
+  cwd: string; // Working directory; only interpolated into the prompt text.
+}
+
+/**
+ * Run one reflection request against the provider with an empty tool list.
+ *
+ * Text deltas from the response stream are concatenated; with `tools: []` the
+ * model is handed nothing to invoke, whatever the transcript contains.
+ * @returns The trimmed proposal markdown, or `null` when the reply is empty or is exactly the `NO_IMPROVEMENT` sentinel.
+ */
+export async function reflect(opts: ReflectOptions): Promise<string | null> {
+  const { provider, transcript, cwd } = opts;
+  const prompt = `Working directory: ${cwd}\n\nSession transcript:\n\n${transcript}`;
+  const stream = provider.send({
+    system: REFLECTION_SYSTEM,
+    messages: [{ role: 'user', content: [{ type: 'text', text: prompt }] }],
+    tools: [],
+  });
+
+  let reply = '';
+  for await (const event of stream) {
+    if (event.type === 'text') reply += event.delta;
+  }
+  const proposal = reply.trim();
+  return proposal.length === 0 || proposal === NO_IMPROVEMENT ? null : proposal;
+}
+
+function truncate(s: string, n: number): string { // Keeps `s` as-is up to `n` UTF-16 units; longer input is cut at `n` and gets a trailing '…'.
+  return s.length > n ? s.slice(0, n).concat('…') : s;
+}