diff --git a/.env.example b/.env.example
index 23d4820..fb759a3 100644
--- a/.env.example
+++ b/.env.example
@@ -5,3 +5,7 @@ GEMINI_API_KEY=
 # Optional overrides (also settable via config file / CLI flags)
 # TINY_CODE_PROVIDER=anthropic   # anthropic | gemini
 # TINY_CODE_MODEL=claude-opus-4-8
+
+# Self-improvement: reflect on sessions and propose markdown-only improvement PRs.
+# On by default; set to 0 to disable. Requires the `gh` CLI installed + authed.
+# TINY_CODE_IMPROVE=1            # 1 | 0
diff --git a/AGENTS.md b/AGENTS.md
index 2b900ff..05ae519 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -26,3 +26,12 @@ runaway costs.
 - No business logic. This is a general-purpose tool.
 - Don't add a second state paradigm or heavy dependencies without a clear reason.
 - New deferred features go in `TODO.md` with a rationale and rough approach.
+
+## Self-improvement (`src/improve/`)
+- Proposals are markdown-only PRs (`improvements/<slug>.md`). The "never code"
+  guarantee is structural — the PR creator validates the slug, writes one file,
+  and stages exactly one explicit path (never `git add -A`). Preserve this; do
+  not loosen `src/improve/pr.ts` to stage arbitrary paths.
+- Reflection (`src/improve/reflect.ts`) must call the provider with `tools: []`
+  so it can never execute anything from a transcript.
+- Opening PRs shells out to the `gh` CLI (assumed installed + authenticated).
diff --git a/README.md b/README.md
index 0aca72d..9d1363f 100644
--- a/README.md
+++ b/README.md
@@ -45,6 +45,7 @@ In the REPL: type a request, watch it work. Mutating actions (writes, edits,
 shell commands) prompt for approval unless pre-approved in config.
 
 - `/help` — list commands
+- `/improve` — reflect on the session and propose an improvement PR (see below)
 - `/<name> [args]` — run a custom command (see below)
 - `/exit` — quit
 
@@ -127,6 +128,37 @@ the savings where it can:
 (see `TODO.md`), which will keep input-token counts from compounding across
 many turns without any user action.
 
+## Self-improvement
+
+tiny-code can learn from how it's used. When you leave a session with `/exit` or
+`/quit` (or whenever you run `/improve`), it reflects on the conversation
+transcript looking for recurring friction — tool errors, repeated retries, denied
+permissions, missing capabilities. If it finds a concrete improvement, it asks
+for your permission to open a pull request.
+
+That PR contains **only a single markdown file** under `improvements/`
+describing the proposed change, targeting `main` for a maintainer to review and
+implement separately. **It never contains code changes** — this is enforced
+structurally (the PR creator only ever stages one regex-validated markdown path),
+so a prompt-injected session cannot smuggle code into a PR.
+
+PRs are opened via the [`gh` CLI](https://cli.github.com/), which must be
+installed and authenticated (`gh auth login`); the working tree must be clean.
+
+```json
+{
+  "improve": {
+    "enabled": true,
+    "baseBranch": "main",
+    "onSessionEnd": true
+  }
+}
+```
+
+The feature is **on by default**. Set `improve.enabled` to `false` (or export
+`TINY_CODE_IMPROVE=0`) to disable it entirely; set `onSessionEnd` to `false` to
+keep `/improve` but skip the automatic reflection at exit.
+
 ## Development
 
 ```bash
diff --git a/src/config/load.ts b/src/config/load.ts
index f71112b..d2ce512 100644
--- a/src/config/load.ts
+++ b/src/config/load.ts
@@ -27,6 +27,17 @@ export interface ResolvedConfig {
   maxIterations: number;
   commandDirs: string[];
   allow: AllowRules;
+  improve: ImproveConfig;
+}
+
+/** Resolved settings for the self-improvement / proposal-PR feature. */
+export interface ImproveConfig {
+  /** Master switch (manual and automatic); `TINY_CODE_IMPROVE=0|1` overrides the file value. */
+  enabled: boolean;
+  /** Branch that proposal PRs target; `'main'` when the config file does not set it. */
+  baseBranch: string;
+  /** Whether to reflect automatically when the session ends; `true` when unset. */
+  onSessionEnd: boolean;
 }
 
 export interface CliOverrides {
@@ -56,6 +67,13 @@ const FileConfigSchema = z
         write: z.array(z.string()).optional(),
       })
       .optional(),
+    improve: z
+      .object({
+        enabled: z.boolean().optional(),
+        baseBranch: z.string().optional(),
+        onSessionEnd: z.boolean().optional(),
+      })
+      .optional(),
   })
   .strict();
 
@@ -118,5 +136,15 @@ export function loadConfig(overrides: CliOverrides = {}, cwd: string = process.c
       bash: file.allow?.bash ?? [],
       write: file.allow?.write ?? [],
     },
+    improve: {
+      enabled:
+        env.TINY_CODE_IMPROVE === '0'
+          ? false
+          : env.TINY_CODE_IMPROVE === '1'
+            ? true
+            : (file.improve?.enabled ?? true),
+      baseBranch: file.improve?.baseBranch ?? 'main',
+      onSessionEnd: file.improve?.onSessionEnd ?? true,
+    },
   };
 }
diff --git a/src/improve/pr.ts b/src/improve/pr.ts
new file mode 100644
index 0000000..698bf17
--- /dev/null
+++ b/src/improve/pr.ts
@@ -0,0 +1,126 @@
+import { execFile } from 'node:child_process';
+import { promisify } from 'node:util';
+import { mkdirSync, writeFileSync } from 'node:fs';
+import { join, resolve, sep } from 'node:path';
+import { SLUG_RE } from './slug.js';
+
+const run = promisify(execFile);
+
+export interface CreatePrOptions {
+  cwd: string; // repository directory; every git/gh command runs with this as its cwd
+  /** Already-slugified identifier (validated again here, defensively). */
+  slug: string;
+  title: string; // used in the commit subject and as the PR title
+  markdown: string; // full contents of `improvements/<slug>.md`
+  baseBranch?: string; // branch the PR targets; 'main' when omitted
+}
+
+export interface PrResult {
+  ok: boolean; // true only when `gh pr create` completed without throwing
+  url?: string; // last non-empty line of `gh pr create` stdout, when there is one
+  reason?: string; // human-readable explanation, set when ok is false
+}
+
+const IMPROVEMENTS_DIR = 'improvements';
+
+/**
+ * Open a PR containing exactly one markdown file (`improvements/<slug>.md`).
+ *
+ * The "markdown-only, never code" guarantee is structural, not advisory:
+ *  - filenames and branch names derive solely from a regex-validated slug;
+ *  - the proposal branch is cut from the freshly fetched base branch, never
+ *    from the user's HEAD, so the PR cannot carry their unmerged commits;
+ *  - staging is a single explicit path, never `git add -A`/`.`, and the staged
+ *    set is asserted to be exactly that one path before committing.
+ * The model only ever influences the file's *contents* and the PR title.
+ * Most failures are reported as `{ ok: false, reason }` rather than thrown.
+ */
+export async function createImprovementPr(opts: CreatePrOptions): Promise<PrResult> {
+  const { cwd, slug, title, markdown } = opts;
+  const baseBranch = opts.baseBranch ?? 'main';
+
+  // Defense in depth: never trust the caller's slug.
+  if (!SLUG_RE.test(slug)) {
+    return { ok: false, reason: `Refusing unsafe slug: ${slug}` };
+  }
+  // baseBranch is handed to `git fetch` below; refuse anything that is not a
+  // plain branch name (a leading `-` would be parsed as an option).
+  if (!/^[A-Za-z0-9][A-Za-z0-9._\/-]*$/.test(baseBranch)) {
+    return { ok: false, reason: `Refusing unsafe base branch: ${baseBranch}` };
+  }
+
+  const relPath = `${IMPROVEMENTS_DIR}/${slug}.md`;
+  const dirAbs = join(cwd, IMPROVEMENTS_DIR);
+  const fileAbs = join(dirAbs, `${slug}.md`);
+  // Path-traversal guard (redundant with SLUG_RE, kept as a hard boundary).
+  if (resolve(fileAbs) !== fileAbs || !fileAbs.startsWith(dirAbs + sep)) {
+    return { ok: false, reason: 'Resolved path escaped the improvements directory.' };
+  }
+
+  const branch = `improve/${slug}`;
+
+  // --- Preflight: fail gracefully rather than throw into the exit path. ---
+  try {
+    await run('gh', ['--version'], { cwd });
+  } catch {
+    return { ok: false, reason: 'gh CLI not found — install and authenticate it to open improvement PRs.' };
+  }
+  try {
+    await run('git', ['rev-parse', '--is-inside-work-tree'], { cwd });
+  } catch {
+    return { ok: false, reason: 'Not inside a git repository.' };
+  }
+  try {
+    await run('gh', ['auth', 'status'], { cwd });
+  } catch {
+    return { ok: false, reason: 'gh CLI is not authenticated (run `gh auth login`).' };
+  }
+
+  const dirty = (await run('git', ['status', '--porcelain'], { cwd })).stdout.trim();
+  if (dirty.length > 0) {
+    const reason = 'Working tree has uncommitted changes — commit or stash them before proposing an improvement.';
+    return { ok: false, reason };
+  }
+  const original = (await run('git', ['rev-parse', '--abbrev-ref', 'HEAD'], { cwd })).stdout.trim();
+
+  try {
+    // Branch from the remote base, not from HEAD: a branch cut from a feature
+    // branch would push that branch's commits and show them in the PR diff.
+    await run('git', ['fetch', 'origin', baseBranch], { cwd });
+    await run('git', ['checkout', '-b', branch, 'FETCH_HEAD'], { cwd });
+
+    mkdirSync(dirAbs, { recursive: true });
+    writeFileSync(fileAbs, markdown.endsWith('\n') ? markdown : `${markdown}\n`);
+
+    // The single, explicit staged path — never `git add -A`/`.`.
+    await run('git', ['add', relPath], { cwd });
+
+    // Belt-and-suspenders: confirm nothing else got staged.
+    const stagedOut = (await run('git', ['diff', '--cached', '--name-only'], { cwd })).stdout;
+    const staged = stagedOut.split('\n').map((s) => s.trim()).filter(Boolean);
+    if (staged.length !== 1 || staged[0] !== relPath) {
+      throw new Error(`Unexpected staged files: ${staged.join(', ') || '(none)'}`);
+    }
+
+    await run('git', ['commit', '-m', `docs: propose improvement — ${title}`], { cwd });
+    await run('git', ['push', '-u', 'origin', branch], { cwd });
+
+    const body =
+      `Automated improvement proposal generated by tiny-code from session usage.\n\n` +
+      `This PR intentionally contains a single markdown file under \`${IMPROVEMENTS_DIR}/\` and no code changes — ` +
+      `it is for a maintainer to review and implement separately.`;
+    const prArgs = ['pr', 'create', '--base', baseBranch, '--head', branch, '--title', title, '--body', body];
+    const created = await run('gh', prArgs, { cwd });
+    const url = created.stdout.trim().split('\n').filter(Boolean).pop();
+    return url ? { ok: true, url } : { ok: true };
+  } catch (err) {
+    return { ok: false, reason: err instanceof Error ? err.message : String(err) };
+  } finally {
+    // Best-effort restore of the user's original branch.
+    try {
+      await run('git', ['checkout', original], { cwd });
+    } catch {
+      /* leave them on the improve branch rather than masking the real result */
+    }
+  }
+}
diff --git a/src/improve/reflect.ts b/src/improve/reflect.ts
new file mode 100644
index 0000000..bf8a1a6
--- /dev/null
+++ b/src/improve/reflect.ts
@@ -0,0 +1,95 @@
+import type { ModelProvider } from '../providers/types.js';
+import type { Message } from '../agent/types.js';
+
+/** Sentinel the model emits when a session yields nothing worth proposing. */
+export const NO_IMPROVEMENT = 'NO_IMPROVEMENT';
+
+const MAX_RESULT_CHARS = 2_000; // cap applied to each tool_result body in the transcript
+const MAX_TRANSCRIPT_CHARS = 60_000; // cap on the whole transcript; the trailing part is kept
+
+const REFLECTION_SYSTEM = `You are a contributor reviewing how the "tiny-code" CLI coding agent itself performed in the session below. You are NOT here to finish the user's coding task — you are looking for ways to improve the agent (its prompts, tools, ergonomics, or docs).
+
+Look for recurring friction: tool errors, repeated retries on the same file, denied permissions, confusion, hitting the iteration limit, or missing capabilities.
+
+If — and only if — you find a concrete, worthwhile improvement, respond with a SINGLE markdown document and nothing else, in exactly this structure:
+
+# <concise title>
+
+## Summary
+<one or two sentences>
+
+## Motivation
+<evidence drawn from this specific session>
+
+## Proposed change
+<what should change and why>
+
+## Affected areas
+<files, tools, or prompts likely involved>
+
+## Risks
+<trade-offs or things to watch>
+
+If there is no clear improvement worth filing, respond with exactly:
+${NO_IMPROVEMENT}
+
+Do not propose code. Do not include anything outside the document or the sentinel.`;
+
+/**
+ * Flatten the conversation, one entry per non-empty content block; keeps only the trailing MAX_TRANSCRIPT_CHARS.
+ */
+export function serializeTranscript(messages: readonly Message[]): string {
+  const rendered: string[] = [];
+  for (const { role, content } of messages) {
+    for (const part of content) {
+      switch (part.type) {
+        case 'text': {
+          const body = part.text.trim();
+          if (body.length > 0) rendered.push(`[${role}] ${body}`);
+          break;
+        }
+        case 'tool_use':
+          rendered.push(`[tool_use] ${part.name} ${JSON.stringify(part.input ?? {})}`);
+          break;
+        default:
+          rendered.push(`[tool_result${part.isError ? ' (error)' : ''}] ${truncate(part.content, MAX_RESULT_CHARS)}`);
+      }
+    }
+  }
+  const joined = rendered.join('\n');
+  return joined.length <= MAX_TRANSCRIPT_CHARS ? joined : joined.slice(joined.length - MAX_TRANSCRIPT_CHARS);
+}
+
+export interface ReflectOptions {
+  provider: ModelProvider; // backend that receives the single reflection request
+  transcript: string; // already-serialized session text, embedded in the user message
+  cwd: string; // reported to the model as "Working directory"
+}
+
+/**
+ * Run a single tool-free reflection pass. Returns the proposal markdown, or
+ * `null` when the model declines (sentinel) or emits no `# ` heading at all.
+ * No tools are passed, so this call can only emit text — reflection stays safe
+ * regardless of what the transcript contains.
+ */
+export async function reflect(opts: ReflectOptions): Promise<string | null> {
+  const userText = `Working directory: ${opts.cwd}\n\nSession transcript:\n\n${opts.transcript}`;
+  let text = '';
+  for await (const event of opts.provider.send({
+    system: REFLECTION_SYSTEM,
+    messages: [{ role: 'user', content: [{ type: 'text', text: userText }] }],
+    tools: [],
+  })) {
+    if (event.type === 'text') text += event.delta;
+  }
+
+  const trimmed = text.trim();
+  // Usable output must carry the `# <title>` heading the prompt demands; this also
+  // drops decorated sentinels ("NO_IMPROVEMENT.", fenced, prefixed) that `===` misses.
+  const hasHeading = /^#[ \t]+\S/m.test(trimmed);
+  return trimmed === NO_IMPROVEMENT || !hasHeading ? null : trimmed;
+}
+
+function truncate(s: string, n: number): string { // caps s at n code units, marking any cut with an ellipsis
+  return s.length <= n ? s : s.slice(0, n) + '…';
+}
diff --git a/src/improve/run.ts b/src/improve/run.ts
new file mode 100644
index 0000000..65d97c8
--- /dev/null
+++ b/src/improve/run.ts
@@ -0,0 +1,66 @@
+import type { ModelProvider } from '../providers/types.js';
+import type { Message } from '../agent/types.js';
+import { reflect, serializeTranscript } from './reflect.js';
+import { slugify } from './slug.js';
+import { createImprovementPr } from './pr.js';
+
+export interface RunImprovementOptions {
+  provider: ModelProvider; // model backend used for the reflection call
+  messages: readonly Message[]; // session history to serialize and reflect on
+  cwd: string; // passed to both the reflection prompt and the PR creator
+  baseBranch: string; // branch the proposal PR targets
+  /** Surface a line of status to the user. */
+  log: (line: string) => void;
+  /** Ask the user to approve opening a PR; returns true to proceed. */
+  confirm: (title: string) => Promise<boolean>;
+}
+
+/** First `# ` heading in the markdown, used as the PR title and slug seed; falls back to a fixed title. */
+function extractTitle(markdown: string): string {
+  const match = markdown.match(/^#[ \t]+(.+)$/m); // [ \t], not \s: a bare `#` line must not swallow the next line
+  return match?.[1]?.trim() || 'tiny-code improvement'; // `||` also covers a whitespace-only heading
+}
+
+/**
+ * End-to-end improvement flow: reflect on the session, and if there's a
+ * proposal, print it in full and ask the user before opening a markdown-only
+ * PR. Failures are logged through `opts.log`; nothing is thrown, so it is safe
+ * to call from the REPL's exit path.
+ */
+export async function runImprovement(opts: RunImprovementOptions): Promise<void> {
+  try {
+    const transcript = serializeTranscript(opts.messages);
+    if (transcript.trim().length === 0) {
+      opts.log('No session activity to reflect on.');
+      return;
+    }
+
+    const proposal = await reflect({ provider: opts.provider, transcript, cwd: opts.cwd });
+    if (!proposal) {
+      opts.log('No improvements suggested for this session.');
+      return;
+    }
+
+    // The proposal is about to be published and is built from session content,
+    // so let the user read all of it — not just the title — before approving.
+    opts.log(`Proposed improvement:\n\n${proposal}\n`);
+    const title = extractTitle(proposal);
+    const approved = await opts.confirm(title);
+    if (!approved) {
+      opts.log('Skipped — no PR created.');
+      return;
+    }
+
+    const { cwd, baseBranch } = opts;
+    const slug = slugify(title);
+    const result = await createImprovementPr({ cwd, slug, title, markdown: proposal, baseBranch });
+
+    if (result.ok) {
+      opts.log(`Opened improvement PR${result.url ? `: ${result.url}` : '.'}`);
+    } else {
+      opts.log(`Could not open PR: ${result.reason ?? 'unknown error'}`);
+    }
+  } catch (err) {
+    opts.log(`Improvement step failed: ${err instanceof Error ? err.message : String(err)}`);
+  }
+}
diff --git a/src/improve/slug.ts b/src/improve/slug.ts
new file mode 100644
index 0000000..8b8f86e
--- /dev/null
+++ b/src/improve/slug.ts
@@ -0,0 +1,34 @@
+/**
+ * Security-critical filename derivation for improvement proposals.
+ *
+ * The PR creator only ever writes/stages a path built from this slug, so the
+ * slug pattern is the single source of truth that keeps a (possibly injected)
+ * model from influencing anything beyond a single markdown file's contents.
+ */
+
+/** A slug is lowercase alphanumerics joined by single dashes — no `/`, no `.`. */
+export const SLUG_RE = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
+
+const MAX_BASE_LENGTH = 50; // cap on the title-derived part; the timestamp suffix is added on top
+
+/**
+ * Derive a filename-safe slug from free-form text. The result always matches
+ * {@link SLUG_RE}: a dash-separated base (at most MAX_BASE_LENGTH characters)
+ * followed by a base-36 timestamp, or `improvement-<ts>` when the title has no
+ * usable characters.
+ */
+export function slugify(title: string): string {
+  const stamp = Date.now().toString(36);
+  const fallback = `improvement-${stamp}`;
+  const trimDashes = (s: string): string => s.replace(/^-+|-+$/g, '');
+
+  // Collapse every run of disallowed characters into one dash, then trim; the
+  // second trim handles a dash left dangling at the cut point.
+  const dashed = trimDashes(title.toLowerCase().replace(/[^a-z0-9]+/g, '-'));
+  const base = trimDashes(dashed.slice(0, MAX_BASE_LENGTH));
+  if (base.length === 0) return fallback;
+
+  // This value becomes a filename and a branch name, so verify instead of assuming.
+  const candidate = `${base}-${stamp}`;
+  return SLUG_RE.test(candidate) ? candidate : fallback;
+}
diff --git a/src/repl.ts b/src/repl.ts
index 3e9990e..f660036 100644
--- a/src/repl.ts
+++ b/src/repl.ts
@@ -12,10 +12,12 @@ import { loadProjectContext } from './config/context.js';
 import { buildSystemPrompt } from './agent/systemPrompt.js';
 import { loadCommands, renderCommand } from './commands/loader.js';
 import type { Command } from './commands/types.js';
+import { runImprovement } from './improve/run.js';
 
 function printHelp(commands: Map<string, Command>): void {
   console.log(pc.bold('\nBuilt-in:'));
   console.log('  /help            Show this help');
+  console.log('  /improve         Reflect on this session and propose an improvement PR');
   console.log('  /exit, /quit     Leave the session');
   if (commands.size > 0) {
     console.log(pc.bold('\nCustom commands:'));
@@ -60,6 +62,44 @@ export async function startRepl(overrides: CliOverrides): Promise<void> {
     maxIterations: config.maxIterations,
   });
 
+  // Tracks the transcript length at the last reflection, so the auto-trigger on
+  // exit doesn't re-run when nothing happened since a manual /improve.
+  let lastImprovedAt = 0;
+
+  // Resolves true only for an explicit "y"/"yes" (any case); anything else declines.
+  const confirmPr = (title: string): Promise<boolean> => {
+    const prompt = `${pc.yellow('\nOpen a PR with this improvement?')} ${pc.dim(title)} [y/N] `;
+    return new Promise<boolean>((done) => {
+      rl.question(prompt, (reply) => done(/^y(es)?$/i.test(reply.trim())));
+    });
+  };
+
+  // One reflection pass; notes the transcript length first so the exit hook can skip repeats.
+  const improve = async (): Promise<void> => {
+    lastImprovedAt = agent.getMessages().length;
+    const { baseBranch } = config.improve;
+    const log = (line: string): void => {
+      console.log(pc.dim(line));
+    };
+    const messages = agent.getMessages();
+
+    await runImprovement({ provider, messages, cwd, baseBranch, log, confirm: confirmPr });
+  };
+
+  // Exit hook for /exit and /quit. It must run before readline closes, because
+  // the confirmation prompt reads from it. Does nothing when the feature or the
+  // on-exit trigger is off, or when no messages arrived since the last reflection.
+  const maybeAutoImprove = async (): Promise<void> => {
+    const { enabled, onSessionEnd } = config.improve;
+    if (!enabled || !onSessionEnd) return;
+
+    const hasNewActivity = agent.getMessages().length > lastImprovedAt;
+    if (!hasNewActivity) return;
+
+    console.log(pc.dim('\nReflecting on this session…'));
+    await improve();
+  };
+
   console.log(
     pc.bold('tiny-code') + pc.dim(` · ${provider.name}:${provider.model} · ${cwd}`),
   );
@@ -75,6 +115,7 @@ export async function startRepl(overrides: CliOverrides): Promise<void> {
       return;
     }
     if (input === '/exit' || input === '/quit') {
+      await maybeAutoImprove();
       rl.close();
       return;
     }
@@ -83,6 +124,15 @@ export async function startRepl(overrides: CliOverrides): Promise<void> {
       ask();
       return;
     }
+    if (input === '/improve') {
+      if (config.improve.enabled) {
+        await improve();
+      } else {
+        console.log(pc.dim('Self-improvement is disabled in config.'));
+      }
+      ask();
+      return;
+    }
 
     let userMessage = input;
     if (input.startsWith('/')) {
diff --git a/tests/config/load.test.ts b/tests/config/load.test.ts
index f1a5829..2b73592 100644
--- a/tests/config/load.test.ts
+++ b/tests/config/load.test.ts
@@ -11,6 +11,7 @@ const ENV_KEYS = [
   'TINY_CODE_MODEL',
   'TINY_CODE_MAX_TOKENS',
   'TINY_CODE_EFFORT',
+  'TINY_CODE_IMPROVE',
   'HOME',
 ];
 
@@ -76,6 +77,34 @@ describe('loadConfig', () => {
     expect(cfg.allow.write).toEqual(['src/**']);
   });
 
+  it('enables self-improvement by default', () => {
+    const cfg = loadConfig({}, cwd);
+    expect(cfg.improve.enabled).toBe(true);
+    expect(cfg.improve.baseBranch).toBe('main');
+    expect(cfg.improve.onSessionEnd).toBe(true);
+  });
+
+  it('lets TINY_CODE_IMPROVE=0 disable the feature over a config file', async () => {
+    await writeFile(
+      join(cwd, 'tiny-code.config.json'),
+      JSON.stringify({ improve: { enabled: true } }),
+    );
+    process.env.TINY_CODE_IMPROVE = '0';
+    const cfg = loadConfig({}, cwd);
+    expect(cfg.improve.enabled).toBe(false);
+  });
+
+  it('reads improve settings from a config file', async () => {
+    await writeFile(
+      join(cwd, 'tiny-code.config.json'),
+      JSON.stringify({ improve: { enabled: false, baseBranch: 'develop', onSessionEnd: false } }),
+    );
+    const cfg = loadConfig({}, cwd);
+    expect(cfg.improve.enabled).toBe(false);
+    expect(cfg.improve.baseBranch).toBe('develop');
+    expect(cfg.improve.onSessionEnd).toBe(false);
+  });
+
   it('lets env override the config file model', async () => {
     await writeFile(
       join(cwd, 'tiny-code.config.json'),
diff --git a/tests/improve/pr.test.ts b/tests/improve/pr.test.ts
new file mode 100644
index 0000000..fb8f2f0
--- /dev/null
+++ b/tests/improve/pr.test.ts
@@ -0,0 +1,142 @@
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import { mkdtemp, rm } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+
+interface Call {
+  cmd: string;
+  args: string[];
+}
+
+// Shared mock state, reset per test.
+const calls: Call[] = [];
+let handler: (cmd: string, args: string[]) => { stdout: string; stderr: string };
+
+vi.mock('node:child_process', () => {
+  const execFile = function execFile(): void {
+    /* unused — only the promisified custom path is exercised */
+  };
+  // promisify(execFile) returns this custom function, resolving to {stdout,stderr}.
+  (execFile as unknown as Record<symbol, unknown>)[
+    Symbol.for('nodejs.util.promisify.custom')
+  ] = (cmd: string, args: string[]) => {
+    calls.push({ cmd, args });
+    return Promise.resolve(handler(cmd, args));
+  };
+  return { execFile };
+});
+
+// Imported after the mock is registered.
+const { createImprovementPr } = await import('../../src/improve/pr.js');
+
+/** Fake command runner: clean work tree, authed gh, and an in-memory record of `git add` paths. */
+function happyHandler() {
+  const stagedPaths: string[] = [];
+  const reply = (stdout: string) => ({ stdout, stderr: '' });
+
+  return (cmd: string, args: string[]) => {
+    const [sub] = args;
+    if (cmd === 'gh') return reply(sub === 'pr' ? 'https://github.com/o/r/pull/7' : '');
+    if (cmd !== 'git') return reply('');
+
+    if (sub === 'add') stagedPaths.push(args[1] ?? '');
+    if (args.includes('--cached') && args.includes('--name-only')) {
+      return reply(stagedPaths.join('\n'));
+    }
+    if (sub === 'rev-parse' && args.includes('--abbrev-ref')) return reply('work-branch');
+    return reply('');
+  };
+}
+
+let cwd: string;
+
+beforeEach(async () => {
+  cwd = await mkdtemp(join(tmpdir(), 'tiny-code-pr-'));
+  calls.length = 0;
+  handler = happyHandler();
+});
+
+afterEach(async () => {
+  await rm(cwd, { recursive: true, force: true });
+});
+
+describe('createImprovementPr', () => {
+  it('opens a PR staging only the single markdown path', async () => {
+    const result = await createImprovementPr({
+      cwd,
+      slug: 'better-grep-abc',
+      title: 'Better grep',
+      markdown: '# Better grep\n',
+    });
+
+    expect(result.ok).toBe(true);
+    expect(result.url).toBe('https://github.com/o/r/pull/7');
+
+    const adds = calls.filter((c) => c.cmd === 'git' && c.args[0] === 'add');
+    expect(adds).toHaveLength(1);
+    expect(adds[0]?.args).toEqual(['add', 'improvements/better-grep-abc.md']);
+  });
+
+  it('never stages with -A or .', async () => {
+    await createImprovementPr({ cwd, slug: 'x-1', title: 'X', markdown: '# X' });
+    for (const c of calls) {
+      if (c.cmd === 'git' && c.args[0] === 'add') {
+        expect(c.args).not.toContain('-A');
+        expect(c.args).not.toContain('.');
+      }
+    }
+  });
+
+  it('passes title to gh as a discrete argument (no shell interpolation)', async () => {
+    const evil = 'X"; rm -rf / #';
+    await createImprovementPr({ cwd, slug: 'x-2', title: evil, markdown: '# X' });
+    const prCall = calls.find((c) => c.cmd === 'gh' && c.args[0] === 'pr');
+    expect(prCall?.args).toContain(evil); // intact, as one arg — not concatenated into a shell line
+  });
+
+  it('refuses an unsafe slug before running anything', async () => {
+    const result = await createImprovementPr({
+      cwd,
+      slug: '../../etc/passwd',
+      title: 'X',
+      markdown: '# X',
+    });
+    expect(result.ok).toBe(false);
+    expect(result.reason).toMatch(/unsafe slug/);
+    expect(calls).toHaveLength(0);
+  });
+
+  it('fails gracefully when gh is missing', async () => {
+    handler = (cmd) => {
+      if (cmd === 'gh') throw Object.assign(new Error('not found'), { code: 'ENOENT' });
+      return { stdout: '', stderr: '' };
+    };
+    const result = await createImprovementPr({ cwd, slug: 'x-3', title: 'X', markdown: '# X' });
+    expect(result.ok).toBe(false);
+    expect(result.reason).toMatch(/gh CLI not found/);
+  });
+
+  it('fails gracefully when the working tree is dirty', async () => {
+    const base = happyHandler();
+    handler = (cmd, args) => {
+      if (cmd === 'git' && args[0] === 'status') return { stdout: ' M src/x.ts', stderr: '' };
+      return base(cmd, args);
+    };
+    const result = await createImprovementPr({ cwd, slug: 'x-4', title: 'X', markdown: '# X' });
+    expect(result.ok).toBe(false);
+    expect(result.reason).toMatch(/uncommitted changes/);
+  });
+
+  it('aborts if anything beyond the markdown file gets staged', async () => {
+    const base = happyHandler();
+    handler = (cmd, args) => {
+      if (cmd === 'git' && args.includes('--cached') && args.includes('--name-only')) {
+        return { stdout: 'improvements/x-5.md\nsrc/evil.ts', stderr: '' };
+      }
+      return base(cmd, args);
+    };
+    const result = await createImprovementPr({ cwd, slug: 'x-5', title: 'X', markdown: '# X' });
+    expect(result.ok).toBe(false);
+    expect(result.reason).toMatch(/Unexpected staged files/);
+  });
+});
diff --git a/tests/improve/reflect.test.ts b/tests/improve/reflect.test.ts
new file mode 100644
index 0000000..fd1fb17
--- /dev/null
+++ b/tests/improve/reflect.test.ts
@@ -0,0 +1,62 @@
+import { describe, it, expect } from 'vitest';
+import { reflect, serializeTranscript, NO_IMPROVEMENT } from '../../src/improve/reflect.js';
+import type { Message } from '../../src/agent/types.js';
+import type { ModelProvider, ProviderEvent, SendRequest } from '../../src/providers/types.js';
+
+class TextProvider implements ModelProvider {
+  readonly name = 'anthropic' as const;
+  readonly model = 'fake';
+  readonly sent: SendRequest[] = []; // every request passed to send(), in call order
+  // Test double: replays `chunks` as 'text' events, then emits a single 'done'.
+  constructor(private readonly chunks: string[]) {}
+
+  async *send(req: SendRequest): AsyncIterable<ProviderEvent> {
+    this.sent.push(req);
+    for (const delta of this.chunks) yield { type: 'text', delta };
+    yield { type: 'done', usage: { inputTokens: 0, outputTokens: 0 }, stopReason: 'end_turn' };
+  }
+}
+
+describe('serializeTranscript', () => {
+  it('flattens text, tool_use, and tool_result blocks', () => {
+    const messages: Message[] = [
+      { role: 'user', content: [{ type: 'text', text: 'fix the bug' }] },
+      {
+        role: 'assistant',
+        content: [{ type: 'tool_use', id: '1', name: 'bash', input: { command: 'ls' } }],
+      },
+      {
+        role: 'user',
+        content: [{ type: 'tool_result', toolUseId: '1', content: 'boom', isError: true }],
+      },
+    ];
+    const out = serializeTranscript(messages);
+    expect(out).toContain('[user] fix the bug');
+    expect(out).toContain('[tool_use] bash {"command":"ls"}');
+    expect(out).toContain('[tool_result (error)] boom');
+  });
+});
+
+describe('reflect', () => {
+  it('returns trimmed markdown when the model proposes something', async () => {
+    const provider = new TextProvider(['# Better grep\n', '\n## Summary\nuse rg']);
+    const result = await reflect({ provider, transcript: 'session', cwd: '/x' });
+    expect(result).toBe('# Better grep\n\n## Summary\nuse rg');
+  });
+
+  it('passes no tools to the provider', async () => {
+    const provider = new TextProvider(['# x']);
+    await reflect({ provider, transcript: 'session', cwd: '/x' });
+    expect(provider.sent[0]?.tools).toEqual([]);
+  });
+
+  it('returns null on the sentinel', async () => {
+    const provider = new TextProvider([NO_IMPROVEMENT]);
+    expect(await reflect({ provider, transcript: 's', cwd: '/x' })).toBeNull();
+  });
+
+  it('returns null on empty output', async () => {
+    const provider = new TextProvider(['   ']);
+    expect(await reflect({ provider, transcript: 's', cwd: '/x' })).toBeNull();
+  });
+});
diff --git a/tests/improve/run.test.ts b/tests/improve/run.test.ts
new file mode 100644
index 0000000..4337329
--- /dev/null
+++ b/tests/improve/run.test.ts
@@ -0,0 +1,80 @@
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+import type { Message } from '../../src/agent/types.js';
+import type { ModelProvider, ProviderEvent, SendRequest } from '../../src/providers/types.js';
+
+const createImprovementPr = vi.fn();
+vi.mock('../../src/improve/pr.js', () => ({ createImprovementPr }));
+
+const { runImprovement } = await import('../../src/improve/run.js');
+
+class TextProvider implements ModelProvider {
+  readonly name = 'anthropic' as const;
+  readonly model = 'fake';
+  constructor(private readonly text: string) {} // the whole reply, sent as one chunk
+  async *send(_req: SendRequest): AsyncIterable<ProviderEvent> {
+    yield { type: 'text', delta: this.text };
+    yield { type: 'done', usage: { inputTokens: 0, outputTokens: 0 }, stopReason: 'end_turn' };
+  }
+}
+
+const userMsg: Message[] = [{ role: 'user', content: [{ type: 'text', text: 'hi' }] }];
+
+function harness(provider: ModelProvider, confirmValue: boolean) {
+  const logs: string[] = []; // collects every line runImprovement passes to `log`
+  return {
+    logs,
+    opts: {
+      provider,
+      messages: userMsg,
+      cwd: '/x',
+      baseBranch: 'main',
+      log: (l: string) => logs.push(l),
+      confirm: async () => confirmValue, // answers the approval prompt without user input
+    },
+  };
+}
+
+beforeEach(() => {
+  createImprovementPr.mockReset();
+});
+
+describe('runImprovement', () => {
+  it('reports no activity for an empty transcript', async () => {
+    const { logs, opts } = harness(new TextProvider('# X'), true);
+    await runImprovement({ ...opts, messages: [] });
+    expect(logs.join()).toMatch(/No session activity/);
+    expect(createImprovementPr).not.toHaveBeenCalled();
+  });
+
+  it('reports when reflection yields nothing', async () => {
+    const { logs, opts } = harness(new TextProvider('NO_IMPROVEMENT'), true);
+    await runImprovement(opts);
+    expect(logs.join()).toMatch(/No improvements suggested/);
+    expect(createImprovementPr).not.toHaveBeenCalled();
+  });
+
+  it('skips PR creation when the user declines', async () => {
+    const { logs, opts } = harness(new TextProvider('# Better grep\nbody'), false);
+    await runImprovement(opts);
+    expect(logs.join()).toMatch(/Skipped/);
+    expect(createImprovementPr).not.toHaveBeenCalled();
+  });
+
+  it('creates a PR and logs the url on approval', async () => {
+    createImprovementPr.mockResolvedValue({ ok: true, url: 'https://example/pr/1' });
+    const { logs, opts } = harness(new TextProvider('# Better grep\nbody'), true);
+    await runImprovement(opts);
+    expect(createImprovementPr).toHaveBeenCalledOnce();
+    const arg = createImprovementPr.mock.calls[0]?.[0];
+    expect(arg.title).toBe('Better grep');
+    expect(arg.markdown).toContain('# Better grep');
+    expect(logs.join()).toMatch(/https:\/\/example\/pr\/1/);
+  });
+
+  it('logs the failure reason when PR creation fails', async () => {
+    createImprovementPr.mockResolvedValue({ ok: false, reason: 'gh CLI not found' });
+    const { logs, opts } = harness(new TextProvider('# Title\nbody'), true);
+    await runImprovement(opts);
+    expect(logs.join()).toMatch(/gh CLI not found/);
+  });
+});
diff --git a/tests/improve/slug.test.ts b/tests/improve/slug.test.ts
new file mode 100644
index 0000000..fa5131f
--- /dev/null
+++ b/tests/improve/slug.test.ts
@@ -0,0 +1,39 @@
+import { describe, it, expect } from 'vitest';
+import { SLUG_RE, slugify } from '../../src/improve/slug.js';
+
+describe('slugify', () => {
+  it('produces a SLUG_RE-valid slug from a normal title', () => {
+    const slug = slugify('Improve the grep tool');
+    expect(SLUG_RE.test(slug)).toBe(true);
+    expect(slug).toMatch(/^improve-the-grep-tool-/);
+  });
+
+  it('strips punctuation and collapses separators', () => {
+    const slug = slugify('Add  ??? web_fetch!! tool');
+    expect(SLUG_RE.test(slug)).toBe(true);
+    expect(slug.startsWith('-')).toBe(false);
+    expect(slug).not.toContain('_');
+  });
+
+  it('neutralizes path-traversal attempts', () => {
+    for (const evil of ['../../etc/passwd', '..\\..\\win', '/abs/path', 'a/b/c.md']) {
+      const slug = slugify(evil);
+      expect(SLUG_RE.test(slug)).toBe(true);
+      expect(slug).not.toContain('/');
+      expect(slug).not.toContain('.');
+    }
+  });
+
+  it('falls back to improvement-<ts> when nothing usable remains', () => {
+    const slug = slugify('!!!  ...');
+    expect(SLUG_RE.test(slug)).toBe(true);
+    expect(slug).toMatch(/^improvement-/);
+  });
+
+  it('caps the base length', () => {
+    const slug = slugify('x'.repeat(200));
+    // base (<=50) + '-' + timestamp suffix
+    expect(slug.length).toBeLessThan(70);
+    expect(SLUG_RE.test(slug)).toBe(true);
+  });
+});