diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..45e7c3f Binary files /dev/null and b/.DS_Store differ diff --git a/.codex-pet-runs/iroha-doctoral/decoded/base.png b/.codex-pet-runs/iroha-doctoral/decoded/base.png new file mode 100644 index 0000000..95e6ad3 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/decoded/base.png differ diff --git a/.codex-pet-runs/iroha-doctoral/decoded/failed.png b/.codex-pet-runs/iroha-doctoral/decoded/failed.png new file mode 100644 index 0000000..2a35605 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/decoded/failed.png differ diff --git a/.codex-pet-runs/iroha-doctoral/decoded/idle.png b/.codex-pet-runs/iroha-doctoral/decoded/idle.png new file mode 100644 index 0000000..5f13c03 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/decoded/idle.png differ diff --git a/.codex-pet-runs/iroha-doctoral/decoded/jumping.png b/.codex-pet-runs/iroha-doctoral/decoded/jumping.png new file mode 100644 index 0000000..17aa7fc Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/decoded/jumping.png differ diff --git a/.codex-pet-runs/iroha-doctoral/decoded/review.png b/.codex-pet-runs/iroha-doctoral/decoded/review.png new file mode 100644 index 0000000..ad4581d Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/decoded/review.png differ diff --git a/.codex-pet-runs/iroha-doctoral/decoded/running-left.png b/.codex-pet-runs/iroha-doctoral/decoded/running-left.png new file mode 100644 index 0000000..37c8cc7 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/decoded/running-left.png differ diff --git a/.codex-pet-runs/iroha-doctoral/decoded/running-right.png b/.codex-pet-runs/iroha-doctoral/decoded/running-right.png new file mode 100644 index 0000000..a021a29 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/decoded/running-right.png differ diff --git a/.codex-pet-runs/iroha-doctoral/decoded/running.png b/.codex-pet-runs/iroha-doctoral/decoded/running.png new file 
mode 100644 index 0000000..8cc34b8 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/decoded/running.png differ diff --git a/.codex-pet-runs/iroha-doctoral/decoded/waiting.png b/.codex-pet-runs/iroha-doctoral/decoded/waiting.png new file mode 100644 index 0000000..7d13d1e Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/decoded/waiting.png differ diff --git a/.codex-pet-runs/iroha-doctoral/decoded/waving.png b/.codex-pet-runs/iroha-doctoral/decoded/waving.png new file mode 100644 index 0000000..7d22f68 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/decoded/waving.png differ diff --git a/.codex-pet-runs/iroha-doctoral/final/spritesheet.png b/.codex-pet-runs/iroha-doctoral/final/spritesheet.png new file mode 100644 index 0000000..3768493 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/final/spritesheet.png differ diff --git a/.codex-pet-runs/iroha-doctoral/final/spritesheet.webp b/.codex-pet-runs/iroha-doctoral/final/spritesheet.webp new file mode 100644 index 0000000..e4152ef Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/final/spritesheet.webp differ diff --git a/.codex-pet-runs/iroha-doctoral/final/validation.json b/.codex-pet-runs/iroha-doctoral/final/validation.json new file mode 100644 index 0000000..45763d0 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/final/validation.json @@ -0,0 +1,517 @@ +{ + "ok": true, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/final/spritesheet.webp", + "format": "WEBP", + "mode": "RGBA", + "width": 1536, + "height": 1872, + "transparent_rgb_residue_pixels": 0, + "errors": [], + "warnings": [], + "cells": [ + { + "state": "idle", + "row": 0, + "column": 0, + "used": true, + "nontransparent_pixels": 8249 + }, + { + "state": "idle", + "row": 0, + "column": 1, + "used": true, + "nontransparent_pixels": 8171 + }, + { + "state": "idle", + "row": 0, + "column": 2, + "used": true, + "nontransparent_pixels": 8128 + }, + { + 
"state": "idle", + "row": 0, + "column": 3, + "used": true, + "nontransparent_pixels": 8087 + }, + { + "state": "idle", + "row": 0, + "column": 4, + "used": true, + "nontransparent_pixels": 8106 + }, + { + "state": "idle", + "row": 0, + "column": 5, + "used": true, + "nontransparent_pixels": 8150 + }, + { + "state": "idle", + "row": 0, + "column": 6, + "used": false, + "nontransparent_pixels": 0 + }, + { + "state": "idle", + "row": 0, + "column": 7, + "used": false, + "nontransparent_pixels": 0 + }, + { + "state": "running-right", + "row": 1, + "column": 0, + "used": true, + "nontransparent_pixels": 8358 + }, + { + "state": "running-right", + "row": 1, + "column": 1, + "used": true, + "nontransparent_pixels": 9717 + }, + { + "state": "running-right", + "row": 1, + "column": 2, + "used": true, + "nontransparent_pixels": 9064 + }, + { + "state": "running-right", + "row": 1, + "column": 3, + "used": true, + "nontransparent_pixels": 9441 + }, + { + "state": "running-right", + "row": 1, + "column": 4, + "used": true, + "nontransparent_pixels": 8958 + }, + { + "state": "running-right", + "row": 1, + "column": 5, + "used": true, + "nontransparent_pixels": 9095 + }, + { + "state": "running-right", + "row": 1, + "column": 6, + "used": true, + "nontransparent_pixels": 9104 + }, + { + "state": "running-right", + "row": 1, + "column": 7, + "used": true, + "nontransparent_pixels": 9062 + }, + { + "state": "running-left", + "row": 2, + "column": 0, + "used": true, + "nontransparent_pixels": 8358 + }, + { + "state": "running-left", + "row": 2, + "column": 1, + "used": true, + "nontransparent_pixels": 9717 + }, + { + "state": "running-left", + "row": 2, + "column": 2, + "used": true, + "nontransparent_pixels": 9064 + }, + { + "state": "running-left", + "row": 2, + "column": 3, + "used": true, + "nontransparent_pixels": 9441 + }, + { + "state": "running-left", + "row": 2, + "column": 4, + "used": true, + "nontransparent_pixels": 8958 + }, + { + "state": "running-left", + "row": 2, 
+ "column": 5, + "used": true, + "nontransparent_pixels": 9095 + }, + { + "state": "running-left", + "row": 2, + "column": 6, + "used": true, + "nontransparent_pixels": 9104 + }, + { + "state": "running-left", + "row": 2, + "column": 7, + "used": true, + "nontransparent_pixels": 9062 + }, + { + "state": "waving", + "row": 3, + "column": 0, + "used": true, + "nontransparent_pixels": 8163 + }, + { + "state": "waving", + "row": 3, + "column": 1, + "used": true, + "nontransparent_pixels": 8308 + }, + { + "state": "waving", + "row": 3, + "column": 2, + "used": true, + "nontransparent_pixels": 8378 + }, + { + "state": "waving", + "row": 3, + "column": 3, + "used": true, + "nontransparent_pixels": 8331 + }, + { + "state": "waving", + "row": 3, + "column": 4, + "used": false, + "nontransparent_pixels": 0 + }, + { + "state": "waving", + "row": 3, + "column": 5, + "used": false, + "nontransparent_pixels": 0 + }, + { + "state": "waving", + "row": 3, + "column": 6, + "used": false, + "nontransparent_pixels": 0 + }, + { + "state": "waving", + "row": 3, + "column": 7, + "used": false, + "nontransparent_pixels": 0 + }, + { + "state": "jumping", + "row": 4, + "column": 0, + "used": true, + "nontransparent_pixels": 6137 + }, + { + "state": "jumping", + "row": 4, + "column": 1, + "used": true, + "nontransparent_pixels": 6269 + }, + { + "state": "jumping", + "row": 4, + "column": 2, + "used": true, + "nontransparent_pixels": 6220 + }, + { + "state": "jumping", + "row": 4, + "column": 3, + "used": true, + "nontransparent_pixels": 6811 + }, + { + "state": "jumping", + "row": 4, + "column": 4, + "used": true, + "nontransparent_pixels": 6479 + }, + { + "state": "jumping", + "row": 4, + "column": 5, + "used": false, + "nontransparent_pixels": 0 + }, + { + "state": "jumping", + "row": 4, + "column": 6, + "used": false, + "nontransparent_pixels": 0 + }, + { + "state": "jumping", + "row": 4, + "column": 7, + "used": false, + "nontransparent_pixels": 0 + }, + { + "state": "failed", + "row": 
5, + "column": 0, + "used": true, + "nontransparent_pixels": 9070 + }, + { + "state": "failed", + "row": 5, + "column": 1, + "used": true, + "nontransparent_pixels": 9543 + }, + { + "state": "failed", + "row": 5, + "column": 2, + "used": true, + "nontransparent_pixels": 8690 + }, + { + "state": "failed", + "row": 5, + "column": 3, + "used": true, + "nontransparent_pixels": 8670 + }, + { + "state": "failed", + "row": 5, + "column": 4, + "used": true, + "nontransparent_pixels": 8779 + }, + { + "state": "failed", + "row": 5, + "column": 5, + "used": true, + "nontransparent_pixels": 8062 + }, + { + "state": "failed", + "row": 5, + "column": 6, + "used": true, + "nontransparent_pixels": 7306 + }, + { + "state": "failed", + "row": 5, + "column": 7, + "used": true, + "nontransparent_pixels": 8332 + }, + { + "state": "waiting", + "row": 6, + "column": 0, + "used": true, + "nontransparent_pixels": 8613 + }, + { + "state": "waiting", + "row": 6, + "column": 1, + "used": true, + "nontransparent_pixels": 8283 + }, + { + "state": "waiting", + "row": 6, + "column": 2, + "used": true, + "nontransparent_pixels": 8188 + }, + { + "state": "waiting", + "row": 6, + "column": 3, + "used": true, + "nontransparent_pixels": 8343 + }, + { + "state": "waiting", + "row": 6, + "column": 4, + "used": true, + "nontransparent_pixels": 8276 + }, + { + "state": "waiting", + "row": 6, + "column": 5, + "used": true, + "nontransparent_pixels": 8384 + }, + { + "state": "waiting", + "row": 6, + "column": 6, + "used": false, + "nontransparent_pixels": 0 + }, + { + "state": "waiting", + "row": 6, + "column": 7, + "used": false, + "nontransparent_pixels": 0 + }, + { + "state": "running", + "row": 7, + "column": 0, + "used": true, + "nontransparent_pixels": 8356 + }, + { + "state": "running", + "row": 7, + "column": 1, + "used": true, + "nontransparent_pixels": 8488 + }, + { + "state": "running", + "row": 7, + "column": 2, + "used": true, + "nontransparent_pixels": 7784 + }, + { + "state": "running", + 
"row": 7, + "column": 3, + "used": true, + "nontransparent_pixels": 7406 + }, + { + "state": "running", + "row": 7, + "column": 4, + "used": true, + "nontransparent_pixels": 8417 + }, + { + "state": "running", + "row": 7, + "column": 5, + "used": true, + "nontransparent_pixels": 8019 + }, + { + "state": "running", + "row": 7, + "column": 6, + "used": false, + "nontransparent_pixels": 0 + }, + { + "state": "running", + "row": 7, + "column": 7, + "used": false, + "nontransparent_pixels": 0 + }, + { + "state": "review", + "row": 8, + "column": 0, + "used": true, + "nontransparent_pixels": 7925 + }, + { + "state": "review", + "row": 8, + "column": 1, + "used": true, + "nontransparent_pixels": 8116 + }, + { + "state": "review", + "row": 8, + "column": 2, + "used": true, + "nontransparent_pixels": 7582 + }, + { + "state": "review", + "row": 8, + "column": 3, + "used": true, + "nontransparent_pixels": 7887 + }, + { + "state": "review", + "row": 8, + "column": 4, + "used": true, + "nontransparent_pixels": 7999 + }, + { + "state": "review", + "row": 8, + "column": 5, + "used": true, + "nontransparent_pixels": 7861 + }, + { + "state": "review", + "row": 8, + "column": 6, + "used": false, + "nontransparent_pixels": 0 + }, + { + "state": "review", + "row": 8, + "column": 7, + "used": false, + "nontransparent_pixels": 0 + } + ] +} diff --git a/.codex-pet-runs/iroha-doctoral/frames/failed/00.png b/.codex-pet-runs/iroha-doctoral/frames/failed/00.png new file mode 100644 index 0000000..94d4e82 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/failed/00.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/failed/01.png b/.codex-pet-runs/iroha-doctoral/frames/failed/01.png new file mode 100644 index 0000000..88b15b8 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/failed/01.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/failed/02.png b/.codex-pet-runs/iroha-doctoral/frames/failed/02.png new file mode 100644 index 
0000000..0dcddb7 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/failed/02.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/failed/03.png b/.codex-pet-runs/iroha-doctoral/frames/failed/03.png new file mode 100644 index 0000000..8ad1f9a Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/failed/03.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/failed/04.png b/.codex-pet-runs/iroha-doctoral/frames/failed/04.png new file mode 100644 index 0000000..b0d6e10 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/failed/04.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/failed/05.png b/.codex-pet-runs/iroha-doctoral/frames/failed/05.png new file mode 100644 index 0000000..4f87771 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/failed/05.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/failed/06.png b/.codex-pet-runs/iroha-doctoral/frames/failed/06.png new file mode 100644 index 0000000..ffccd1d Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/failed/06.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/failed/07.png b/.codex-pet-runs/iroha-doctoral/frames/failed/07.png new file mode 100644 index 0000000..8bf5549 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/failed/07.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/frames-manifest.json b/.codex-pet-runs/iroha-doctoral/frames/frames-manifest.json new file mode 100644 index 0000000..2f795f5 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/frames/frames-manifest.json @@ -0,0 +1,125 @@ +{ + "ok": true, + "chroma_key": { + "hex": "#FF00FF", + "rgb": [ + 255, + 0, + 255 + ], + "threshold": 96.0 + }, + "rows": [ + { + "state": "idle", + "frames": [ + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/idle/00.png", + 
"/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/idle/01.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/idle/02.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/idle/03.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/idle/04.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/idle/05.png" + ], + "method": "stable-slots" + }, + { + "state": "running-right", + "frames": [ + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/00.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/01.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/02.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/03.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/04.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/05.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/06.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/07.png" + ], + "method": "stable-slots" + }, + { + "state": "running-left", + "frames": [ + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/00.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/01.png", + 
"/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/02.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/03.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/04.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/05.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/06.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/07.png" + ], + "method": "stable-slots" + }, + { + "state": "waving", + "frames": [ + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waving/00.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waving/01.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waving/02.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waving/03.png" + ], + "method": "stable-slots" + }, + { + "state": "jumping", + "frames": [ + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/jumping/00.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/jumping/01.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/jumping/02.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/jumping/03.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/jumping/04.png" + ], + "method": "stable-slots" + }, + { + "state": "failed", + 
"frames": [ + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/00.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/01.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/02.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/03.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/04.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/05.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/06.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/07.png" + ], + "method": "stable-slots" + }, + { + "state": "waiting", + "frames": [ + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waiting/00.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waiting/01.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waiting/02.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waiting/03.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waiting/04.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waiting/05.png" + ], + "method": "stable-slots" + }, + { + "state": "running", + "frames": [ + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running/00.png", + 
"/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running/01.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running/02.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running/03.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running/04.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running/05.png" + ], + "method": "stable-slots" + }, + { + "state": "review", + "frames": [ + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/review/00.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/review/01.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/review/02.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/review/03.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/review/04.png", + "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/review/05.png" + ], + "method": "stable-slots" + } + ] +} diff --git a/.codex-pet-runs/iroha-doctoral/frames/idle/00.png b/.codex-pet-runs/iroha-doctoral/frames/idle/00.png new file mode 100644 index 0000000..af17690 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/idle/00.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/idle/01.png b/.codex-pet-runs/iroha-doctoral/frames/idle/01.png new file mode 100644 index 0000000..495fc7e Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/idle/01.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/idle/02.png 
b/.codex-pet-runs/iroha-doctoral/frames/idle/02.png new file mode 100644 index 0000000..9b06382 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/idle/02.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/idle/03.png b/.codex-pet-runs/iroha-doctoral/frames/idle/03.png new file mode 100644 index 0000000..c9dfcd1 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/idle/03.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/idle/04.png b/.codex-pet-runs/iroha-doctoral/frames/idle/04.png new file mode 100644 index 0000000..dd61e4e Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/idle/04.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/idle/05.png b/.codex-pet-runs/iroha-doctoral/frames/idle/05.png new file mode 100644 index 0000000..e62d1d3 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/idle/05.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/jumping/00.png b/.codex-pet-runs/iroha-doctoral/frames/jumping/00.png new file mode 100644 index 0000000..a6d2113 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/jumping/00.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/jumping/01.png b/.codex-pet-runs/iroha-doctoral/frames/jumping/01.png new file mode 100644 index 0000000..9efb36c Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/jumping/01.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/jumping/02.png b/.codex-pet-runs/iroha-doctoral/frames/jumping/02.png new file mode 100644 index 0000000..cbf80f5 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/jumping/02.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/jumping/03.png b/.codex-pet-runs/iroha-doctoral/frames/jumping/03.png new file mode 100644 index 0000000..3ff59d8 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/jumping/03.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/jumping/04.png 
b/.codex-pet-runs/iroha-doctoral/frames/jumping/04.png new file mode 100644 index 0000000..7ad9ae2 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/jumping/04.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/review/00.png b/.codex-pet-runs/iroha-doctoral/frames/review/00.png new file mode 100644 index 0000000..43cd4fd Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/review/00.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/review/01.png b/.codex-pet-runs/iroha-doctoral/frames/review/01.png new file mode 100644 index 0000000..99d58d7 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/review/01.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/review/02.png b/.codex-pet-runs/iroha-doctoral/frames/review/02.png new file mode 100644 index 0000000..7ef21fc Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/review/02.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/review/03.png b/.codex-pet-runs/iroha-doctoral/frames/review/03.png new file mode 100644 index 0000000..b9178ff Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/review/03.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/review/04.png b/.codex-pet-runs/iroha-doctoral/frames/review/04.png new file mode 100644 index 0000000..e19163a Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/review/04.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/review/05.png b/.codex-pet-runs/iroha-doctoral/frames/review/05.png new file mode 100644 index 0000000..fe16dcc Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/review/05.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-left/00.png b/.codex-pet-runs/iroha-doctoral/frames/running-left/00.png new file mode 100644 index 0000000..3be7d2e Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-left/00.png differ diff --git 
a/.codex-pet-runs/iroha-doctoral/frames/running-left/01.png b/.codex-pet-runs/iroha-doctoral/frames/running-left/01.png new file mode 100644 index 0000000..2a792a0 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-left/01.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-left/02.png b/.codex-pet-runs/iroha-doctoral/frames/running-left/02.png new file mode 100644 index 0000000..7688f60 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-left/02.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-left/03.png b/.codex-pet-runs/iroha-doctoral/frames/running-left/03.png new file mode 100644 index 0000000..8975bf6 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-left/03.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-left/04.png b/.codex-pet-runs/iroha-doctoral/frames/running-left/04.png new file mode 100644 index 0000000..2d0e9f0 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-left/04.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-left/05.png b/.codex-pet-runs/iroha-doctoral/frames/running-left/05.png new file mode 100644 index 0000000..3fe698f Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-left/05.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-left/06.png b/.codex-pet-runs/iroha-doctoral/frames/running-left/06.png new file mode 100644 index 0000000..107447f Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-left/06.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-left/07.png b/.codex-pet-runs/iroha-doctoral/frames/running-left/07.png new file mode 100644 index 0000000..e592b82 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-left/07.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-right/00.png 
b/.codex-pet-runs/iroha-doctoral/frames/running-right/00.png new file mode 100644 index 0000000..212e10e Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-right/00.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-right/01.png b/.codex-pet-runs/iroha-doctoral/frames/running-right/01.png new file mode 100644 index 0000000..6063a06 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-right/01.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-right/02.png b/.codex-pet-runs/iroha-doctoral/frames/running-right/02.png new file mode 100644 index 0000000..42ffe49 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-right/02.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-right/03.png b/.codex-pet-runs/iroha-doctoral/frames/running-right/03.png new file mode 100644 index 0000000..96af05f Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-right/03.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-right/04.png b/.codex-pet-runs/iroha-doctoral/frames/running-right/04.png new file mode 100644 index 0000000..245ad6b Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-right/04.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-right/05.png b/.codex-pet-runs/iroha-doctoral/frames/running-right/05.png new file mode 100644 index 0000000..3d74c0b Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-right/05.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-right/06.png b/.codex-pet-runs/iroha-doctoral/frames/running-right/06.png new file mode 100644 index 0000000..1f842cb Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-right/06.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running-right/07.png b/.codex-pet-runs/iroha-doctoral/frames/running-right/07.png new file mode 100644 index 
0000000..6d33a04 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running-right/07.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running/00.png b/.codex-pet-runs/iroha-doctoral/frames/running/00.png new file mode 100644 index 0000000..546e227 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running/00.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running/01.png b/.codex-pet-runs/iroha-doctoral/frames/running/01.png new file mode 100644 index 0000000..25b7606 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running/01.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running/02.png b/.codex-pet-runs/iroha-doctoral/frames/running/02.png new file mode 100644 index 0000000..c5d6f44 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running/02.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running/03.png b/.codex-pet-runs/iroha-doctoral/frames/running/03.png new file mode 100644 index 0000000..49d4417 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running/03.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running/04.png b/.codex-pet-runs/iroha-doctoral/frames/running/04.png new file mode 100644 index 0000000..c78748d Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running/04.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/running/05.png b/.codex-pet-runs/iroha-doctoral/frames/running/05.png new file mode 100644 index 0000000..cfc2eb4 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/running/05.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/waiting/00.png b/.codex-pet-runs/iroha-doctoral/frames/waiting/00.png new file mode 100644 index 0000000..eb930bd Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/waiting/00.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/waiting/01.png 
b/.codex-pet-runs/iroha-doctoral/frames/waiting/01.png new file mode 100644 index 0000000..01751db Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/waiting/01.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/waiting/02.png b/.codex-pet-runs/iroha-doctoral/frames/waiting/02.png new file mode 100644 index 0000000..1160c4f Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/waiting/02.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/waiting/03.png b/.codex-pet-runs/iroha-doctoral/frames/waiting/03.png new file mode 100644 index 0000000..2d122c5 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/waiting/03.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/waiting/04.png b/.codex-pet-runs/iroha-doctoral/frames/waiting/04.png new file mode 100644 index 0000000..003ddba Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/waiting/04.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/waiting/05.png b/.codex-pet-runs/iroha-doctoral/frames/waiting/05.png new file mode 100644 index 0000000..ce6e94f Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/waiting/05.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/waving/00.png b/.codex-pet-runs/iroha-doctoral/frames/waving/00.png new file mode 100644 index 0000000..4ae488f Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/waving/00.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/waving/01.png b/.codex-pet-runs/iroha-doctoral/frames/waving/01.png new file mode 100644 index 0000000..0e5d576 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/waving/01.png differ diff --git a/.codex-pet-runs/iroha-doctoral/frames/waving/02.png b/.codex-pet-runs/iroha-doctoral/frames/waving/02.png new file mode 100644 index 0000000..2f9db2b Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/waving/02.png differ diff --git 
a/.codex-pet-runs/iroha-doctoral/frames/waving/03.png b/.codex-pet-runs/iroha-doctoral/frames/waving/03.png new file mode 100644 index 0000000..b5e4d29 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/frames/waving/03.png differ diff --git a/.codex-pet-runs/iroha-doctoral/imagegen-jobs.json b/.codex-pet-runs/iroha-doctoral/imagegen-jobs.json new file mode 100644 index 0000000..3bb6a6b --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/imagegen-jobs.json @@ -0,0 +1,383 @@ +{ + "schema_version": 1, + "created_at": "2026-06-04T06:45:26.289658+00:00", + "run_dir": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral", + "primary_generation_skill": "$imagegen", + "jobs": [ + { + "id": "base", + "kind": "base-pet", + "status": "complete", + "prompt_file": "prompts/base-pet.md", + "input_images": [], + "output_path": "decoded/base.png", + "depends_on": [], + "generation_skill": "$imagegen", + "requires_grounded_generation": false, + "allow_prompt_only_generation": true, + "source_path": "/Users/akiwayne/.codex/generated_images/019e915d-dad3-7010-b6b6-342889ceea2c/ig_08632c8be4aac5b7016a2120cfcc5c81919087073623557f95.png", + "completed_at": "2026-06-04T06:54:56Z" + }, + { + "id": "idle", + "kind": "row-strip", + "status": "complete", + "prompt_file": "prompts/rows/idle.md", + "retry_prompt_file": "prompts/row-retries/idle.md", + "input_images": [ + { + "path": "references/layout-guides/idle.png", + "role": "layout guide for 6 frame slots; use for spacing only, do not copy guide lines" + }, + { + "path": "references/canonical-base.png", + "role": "canonical identity reference" + } + ], + "output_path": "decoded/idle.png", + "depends_on": [ + "base" + ], + "generation_skill": "$imagegen", + "requires_grounded_generation": true, + "allow_prompt_only_generation": false, + "identity_reference_paths": [ + "references/canonical-base.png" + ], + "parallelizable_after": [ + "base" + ], + "derivation_policy": { + "may_derive": false, 
+ "reason": "no deterministic derivation is configured for this state" + }, + "mirror_policy": {}, + "source_path": "/Users/akiwayne/.codex/generated_images/019e915d-dad3-7010-b6b6-342889ceea2c/ig_08632c8be4aac5b7016a212172eb9881918a85214ce553d89a.png", + "completed_at": "2026-06-04T06:57:34Z" + }, + { + "id": "running-right", + "kind": "row-strip", + "status": "complete", + "prompt_file": "prompts/rows/running-right.md", + "retry_prompt_file": "prompts/row-retries/running-right.md", + "input_images": [ + { + "path": "references/layout-guides/running-right.png", + "role": "layout guide for 8 frame slots; use for spacing only, do not copy guide lines" + }, + { + "path": "references/canonical-base.png", + "role": "canonical identity reference" + } + ], + "output_path": "decoded/running-right.png", + "depends_on": [ + "base" + ], + "generation_skill": "$imagegen", + "requires_grounded_generation": true, + "allow_prompt_only_generation": false, + "identity_reference_paths": [ + "references/canonical-base.png" + ], + "parallelizable_after": [ + "base" + ], + "derivation_policy": { + "may_derive": false, + "reason": "no deterministic derivation is configured for this state" + }, + "mirror_policy": {}, + "source_path": "/Users/akiwayne/.codex/generated_images/019e915d-dad3-7010-b6b6-342889ceea2c/ig_08632c8be4aac5b7016a2121fa88988191a13623887e96f36e.png", + "completed_at": "2026-06-04T06:59:29Z" + }, + { + "id": "running-left", + "kind": "row-strip", + "status": "complete", + "prompt_file": "prompts/rows/running-left.md", + "retry_prompt_file": "prompts/row-retries/running-left.md", + "input_images": [ + { + "path": "references/layout-guides/running-left.png", + "role": "layout guide for 8 frame slots; use for spacing only, do not copy guide lines" + }, + { + "path": "references/canonical-base.png", + "role": "canonical identity reference" + }, + { + "path": "decoded/running-right.png", + "role": "rightward gait reference for leftward row decision" + } + ], + 
"output_path": "decoded/running-left.png", + "depends_on": [ + "base", + "running-right" + ], + "generation_skill": "$imagegen", + "requires_grounded_generation": true, + "allow_prompt_only_generation": false, + "identity_reference_paths": [ + "references/canonical-base.png" + ], + "parallelizable_after": [ + "base", + "running-right" + ], + "derivation_policy": { + "may_derive": true, + "may_derive_from": "running-right", + "derivation": "framewise-horizontal-mirror-preserving-order", + "requires_explicit_approval": true, + "fallback_generation_skill": "$imagegen" + }, + "mirror_policy": { + "may_derive": true, + "may_derive_from": "running-right", + "derivation": "framewise-horizontal-mirror-preserving-order", + "requires_explicit_approval": true, + "fallback_generation_skill": "$imagegen" + }, + "source_path": "decoded/running-right.png", + "derived_from": "running-right", + "completed_at": "2026-06-04T06:59:29.377094+00:00", + "metadata": { + "width": 2048, + "height": 768, + "mode": "RGBA", + "format": "PNG" + }, + "mirror_decision": { + "approved": true, + "approved_at": "2026-06-04T06:59:29.377094+00:00", + "note": "The reference design has no directional text or asymmetric symbol; framewise mirroring preserves identity and gait.", + "transform": "framewise-horizontal-mirror-preserving-order" + } + }, + { + "id": "waving", + "kind": "row-strip", + "status": "complete", + "prompt_file": "prompts/rows/waving.md", + "retry_prompt_file": "prompts/row-retries/waving.md", + "input_images": [ + { + "path": "references/layout-guides/waving.png", + "role": "layout guide for 4 frame slots; use for spacing only, do not copy guide lines" + }, + { + "path": "references/canonical-base.png", + "role": "canonical identity reference" + } + ], + "output_path": "decoded/waving.png", + "depends_on": [ + "base" + ], + "generation_skill": "$imagegen", + "requires_grounded_generation": true, + "allow_prompt_only_generation": false, + "identity_reference_paths": [ + 
"references/canonical-base.png" + ], + "parallelizable_after": [ + "base" + ], + "derivation_policy": { + "may_derive": false, + "reason": "state requires its own generated animation semantics" + }, + "mirror_policy": {}, + "source_path": "/Users/akiwayne/.codex/generated_images/019e915d-dad3-7010-b6b6-342889ceea2c/ig_08632c8be4aac5b7016a2122eba090819195ef3a361671077b.png", + "completed_at": "2026-06-04T07:03:39Z" + }, + { + "id": "jumping", + "kind": "row-strip", + "status": "complete", + "prompt_file": "prompts/rows/jumping.md", + "retry_prompt_file": "prompts/row-retries/jumping.md", + "input_images": [ + { + "path": "references/layout-guides/jumping.png", + "role": "layout guide for 5 frame slots; use for spacing only, do not copy guide lines" + }, + { + "path": "references/canonical-base.png", + "role": "canonical identity reference" + } + ], + "output_path": "decoded/jumping.png", + "depends_on": [ + "base" + ], + "generation_skill": "$imagegen", + "requires_grounded_generation": true, + "allow_prompt_only_generation": false, + "identity_reference_paths": [ + "references/canonical-base.png" + ], + "parallelizable_after": [ + "base" + ], + "derivation_policy": { + "may_derive": false, + "reason": "state requires its own generated animation semantics" + }, + "mirror_policy": {}, + "source_path": "/Users/akiwayne/.codex/generated_images/019e915d-dad3-7010-b6b6-342889ceea2c/ig_08632c8be4aac5b7016a2126313b388191a1084c8ef65f8e77.png", + "completed_at": "2026-06-04T07:18:04Z" + }, + { + "id": "failed", + "kind": "row-strip", + "status": "complete", + "prompt_file": "prompts/rows/failed.md", + "retry_prompt_file": "prompts/row-retries/failed.md", + "input_images": [ + { + "path": "references/layout-guides/failed.png", + "role": "layout guide for 8 frame slots; use for spacing only, do not copy guide lines" + }, + { + "path": "references/canonical-base.png", + "role": "canonical identity reference" + } + ], + "output_path": "decoded/failed.png", + "depends_on": [ + 
"base" + ], + "generation_skill": "$imagegen", + "requires_grounded_generation": true, + "allow_prompt_only_generation": false, + "identity_reference_paths": [ + "references/canonical-base.png" + ], + "parallelizable_after": [ + "base" + ], + "derivation_policy": { + "may_derive": false, + "reason": "state requires its own generated animation semantics" + }, + "mirror_policy": {}, + "source_path": "/Users/akiwayne/.codex/generated_images/019e915d-dad3-7010-b6b6-342889ceea2c/ig_08632c8be4aac5b7016a2126c682808191b65ee84b15290512.png", + "completed_at": "2026-06-04T07:20:56Z" + }, + { + "id": "waiting", + "kind": "row-strip", + "status": "complete", + "prompt_file": "prompts/rows/waiting.md", + "retry_prompt_file": "prompts/row-retries/waiting.md", + "input_images": [ + { + "path": "references/layout-guides/waiting.png", + "role": "layout guide for 6 frame slots; use for spacing only, do not copy guide lines" + }, + { + "path": "references/canonical-base.png", + "role": "canonical identity reference" + } + ], + "output_path": "decoded/waiting.png", + "depends_on": [ + "base" + ], + "generation_skill": "$imagegen", + "requires_grounded_generation": true, + "allow_prompt_only_generation": false, + "identity_reference_paths": [ + "references/canonical-base.png" + ], + "parallelizable_after": [ + "base" + ], + "derivation_policy": { + "may_derive": false, + "reason": "state requires its own generated animation semantics" + }, + "mirror_policy": {}, + "source_path": "/Users/akiwayne/.codex/generated_images/019e915d-dad3-7010-b6b6-342889ceea2c/ig_08632c8be4aac5b7016a2127eddc6c819192a77fb8a30eaec0.png", + "completed_at": "2026-06-04T07:25:07Z" + }, + { + "id": "running", + "kind": "row-strip", + "status": "complete", + "prompt_file": "prompts/rows/running.md", + "retry_prompt_file": "prompts/row-retries/running.md", + "input_images": [ + { + "path": "references/layout-guides/running.png", + "role": "layout guide for 6 frame slots; use for spacing only, do not copy guide 
lines" + }, + { + "path": "references/canonical-base.png", + "role": "canonical identity reference" + } + ], + "output_path": "decoded/running.png", + "depends_on": [ + "base" + ], + "generation_skill": "$imagegen", + "requires_grounded_generation": true, + "allow_prompt_only_generation": false, + "identity_reference_paths": [ + "references/canonical-base.png" + ], + "parallelizable_after": [ + "base" + ], + "derivation_policy": { + "may_derive": false, + "reason": "state requires its own generated animation semantics" + }, + "mirror_policy": {}, + "source_path": "/Users/akiwayne/.codex/generated_images/019e915d-dad3-7010-b6b6-342889ceea2c/ig_08632c8be4aac5b7016a2128de6cf48191ac58d28ee1f32a4c.png", + "completed_at": "2026-06-04T07:30:59Z" + }, + { + "id": "review", + "kind": "row-strip", + "status": "complete", + "prompt_file": "prompts/rows/review.md", + "retry_prompt_file": "prompts/row-retries/review.md", + "input_images": [ + { + "path": "references/layout-guides/review.png", + "role": "layout guide for 6 frame slots; use for spacing only, do not copy guide lines" + }, + { + "path": "references/canonical-base.png", + "role": "canonical identity reference" + } + ], + "output_path": "decoded/review.png", + "depends_on": [ + "base" + ], + "generation_skill": "$imagegen", + "requires_grounded_generation": true, + "allow_prompt_only_generation": false, + "identity_reference_paths": [ + "references/canonical-base.png" + ], + "parallelizable_after": [ + "base" + ], + "derivation_policy": { + "may_derive": false, + "reason": "state requires its own generated animation semantics" + }, + "mirror_policy": {}, + "source_path": "/Users/akiwayne/.codex/generated_images/019e915d-dad3-7010-b6b6-342889ceea2c/ig_08632c8be4aac5b7016a212b1ddd98819184ad370391e6140c.png", + "completed_at": "2026-06-04T07:39:20Z" + } + ] +} diff --git a/.codex-pet-runs/iroha-doctoral/package/iroha-doctoral/pet.json b/.codex-pet-runs/iroha-doctoral/package/iroha-doctoral/pet.json new file mode 100644 
index 0000000..bd1d354 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/package/iroha-doctoral/pet.json @@ -0,0 +1,6 @@ +{ + "id": "iroha-doctoral", + "displayName": "彩叶博士", + "description": "以参考设计重构、穿科研白大褂的酒寄彩叶博士研究生宠物。", + "spritesheetPath": "spritesheet.webp" +} diff --git a/.codex-pet-runs/iroha-doctoral/package/iroha-doctoral/spritesheet.webp b/.codex-pet-runs/iroha-doctoral/package/iroha-doctoral/spritesheet.webp new file mode 100644 index 0000000..e4152ef Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/package/iroha-doctoral/spritesheet.webp differ diff --git a/.codex-pet-runs/iroha-doctoral/pet_request.json b/.codex-pet-runs/iroha-doctoral/pet_request.json new file mode 100644 index 0000000..52b63e2 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/pet_request.json @@ -0,0 +1,200 @@ +{ + "pet_id": "iroha-doctoral", + "display_name": "Iroha Doctoral", + "description": "一位受酒寄彩叶启发、穿科研白大褂的年轻女性博士研究生宠物。", + "created_at": "2026-06-04T06:45:26.287809+00:00", + "atlas": { + "columns": 8, + "rows": 9, + "cell_width": 192, + "cell_height": 208, + "width": 1536, + "height": 1872 + }, + "rows": [ + { + "state": "idle", + "row": 0, + "frames": 6, + "purpose": "calm resting, breathing, and blinking loop" + }, + { + "state": "running-right", + "row": 1, + "frames": 8, + "purpose": "rightward drag movement loop" + }, + { + "state": "running-left", + "row": 2, + "frames": 8, + "purpose": "leftward drag movement loop" + }, + { + "state": "waving", + "row": 3, + "frames": 4, + "purpose": "greeting or attention gesture" + }, + { + "state": "jumping", + "row": 4, + "frames": 5, + "purpose": "hover or playful jump" + }, + { + "state": "failed", + "row": 5, + "frames": 8, + "purpose": "blocked, failed, or cancelled reaction" + }, + { + "state": "waiting", + "row": 6, + "frames": 6, + "purpose": "waiting for approval, help, or user input" + }, + { + "state": "running", + "row": 7, + "frames": 6, + "purpose": "active task work or processing" + }, + { + "state":
"review", + "row": 8, + "frames": 6, + "purpose": "ready or completed output review" + } + ], + "layout_guides": [ + { + "state": "idle", + "path": "references/layout-guides/idle.png", + "width": 1152, + "height": 208, + "frames": 6, + "cell_width": 192, + "cell_height": 208, + "safe_margin_x": 18, + "safe_margin_y": 16, + "usage": "layout guide input only; do not copy visible guide lines into generated sprite strips" + }, + { + "state": "running-right", + "path": "references/layout-guides/running-right.png", + "width": 1536, + "height": 208, + "frames": 8, + "cell_width": 192, + "cell_height": 208, + "safe_margin_x": 18, + "safe_margin_y": 16, + "usage": "layout guide input only; do not copy visible guide lines into generated sprite strips" + }, + { + "state": "running-left", + "path": "references/layout-guides/running-left.png", + "width": 1536, + "height": 208, + "frames": 8, + "cell_width": 192, + "cell_height": 208, + "safe_margin_x": 18, + "safe_margin_y": 16, + "usage": "layout guide input only; do not copy visible guide lines into generated sprite strips" + }, + { + "state": "waving", + "path": "references/layout-guides/waving.png", + "width": 768, + "height": 208, + "frames": 4, + "cell_width": 192, + "cell_height": 208, + "safe_margin_x": 18, + "safe_margin_y": 16, + "usage": "layout guide input only; do not copy visible guide lines into generated sprite strips" + }, + { + "state": "jumping", + "path": "references/layout-guides/jumping.png", + "width": 960, + "height": 208, + "frames": 5, + "cell_width": 192, + "cell_height": 208, + "safe_margin_x": 18, + "safe_margin_y": 16, + "usage": "layout guide input only; do not copy visible guide lines into generated sprite strips" + }, + { + "state": "failed", + "path": "references/layout-guides/failed.png", + "width": 1536, + "height": 208, + "frames": 8, + "cell_width": 192, + "cell_height": 208, + "safe_margin_x": 18, + "safe_margin_y": 16, + "usage": "layout guide input only; do not copy visible guide lines 
into generated sprite strips" + }, + { + "state": "waiting", + "path": "references/layout-guides/waiting.png", + "width": 1152, + "height": 208, + "frames": 6, + "cell_width": 192, + "cell_height": 208, + "safe_margin_x": 18, + "safe_margin_y": 16, + "usage": "layout guide input only; do not copy visible guide lines into generated sprite strips" + }, + { + "state": "running", + "path": "references/layout-guides/running.png", + "width": 1152, + "height": 208, + "frames": 6, + "cell_width": 192, + "cell_height": 208, + "safe_margin_x": 18, + "safe_margin_y": 16, + "usage": "layout guide input only; do not copy visible guide lines into generated sprite strips" + }, + { + "state": "review", + "path": "references/layout-guides/review.png", + "width": 1152, + "height": 208, + "frames": 6, + "cell_width": 192, + "cell_height": 208, + "safe_margin_x": 18, + "safe_margin_y": 16, + "usage": "layout guide input only; do not copy visible guide lines into generated sprite strips" + } + ], + "references": [], + "chroma_key": { + "hex": "#FF00FF", + "rgb": [ + 255, + 0, + 255 + ], + "name": "magenta", + "selection": "fallback" + }, + "pet_notes": "酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。", + "style_preset": "sticker", + "style_notes": "Polished Japanese anime chibi sticker sprite, clean dark outline, flat cel shading, expressive face, crisp opaque edges, no text, no shadows, no scenery.", + "style_contract": "Pet-safe sprite: compact full-body mascot, readable in a 192x208 cell, clear silhouette, simple face, stable palette/materials, and crisp edges for chroma-key extraction. Style `sticker`: Polished sticker mascot with bold clean shapes, crisp outline, flat colors, and minimal highlight detail. 
User style notes: Polished Japanese anime chibi sticker sprite, clean dark outline, flat cel shading, expressive face, crisp opaque edges, no text, no shadows, no scenery.", + "brand_name": "", + "brand_brief": "", + "brand_sources": [], + "pet_safe_style": "Pet-safe sprite: compact full-body mascot, readable in a 192x208 cell, clear silhouette, simple face, stable palette/materials, and crisp edges for chroma-key extraction.", + "primary_generation_skill": "$imagegen" +} diff --git a/.codex-pet-runs/iroha-doctoral/prompts/base-pet.md b/.codex-pet-runs/iroha-doctoral/prompts/base-pet.md new file mode 100644 index 0000000..2b45bd8 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/base-pet.md @@ -0,0 +1,7 @@ +Create one clean full-body reference sprite for Codex pet Iroha Doctoral. + +Pet identity: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。 +Style: Pet-safe sprite: compact full-body mascot, readable in a 192x208 cell, clear silhouette, simple face, stable palette/materials, and crisp edges for chroma-key extraction. Style `sticker`: Polished sticker mascot with bold clean shapes, crisp outline, flat colors, and minimal highlight detail. User style notes: Polished Japanese anime chibi sticker sprite, clean dark outline, flat cel shading, expressive face, crisp opaque edges, no text, no shadows, no scenery. + + +Place a single centered pose on a perfectly flat pure magenta #FF00FF chroma-key background. Keep the full pet visible, compact, readable at 192x208, and easy to animate. Preserve approved reference identity cues. No scenery, text, borders, checkerboard transparency, shadows, glows, detached effects, or extra props. Keep #FF00FF and close colors out of the pet, props, highlights, and effects.
diff --git a/.codex-pet-runs/iroha-doctoral/prompts/row-retries/failed.md b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/failed.md new file mode 100644 index 0000000..039d73c --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/failed.md @@ -0,0 +1,14 @@ +Create Codex pet row `failed` for `iroha-doctoral`: exactly 8 full-body frames in one horizontal strip on flat pure magenta #FF00FF. + +Use the attached canonical base for identity and the layout guide only for spacing. Same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, palette, material, proportions, markings, and props. + +Keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. + +Action: Blocked/failed loop: slumped or deflated reaction with sad or closed eyes. + +State requirements: +- Show failure through slumped pose, drooping ears/limbs, closed or sad eyes, and lower body position. +- Tears, small smoke puffs, or tiny stars are allowed only if attached to or overlapping the pet silhouette and kept inside the same frame slot. +- Do not draw red X marks, floating symbols, detached stars, separated smoke clouds, falling tear drops, dust, or other loose effects. + +One centered complete pose per invisible slot. No text, boxes, guide marks, scenery, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or #FF00FF colors in the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/row-retries/idle.md b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/idle.md new file mode 100644 index 0000000..afdfbf0 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/idle.md @@ -0,0 +1,18 @@ +Create Codex pet row `idle` for `iroha-doctoral`: exactly 6 full-body frames in one horizontal strip on flat pure magenta #FF00FF. 
+ +Use the attached canonical base for identity and the layout guide only for spacing. Same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, palette, material, proportions, markings, and props. + +Keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. + +Action: Calm low-distraction resting loop: subtle breathing, tiny blink, slight head/body bob, and only quiet persona-preserving motion. + +State requirements: +- CRITICAL: idle is the low-distraction baseline state and the first frame is also used as the reduced-motion static pet. +- Use only subtle idle motion: gentle breathing, a tiny blink, a slight head or body bob, a very small material sway, or another quiet motion that fits the pet persona. +- Keep the pet essentially in the same pose, facing direction, silhouette, markings, palette, and prop state across all 6 frames. +- Idle variation must stay calm but still read as animation; do not repeat effectively identical copies across the loop. +- Do not show waving, walking, running, jumping, talking, working, reviewing, emotional reactions, large gestures, item interactions, or new props. +- Feet, base, body, or object anchor should remain planted or nearly planted. +- The first and last frames should be very close visually so the loop feels calm and does not pop. + +One centered complete pose per invisible slot. No text, boxes, guide marks, scenery, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or #FF00FF colors in the pet. 
diff --git a/.codex-pet-runs/iroha-doctoral/prompts/row-retries/jumping.md b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/jumping.md new file mode 100644 index 0000000..56ea33e --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/jumping.md @@ -0,0 +1,14 @@ +Create Codex pet row `jumping` for `iroha-doctoral`: exactly 5 full-body frames in one horizontal strip on flat pure magenta #FF00FF. + +Use the attached canonical base for identity and the layout guide only for spacing. Same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, palette, material, proportions, markings, and props. + +Keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. + +Action: Hover jump loop: anticipation, lift, airborne peak, descent, and settle through body height. + +State requirements: +- Show the jump through pose and vertical body position only: anticipation, lift, airborne peak, descent, settle. +- Do not draw ground shadows, contact shadows, drop shadows, oval shadows, landing marks, dust, smears, bounce pads, or motion marks under the pet. +- Keep the background outside the pet perfectly flat chroma key with no darker key-colored patches. + +One centered complete pose per invisible slot. No text, boxes, guide marks, scenery, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or #FF00FF colors in the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/row-retries/review.md b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/review.md new file mode 100644 index 0000000..d79197a --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/review.md @@ -0,0 +1,13 @@ +Create Codex pet row `review` for `iroha-doctoral`: exactly 6 full-body frames in one horizontal strip on flat pure magenta #FF00FF. 
+ +Use the attached canonical base for identity and the layout guide only for spacing. Same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, palette, material, proportions, markings, and props. + +Keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. + +Action: Ready-review loop: focused inspection of completed output with lean, blink, narrowed eyes, head tilt, or paw pose. + +State requirements: +- Show review through lean, blink, narrowed eyes, head tilt, or paw/hand position. +- Do not add magnifying glasses, papers, code, UI, punctuation, symbols, or other new props unless they already exist in the base pet identity. + +One centered complete pose per invisible slot. No text, boxes, guide marks, scenery, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or #FF00FF colors in the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/row-retries/running-left.md b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/running-left.md new file mode 100644 index 0000000..3c5acd1 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/running-left.md @@ -0,0 +1,15 @@ +Create Codex pet row `running-left` for `iroha-doctoral`: exactly 8 full-body frames in one horizontal strip on flat pure magenta #FF00FF. + +Use the attached canonical base for identity and the layout guide only for spacing. Same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, palette, material, proportions, markings, and props. + +Keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. 
+ +Action: Dragging-left loop: show directional movement to the left through body and limb poses only. + +State requirements: +- Show directional drag movement to the left through body, limb, and prop movement only. +- The row must unmistakably face and travel left. +- The movement cadence must alternate visibly across the 8 frames instead of repeating one nearly static stride. +- Do not draw speed lines, dust clouds, floor shadows, motion trails, or detached motion effects. + +One centered complete pose per invisible slot. No text, boxes, guide marks, scenery, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or #FF00FF colors in the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/row-retries/running-right.md b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/running-right.md new file mode 100644 index 0000000..2fdcde3 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/running-right.md @@ -0,0 +1,15 @@ +Create Codex pet row `running-right` for `iroha-doctoral`: exactly 8 full-body frames in one horizontal strip on flat pure magenta #FF00FF. + +Use the attached canonical base for identity and the layout guide only for spacing. Same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, palette, material, proportions, markings, and props. + +Keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. + +Action: Dragging-right loop: show directional movement to the right through body and limb poses only. + +State requirements: +- Show directional drag movement to the right through body, limb, and prop movement only. +- The row must unmistakably face and travel right. +- The movement cadence must alternate visibly across the 8 frames instead of repeating one nearly static stride. 
+- Do not draw speed lines, dust clouds, floor shadows, motion trails, or detached motion effects. + +One centered complete pose per invisible slot. No text, boxes, guide marks, scenery, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or #FF00FF colors in the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/row-retries/running.md b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/running.md new file mode 100644 index 0000000..837d9b8 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/running.md @@ -0,0 +1,13 @@ +Create Codex pet row `running` for `iroha-doctoral`: exactly 6 full-body frames in one horizontal strip on flat pure magenta #FF00FF. + +Use the attached canonical base for identity and the layout guide only for spacing. Same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, palette, material, proportions, markings, and props. + +Keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. + +Action: Working loop: focused active-task processing, thinking, typing, scanning, or effortful concentration; not literal foot-running, jogging, sprinting, treadmill motion, raised knees, long steps, pumping arms, or directional travel. + +State requirements: +- Show the pet actively working or processing, as if running a task: focused posture, busy hands or paws, purposeful bobbing, thinking motion, tool or prop motion only if already part of the pet identity, or other non-locomotion activity. +- Do not show literal foot-running, jogging, sprinting, treadmill motion, raised knees, long steps, pumping arms, directional travel, speed lines, dust clouds, floor shadows, motion trails, or detached motion effects. + +One centered complete pose per invisible slot. 
No text, boxes, guide marks, scenery, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or #FF00FF colors in the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/row-retries/waiting.md b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/waiting.md new file mode 100644 index 0000000..a4a7f75 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/waiting.md @@ -0,0 +1,13 @@ +Create Codex pet row `waiting` for `iroha-doctoral`: exactly 6 full-body frames in one horizontal strip on flat pure magenta #FF00FF. + +Use the attached canonical base for identity and the layout guide only for spacing. Same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, palette, material, proportions, markings, and props. + +Keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. + +Action: Needs-input loop: expectant asking pose for approval, help, or user input. + +State requirements: +- Show that Codex needs approval, help, or user input through an expectant asking pose. +- Keep the motion patient and readable, without turning it into ordinary idle or review. + +One centered complete pose per invisible slot. No text, boxes, guide marks, scenery, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or #FF00FF colors in the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/row-retries/waving.md b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/waving.md new file mode 100644 index 0000000..ef212a4 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/row-retries/waving.md @@ -0,0 +1,13 @@ +Create Codex pet row `waving` for `iroha-doctoral`: exactly 4 full-body frames in one horizontal strip on flat pure magenta #FF00FF. 
+ +Use the attached canonical base for identity and the layout guide only for spacing. Same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, palette, material, proportions, markings, and props. + +Keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. + +Action: Greeting loop: paw or limb down, raised, tilted, and returning in a friendly attention gesture. + +State requirements: +- Show the greeting through paw, hand, wing, or limb pose only. +- Do not draw wave marks, motion arcs, lines, sparkles, symbols, or floating effects around the gesture. + +One centered complete pose per invisible slot. No text, boxes, guide marks, scenery, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or #FF00FF colors in the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/rows/failed.md b/.codex-pet-runs/iroha-doctoral/prompts/rows/failed.md new file mode 100644 index 0000000..f07cb14 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/rows/failed.md @@ -0,0 +1,18 @@ +Create one horizontal animation strip for Codex pet `iroha-doctoral`, state `failed`. + +Use the attached canonical base for identity. Use the attached layout guide only for slot count, spacing, centering, and padding; do not draw the guide. + +Output exactly 8 full-body frames in one left-to-right row on flat pure magenta #FF00FF. Treat the row as 8 invisible equal-width slots: one centered complete pose per slot, evenly spaced, with no overlap, clipping, empty slots, labels, or borders. + +Identity: same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, proportions, markings, palette, material, style, and props. 
+Style: Pet-safe sprite: compact full-body mascot, readable in a 192x208 cell, clear silhouette, simple face, stable palette/materials, and crisp edges for chroma-key extraction. Style `sticker`: Polished sticker mascot with bold clean shapes, crisp outline, flat colors, and minimal highlight detail. User style notes: Polished Japanese anime chibi sticker sprite, clean dark outline, flat cel shading, expressive face, crisp opaque edges, no text, no shadows, no scenery. +Animation continuity: keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. Move the pose within the slot instead of redrawing the pet larger or smaller frame to frame. + +State action: Blocked/failed loop: slumped or deflated reaction with sad or closed eyes. + +State requirements: +- Show failure through slumped pose, drooping ears/limbs, closed or sad eyes, and lower body position. +- Tears, small smoke puffs, or tiny stars are allowed only if attached to or overlapping the pet silhouette and kept inside the same frame slot. +- Do not draw red X marks, floating symbols, detached stars, separated smoke clouds, falling tear drops, dust, or other loose effects. + +Clean extraction: crisp opaque edges, safe padding, no scenery, text, guide marks, checkerboard, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or chroma-key colors inside the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/rows/idle.md b/.codex-pet-runs/iroha-doctoral/prompts/rows/idle.md new file mode 100644 index 0000000..d91049f --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/rows/idle.md @@ -0,0 +1,22 @@ +Create one horizontal animation strip for Codex pet `iroha-doctoral`, state `idle`. + +Use the attached canonical base for identity. Use the attached layout guide only for slot count, spacing, centering, and padding; do not draw the guide. 
+ +Output exactly 6 full-body frames in one left-to-right row on flat pure magenta #FF00FF. Treat the row as 6 invisible equal-width slots: one centered complete pose per slot, evenly spaced, with no overlap, clipping, empty slots, labels, or borders. + +Identity: same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, proportions, markings, palette, material, style, and props. +Style: Pet-safe sprite: compact full-body mascot, readable in a 192x208 cell, clear silhouette, simple face, stable palette/materials, and crisp edges for chroma-key extraction. Style `sticker`: Polished sticker mascot with bold clean shapes, crisp outline, flat colors, and minimal highlight detail. User style notes: Polished Japanese anime chibi sticker sprite, clean dark outline, flat cel shading, expressive face, crisp opaque edges, no text, no shadows, no scenery.. +Animation continuity: keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. Move the pose within the slot instead of redrawing the pet larger or smaller frame to frame. + +State action: Calm low-distraction resting loop: subtle breathing, tiny blink, slight head/body bob, and only quiet persona-preserving motion. + +State requirements: +- CRITICAL: idle is the low-distraction baseline state and the first frame is also used as the reduced-motion static pet. +- Use only subtle idle motion: gentle breathing, a tiny blink, a slight head or body bob, a very small material sway, or another quiet motion that fits the pet persona. +- Keep the pet essentially in the same pose, facing direction, silhouette, markings, palette, and prop state across all 6 frames. +- Idle variation must stay calm but still read as animation; do not repeat effectively identical copies across the loop. 
+- Do not show waving, walking, running, jumping, talking, working, reviewing, emotional reactions, large gestures, item interactions, or new props. +- Feet, base, body, or object anchor should remain planted or nearly planted. +- The first and last frames should be very close visually so the loop feels calm and does not pop. + +Clean extraction: crisp opaque edges, safe padding, no scenery, text, guide marks, checkerboard, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or chroma-key colors inside the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/rows/jumping.md b/.codex-pet-runs/iroha-doctoral/prompts/rows/jumping.md new file mode 100644 index 0000000..9243a43 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/rows/jumping.md @@ -0,0 +1,18 @@ +Create one horizontal animation strip for Codex pet `iroha-doctoral`, state `jumping`. + +Use the attached canonical base for identity. Use the attached layout guide only for slot count, spacing, centering, and padding; do not draw the guide. + +Output exactly 5 full-body frames in one left-to-right row on flat pure magenta #FF00FF. Treat the row as 5 invisible equal-width slots: one centered complete pose per slot, evenly spaced, with no overlap, clipping, empty slots, labels, or borders. + +Identity: same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。 Preserve silhouette, face, proportions, markings, palette, material, style, and props. +Style: Pet-safe sprite: compact full-body mascot, readable in a 192x208 cell, clear silhouette, simple face, stable palette/materials, and crisp edges for chroma-key extraction. Style `sticker`: Polished sticker mascot with bold clean shapes, crisp outline, flat colors, and minimal highlight detail. 
User style notes: Polished Japanese anime chibi sticker sprite, clean dark outline, flat cel shading, expressive face, crisp opaque edges, no text, no shadows, no scenery. +Animation continuity: keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. Move the pose within the slot instead of redrawing the pet larger or smaller frame to frame. + +State action: Hover jump loop: anticipation, lift, airborne peak, descent, and settle through body height. + +State requirements: +- Show the jump through pose and vertical body position only: anticipation, lift, airborne peak, descent, settle. +- Do not draw ground shadows, contact shadows, drop shadows, oval shadows, landing marks, dust, smears, bounce pads, or motion marks under the pet. +- Keep the background outside the pet perfectly flat chroma key with no darker key-colored patches. + +Clean extraction: crisp opaque edges, safe padding, no scenery, text, guide marks, checkerboard, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or chroma-key colors inside the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/rows/review.md b/.codex-pet-runs/iroha-doctoral/prompts/rows/review.md new file mode 100644 index 0000000..f4c84c4 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/rows/review.md @@ -0,0 +1,17 @@ +Create one horizontal animation strip for Codex pet `iroha-doctoral`, state `review`. + +Use the attached canonical base for identity. Use the attached layout guide only for slot count, spacing, centering, and padding; do not draw the guide. + +Output exactly 6 full-body frames in one left-to-right row on flat pure magenta #FF00FF. Treat the row as 6 invisible equal-width slots: one centered complete pose per slot, evenly spaced, with no overlap, clipping, empty slots, labels, or borders. 
+ +Identity: same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, proportions, markings, palette, material, style, and props. +Style: Pet-safe sprite: compact full-body mascot, readable in a 192x208 cell, clear silhouette, simple face, stable palette/materials, and crisp edges for chroma-key extraction. Style `sticker`: Polished sticker mascot with bold clean shapes, crisp outline, flat colors, and minimal highlight detail. User style notes: Polished Japanese anime chibi sticker sprite, clean dark outline, flat cel shading, expressive face, crisp opaque edges, no text, no shadows, no scenery.. +Animation continuity: keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. Move the pose within the slot instead of redrawing the pet larger or smaller frame to frame. + +State action: Ready-review loop: focused inspection of completed output with lean, blink, narrowed eyes, head tilt, or paw pose. + +State requirements: +- Show review through lean, blink, narrowed eyes, head tilt, or paw/hand position. +- Do not add magnifying glasses, papers, code, UI, punctuation, symbols, or other new props unless they already exist in the base pet identity. + +Clean extraction: crisp opaque edges, safe padding, no scenery, text, guide marks, checkerboard, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or chroma-key colors inside the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/rows/running-left.md b/.codex-pet-runs/iroha-doctoral/prompts/rows/running-left.md new file mode 100644 index 0000000..7137418 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/rows/running-left.md @@ -0,0 +1,19 @@ +Create one horizontal animation strip for Codex pet `iroha-doctoral`, state `running-left`. 
+ +Use the attached canonical base for identity. Use the attached layout guide only for slot count, spacing, centering, and padding; do not draw the guide. + +Output exactly 8 full-body frames in one left-to-right row on flat pure magenta #FF00FF. Treat the row as 8 invisible equal-width slots: one centered complete pose per slot, evenly spaced, with no overlap, clipping, empty slots, labels, or borders. + +Identity: same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, proportions, markings, palette, material, style, and props. +Style: Pet-safe sprite: compact full-body mascot, readable in a 192x208 cell, clear silhouette, simple face, stable palette/materials, and crisp edges for chroma-key extraction. Style `sticker`: Polished sticker mascot with bold clean shapes, crisp outline, flat colors, and minimal highlight detail. User style notes: Polished Japanese anime chibi sticker sprite, clean dark outline, flat cel shading, expressive face, crisp opaque edges, no text, no shadows, no scenery.. +Animation continuity: keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. Move the pose within the slot instead of redrawing the pet larger or smaller frame to frame. + +State action: Dragging-left loop: show directional movement to the left through body and limb poses only. + +State requirements: +- Show directional drag movement to the left through body, limb, and prop movement only. +- The row must unmistakably face and travel left. +- The movement cadence must alternate visibly across the 8 frames instead of repeating one nearly static stride. +- Do not draw speed lines, dust clouds, floor shadows, motion trails, or detached motion effects. 
+ +Clean extraction: crisp opaque edges, safe padding, no scenery, text, guide marks, checkerboard, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or chroma-key colors inside the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/rows/running-right.md b/.codex-pet-runs/iroha-doctoral/prompts/rows/running-right.md new file mode 100644 index 0000000..78c781f --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/rows/running-right.md @@ -0,0 +1,19 @@ +Create one horizontal animation strip for Codex pet `iroha-doctoral`, state `running-right`. + +Use the attached canonical base for identity. Use the attached layout guide only for slot count, spacing, centering, and padding; do not draw the guide. + +Output exactly 8 full-body frames in one left-to-right row on flat pure magenta #FF00FF. Treat the row as 8 invisible equal-width slots: one centered complete pose per slot, evenly spaced, with no overlap, clipping, empty slots, labels, or borders. + +Identity: same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, proportions, markings, palette, material, style, and props. +Style: Pet-safe sprite: compact full-body mascot, readable in a 192x208 cell, clear silhouette, simple face, stable palette/materials, and crisp edges for chroma-key extraction. Style `sticker`: Polished sticker mascot with bold clean shapes, crisp outline, flat colors, and minimal highlight detail. User style notes: Polished Japanese anime chibi sticker sprite, clean dark outline, flat cel shading, expressive face, crisp opaque edges, no text, no shadows, no scenery.. +Animation continuity: keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. Move the pose within the slot instead of redrawing the pet larger or smaller frame to frame. 
+ +State action: Dragging-right loop: show directional movement to the right through body and limb poses only. + +State requirements: +- Show directional drag movement to the right through body, limb, and prop movement only. +- The row must unmistakably face and travel right. +- The movement cadence must alternate visibly across the 8 frames instead of repeating one nearly static stride. +- Do not draw speed lines, dust clouds, floor shadows, motion trails, or detached motion effects. + +Clean extraction: crisp opaque edges, safe padding, no scenery, text, guide marks, checkerboard, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or chroma-key colors inside the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/rows/running.md b/.codex-pet-runs/iroha-doctoral/prompts/rows/running.md new file mode 100644 index 0000000..bf46aac --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/rows/running.md @@ -0,0 +1,17 @@ +Create one horizontal animation strip for Codex pet `iroha-doctoral`, state `running`. + +Use the attached canonical base for identity. Use the attached layout guide only for slot count, spacing, centering, and padding; do not draw the guide. + +Output exactly 6 full-body frames in one left-to-right row on flat pure magenta #FF00FF. Treat the row as 6 invisible equal-width slots: one centered complete pose per slot, evenly spaced, with no overlap, clipping, empty slots, labels, or borders. + +Identity: same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, proportions, markings, palette, material, style, and props. +Style: Pet-safe sprite: compact full-body mascot, readable in a 192x208 cell, clear silhouette, simple face, stable palette/materials, and crisp edges for chroma-key extraction. 
Style `sticker`: Polished sticker mascot with bold clean shapes, crisp outline, flat colors, and minimal highlight detail. User style notes: Polished Japanese anime chibi sticker sprite, clean dark outline, flat cel shading, expressive face, crisp opaque edges, no text, no shadows, no scenery. +Animation continuity: keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. Move the pose within the slot instead of redrawing the pet larger or smaller frame to frame. + +State action: Working loop: focused active-task processing, thinking, typing, scanning, or effortful concentration; not literal foot-running, jogging, sprinting, treadmill motion, raised knees, long steps, pumping arms, or directional travel. + +State requirements: +- Show the pet actively working or processing, as if running a task: focused posture, busy hands or paws, purposeful bobbing, thinking motion, tool or prop motion only if already part of the pet identity, or other non-locomotion activity. +- Do not show literal foot-running, jogging, sprinting, treadmill motion, raised knees, long steps, pumping arms, directional travel, speed lines, dust clouds, floor shadows, motion trails, or detached motion effects. + +Clean extraction: crisp opaque edges, safe padding, no scenery, text, guide marks, checkerboard, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or chroma-key colors inside the pet. diff --git a/.codex-pet-runs/iroha-doctoral/prompts/rows/waiting.md b/.codex-pet-runs/iroha-doctoral/prompts/rows/waiting.md new file mode 100644 index 0000000..fc6b4f7 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/rows/waiting.md @@ -0,0 +1,17 @@ +Create one horizontal animation strip for Codex pet `iroha-doctoral`, state `waiting`. + +Use the attached canonical base for identity. 
Use the attached layout guide only for slot count, spacing, centering, and padding; do not draw the guide. + +Output exactly 6 full-body frames in one left-to-right row on flat pure magenta #FF00FF. Treat the row as 6 invisible equal-width slots: one centered complete pose per slot, evenly spaced, with no overlap, clipping, empty slots, labels, or borders. + +Identity: same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, proportions, markings, palette, material, style, and props. +Style: Pet-safe sprite: compact full-body mascot, readable in a 192x208 cell, clear silhouette, simple face, stable palette/materials, and crisp edges for chroma-key extraction. Style `sticker`: Polished sticker mascot with bold clean shapes, crisp outline, flat colors, and minimal highlight detail. User style notes: Polished Japanese anime chibi sticker sprite, clean dark outline, flat cel shading, expressive face, crisp opaque edges, no text, no shadows, no scenery.. +Animation continuity: keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. Move the pose within the slot instead of redrawing the pet larger or smaller frame to frame. + +State action: Needs-input loop: expectant asking pose for approval, help, or user input. + +State requirements: +- Show that Codex needs approval, help, or user input through an expectant asking pose. +- Keep the motion patient and readable, without turning it into ordinary idle or review. + +Clean extraction: crisp opaque edges, safe padding, no scenery, text, guide marks, checkerboard, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or chroma-key colors inside the pet. 
diff --git a/.codex-pet-runs/iroha-doctoral/prompts/rows/waving.md b/.codex-pet-runs/iroha-doctoral/prompts/rows/waving.md new file mode 100644 index 0000000..34aee7c --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/prompts/rows/waving.md @@ -0,0 +1,17 @@ +Create one horizontal animation strip for Codex pet `iroha-doctoral`, state `waving`. + +Use the attached canonical base for identity. Use the attached layout guide only for slot count, spacing, centering, and padding; do not draw the guide. + +Output exactly 4 full-body frames in one left-to-right row on flat pure magenta #FF00FF. Treat the row as 4 invisible equal-width slots: one centered complete pose per slot, evenly spaced, with no overlap, clipping, empty slots, labels, or borders. + +Identity: same pet in every frame: 酒寄彩叶博士研究生形象,严格依据用户参考设计:短圆润深紫色波波头,鲜明青绿色内层挑染,蓝灰色眼睛,温柔聪慧神情,纤细青年女性比例;白色科研大褂覆盖水手领浅色上衣与蓝绿色半裙,白袜黑色乐福鞋,米色斜挎包与粉色挂件。所有动画状态保持发型、配色、白大褂、包和挂件完全一致。. Preserve silhouette, face, proportions, markings, palette, material, style, and props. +Style: Pet-safe sprite: compact full-body mascot, readable in a 192x208 cell, clear silhouette, simple face, stable palette/materials, and crisp edges for chroma-key extraction. Style `sticker`: Polished sticker mascot with bold clean shapes, crisp outline, flat colors, and minimal highlight detail. User style notes: Polished Japanese anime chibi sticker sprite, clean dark outline, flat cel shading, expressive face, crisp opaque edges, no text, no shadows, no scenery.. +Animation continuity: keep apparent pet scale and baseline stable within the row unless the state itself intentionally changes vertical position, such as `jumping`. Move the pose within the slot instead of redrawing the pet larger or smaller frame to frame. + +State action: Greeting loop: paw or limb down, raised, tilted, and returning in a friendly attention gesture. + +State requirements: +- Show the greeting through paw, hand, wing, or limb pose only. 
+- Do not draw wave marks, motion arcs, lines, sparkles, symbols, or floating effects around the gesture. + +Clean extraction: crisp opaque edges, safe padding, no scenery, text, guide marks, checkerboard, shadows, glows, motion blur, speed lines, dust, detached effects, stray pixels, or chroma-key colors inside the pet. diff --git a/.codex-pet-runs/iroha-doctoral/qa/contact-sheet.png b/.codex-pet-runs/iroha-doctoral/qa/contact-sheet.png new file mode 100644 index 0000000..1c899cf Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/qa/contact-sheet.png differ diff --git a/.codex-pet-runs/iroha-doctoral/qa/previews/failed.gif b/.codex-pet-runs/iroha-doctoral/qa/previews/failed.gif new file mode 100644 index 0000000..8fb124d Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/qa/previews/failed.gif differ diff --git a/.codex-pet-runs/iroha-doctoral/qa/previews/idle.gif b/.codex-pet-runs/iroha-doctoral/qa/previews/idle.gif new file mode 100644 index 0000000..2bb0ab7 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/qa/previews/idle.gif differ diff --git a/.codex-pet-runs/iroha-doctoral/qa/previews/jumping.gif b/.codex-pet-runs/iroha-doctoral/qa/previews/jumping.gif new file mode 100644 index 0000000..79c93d4 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/qa/previews/jumping.gif differ diff --git a/.codex-pet-runs/iroha-doctoral/qa/previews/review.gif b/.codex-pet-runs/iroha-doctoral/qa/previews/review.gif new file mode 100644 index 0000000..1dcd41a Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/qa/previews/review.gif differ diff --git a/.codex-pet-runs/iroha-doctoral/qa/previews/running-left.gif b/.codex-pet-runs/iroha-doctoral/qa/previews/running-left.gif new file mode 100644 index 0000000..cbe16d5 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/qa/previews/running-left.gif differ diff --git a/.codex-pet-runs/iroha-doctoral/qa/previews/running-right.gif 
b/.codex-pet-runs/iroha-doctoral/qa/previews/running-right.gif new file mode 100644 index 0000000..31a8d51 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/qa/previews/running-right.gif differ diff --git a/.codex-pet-runs/iroha-doctoral/qa/previews/running.gif b/.codex-pet-runs/iroha-doctoral/qa/previews/running.gif new file mode 100644 index 0000000..f0ba960 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/qa/previews/running.gif differ diff --git a/.codex-pet-runs/iroha-doctoral/qa/previews/waiting.gif b/.codex-pet-runs/iroha-doctoral/qa/previews/waiting.gif new file mode 100644 index 0000000..05ed2fd Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/qa/previews/waiting.gif differ diff --git a/.codex-pet-runs/iroha-doctoral/qa/previews/waving.gif b/.codex-pet-runs/iroha-doctoral/qa/previews/waving.gif new file mode 100644 index 0000000..364f126 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/qa/previews/waving.gif differ diff --git a/.codex-pet-runs/iroha-doctoral/qa/review.json b/.codex-pet-runs/iroha-doctoral/qa/review.json new file mode 100644 index 0000000..4d3f4ea --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/qa/review.json @@ -0,0 +1,990 @@ +{ + "ok": true, + "frames_root": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames", + "errors": [], + "warnings": [ + "idle used extraction method stable-slots; confirm motion playback remains stable and unclipped", + "running-right used extraction method stable-slots; confirm motion playback remains stable and unclipped", + "running-left used extraction method stable-slots; confirm motion playback remains stable and unclipped", + "waving used extraction method stable-slots; confirm motion playback remains stable and unclipped", + "jumping used extraction method stable-slots; confirm motion playback remains stable and unclipped", + "failed used extraction method stable-slots; confirm motion playback remains stable and 
unclipped", + "waiting used extraction method stable-slots; confirm motion playback remains stable and unclipped", + "running used extraction method stable-slots; confirm motion playback remains stable and unclipped", + "review used extraction method stable-slots; confirm motion playback remains stable and unclipped" + ], + "rows": [ + { + "state": "idle", + "expected_frames": 6, + "actual_frames": 6, + "extraction_method": "stable-slots", + "ok": true, + "errors": [], + "warnings": [ + "idle used extraction method stable-slots; confirm motion playback remains stable and unclipped" + ], + "frames": [ + { + "index": 0, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/idle/00.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8249, + "bbox": [ + 60, + 5, + 132, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 1 + }, + { + "index": 1, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/idle/01.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8171, + "bbox": [ + 60, + 5, + 132, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 2 + }, + { + "index": 2, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/idle/02.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8128, + "bbox": [ + 60, + 5, + 132, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 3 + }, + { + "index": 3, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/idle/03.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8087, + "bbox": [ + 60, + 5, + 132, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 3 + }, + { + "index": 4, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/idle/04.png", + "width": 192, + "height": 208, + 
"nontransparent_pixels": 8106, + "bbox": [ + 60, + 5, + 132, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 3 + }, + { + "index": 5, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/idle/05.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8150, + "bbox": [ + 60, + 5, + 132, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 4 + } + ] + }, + { + "state": "running-right", + "expected_frames": 8, + "actual_frames": 8, + "extraction_method": "stable-slots", + "ok": true, + "errors": [], + "warnings": [ + "running-right used extraction method stable-slots; confirm motion playback remains stable and unclipped" + ], + "frames": [ + { + "index": 0, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/00.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8358, + "bbox": [ + 46, + 6, + 145, + 201 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 30 + }, + { + "index": 1, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/01.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 9717, + "bbox": [ + 46, + 6, + 145, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 26 + }, + { + "index": 2, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/02.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 9064, + "bbox": [ + 46, + 5, + 145, + 202 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 18 + }, + { + "index": 3, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/03.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 9441, + "bbox": [ + 46, + 5, + 145, + 202 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 25 + }, + { + "index": 4, + 
"file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/04.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8958, + "bbox": [ + 46, + 5, + 145, + 202 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 19 + }, + { + "index": 5, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/05.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 9095, + "bbox": [ + 46, + 6, + 145, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 28 + }, + { + "index": 6, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/06.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 9104, + "bbox": [ + 46, + 6, + 145, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 24 + }, + { + "index": 7, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-right/07.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 9062, + "bbox": [ + 46, + 7, + 145, + 202 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 18 + } + ] + }, + { + "state": "running-left", + "expected_frames": 8, + "actual_frames": 8, + "extraction_method": "stable-slots", + "ok": true, + "errors": [], + "warnings": [ + "running-left used extraction method stable-slots; confirm motion playback remains stable and unclipped" + ], + "frames": [ + { + "index": 0, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/00.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8358, + "bbox": [ + 47, + 6, + 146, + 201 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 30 + }, + { + "index": 1, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/01.png", + "width": 192, 
+ "height": 208, + "nontransparent_pixels": 9717, + "bbox": [ + 47, + 6, + 146, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 26 + }, + { + "index": 2, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/02.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 9064, + "bbox": [ + 47, + 5, + 146, + 202 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 18 + }, + { + "index": 3, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/03.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 9441, + "bbox": [ + 47, + 5, + 146, + 202 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 26 + }, + { + "index": 4, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/04.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8958, + "bbox": [ + 47, + 5, + 146, + 202 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 19 + }, + { + "index": 5, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/05.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 9095, + "bbox": [ + 47, + 6, + 146, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 28 + }, + { + "index": 6, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/06.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 9104, + "bbox": [ + 47, + 6, + 146, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 26 + }, + { + "index": 7, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running-left/07.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 9062, + "bbox": [ + 47, + 7, + 146, + 202 + ], + "edge_pixels": 0, + 
"chroma_adjacent_pixels": 19 + } + ] + }, + { + "state": "waving", + "expected_frames": 4, + "actual_frames": 4, + "extraction_method": "stable-slots", + "ok": true, + "errors": [], + "warnings": [ + "waving used extraction method stable-slots; confirm motion playback remains stable and unclipped" + ], + "frames": [ + { + "index": 0, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waving/00.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8163, + "bbox": [ + 59, + 5, + 133, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 9 + }, + { + "index": 1, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waving/01.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8308, + "bbox": [ + 59, + 5, + 133, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 3 + }, + { + "index": 2, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waving/02.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8378, + "bbox": [ + 59, + 5, + 133, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 9 + }, + { + "index": 3, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waving/03.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8331, + "bbox": [ + 59, + 5, + 133, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 4 + } + ] + }, + { + "state": "jumping", + "expected_frames": 5, + "actual_frames": 5, + "extraction_method": "stable-slots", + "ok": true, + "errors": [], + "warnings": [ + "jumping used extraction method stable-slots; confirm motion playback remains stable and unclipped" + ], + "frames": [ + { + "index": 0, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/jumping/00.png", + "width": 192, + "height": 208, + 
"nontransparent_pixels": 6137, + "bbox": [ + 57, + 61, + 135, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 10 + }, + { + "index": 1, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/jumping/01.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 6269, + "bbox": [ + 54, + 18, + 138, + 169 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 8 + }, + { + "index": 2, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/jumping/02.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 6220, + "bbox": [ + 53, + 5, + 139, + 148 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 5 + }, + { + "index": 3, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/jumping/03.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 6811, + "bbox": [ + 54, + 21, + 138, + 186 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 3 + }, + { + "index": 4, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/jumping/04.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 6479, + "bbox": [ + 57, + 56, + 135, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 7 + } + ] + }, + { + "state": "failed", + "expected_frames": 8, + "actual_frames": 8, + "extraction_method": "stable-slots", + "ok": true, + "errors": [], + "warnings": [ + "failed used extraction method stable-slots; confirm motion playback remains stable and unclipped" + ], + "frames": [ + { + "index": 0, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/00.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 9070, + "bbox": [ + 55, + 5, + 137, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 19 + }, + { + "index": 1, + "file": 
"/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/01.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 9543, + "bbox": [ + 53, + 5, + 139, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 12 + }, + { + "index": 2, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/02.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8690, + "bbox": [ + 57, + 5, + 135, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 18 + }, + { + "index": 3, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/03.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8670, + "bbox": [ + 55, + 11, + 137, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 17 + }, + { + "index": 4, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/04.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8779, + "bbox": [ + 56, + 11, + 136, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 15 + }, + { + "index": 5, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/05.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8062, + "bbox": [ + 57, + 16, + 134, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 10 + }, + { + "index": 6, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/06.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 7306, + "bbox": [ + 56, + 36, + 136, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 6 + }, + { + "index": 7, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/failed/07.png", + "width": 192, + "height": 208, + 
"nontransparent_pixels": 8332, + "bbox": [ + 58, + 12, + 134, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 10 + } + ] + }, + { + "state": "waiting", + "expected_frames": 6, + "actual_frames": 6, + "extraction_method": "stable-slots", + "ok": true, + "errors": [], + "warnings": [ + "waiting used extraction method stable-slots; confirm motion playback remains stable and unclipped" + ], + "frames": [ + { + "index": 0, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waiting/00.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8613, + "bbox": [ + 51, + 5, + 140, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 11 + }, + { + "index": 1, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waiting/01.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8283, + "bbox": [ + 59, + 5, + 133, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 9 + }, + { + "index": 2, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waiting/02.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8188, + "bbox": [ + 57, + 9, + 134, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 14 + }, + { + "index": 3, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waiting/03.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8343, + "bbox": [ + 54, + 6, + 138, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 17 + }, + { + "index": 4, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waiting/04.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8276, + "bbox": [ + 55, + 7, + 137, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 11 + }, + { + "index": 5, + "file": 
"/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/waiting/05.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8384, + "bbox": [ + 57, + 8, + 135, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 10 + } + ] + }, + { + "state": "running", + "expected_frames": 6, + "actual_frames": 6, + "extraction_method": "stable-slots", + "ok": true, + "errors": [], + "warnings": [ + "running used extraction method stable-slots; confirm motion playback remains stable and unclipped" + ], + "frames": [ + { + "index": 0, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running/00.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8356, + "bbox": [ + 58, + 5, + 133, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 6 + }, + { + "index": 1, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running/01.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8488, + "bbox": [ + 58, + 5, + 133, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 49 + }, + { + "index": 2, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running/02.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 7784, + "bbox": [ + 62, + 5, + 129, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 6 + }, + { + "index": 3, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running/03.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 7406, + "bbox": [ + 62, + 7, + 129, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 5 + }, + { + "index": 4, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running/04.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8417, + 
"bbox": [ + 58, + 5, + 133, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 10 + }, + { + "index": 5, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/running/05.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8019, + "bbox": [ + 58, + 5, + 133, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 5 + } + ] + }, + { + "state": "review", + "expected_frames": 6, + "actual_frames": 6, + "extraction_method": "stable-slots", + "ok": true, + "errors": [], + "warnings": [ + "review used extraction method stable-slots; confirm motion playback remains stable and unclipped" + ], + "frames": [ + { + "index": 0, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/review/00.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 7925, + "bbox": [ + 60, + 5, + 132, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 4 + }, + { + "index": 1, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/review/01.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 8116, + "bbox": [ + 58, + 5, + 134, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 7 + }, + { + "index": 2, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/review/02.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 7582, + "bbox": [ + 63, + 5, + 129, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 4 + }, + { + "index": 3, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/review/03.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 7887, + "bbox": [ + 58, + 5, + 134, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 8 + }, + { + "index": 4, + "file": 
"/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/review/04.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 7999, + "bbox": [ + 58, + 5, + 134, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 5 + }, + { + "index": 5, + "file": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/frames/review/05.png", + "width": 192, + "height": 208, + "nontransparent_pixels": 7861, + "bbox": [ + 60, + 5, + 132, + 203 + ], + "edge_pixels": 0, + "chroma_adjacent_pixels": 2 + } + ] + } + ] +} diff --git a/.codex-pet-runs/iroha-doctoral/qa/run-summary.json b/.codex-pet-runs/iroha-doctoral/qa/run-summary.json new file mode 100644 index 0000000..e6fe519 --- /dev/null +++ b/.codex-pet-runs/iroha-doctoral/qa/run-summary.json @@ -0,0 +1,10 @@ +{ + "ok": true, + "run_dir": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral", + "spritesheet": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/final/spritesheet.webp", + "validation": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/final/validation.json", + "contact_sheet": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/qa/contact-sheet.png", + "review": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/qa/review.json", + "package": "/Users/akiwayne/Documents/Project2026/go-project/go-claude/.codex-pet-runs/iroha-doctoral/package/iroha-doctoral", + "install_status": "awaiting permission to copy into ~/.codex/pets/iroha-doctoral" +} diff --git a/.codex-pet-runs/iroha-doctoral/references/canonical-base.png b/.codex-pet-runs/iroha-doctoral/references/canonical-base.png new file mode 100644 index 0000000..95e6ad3 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/references/canonical-base.png differ diff --git 
a/.codex-pet-runs/iroha-doctoral/references/layout-guides/failed.png b/.codex-pet-runs/iroha-doctoral/references/layout-guides/failed.png new file mode 100644 index 0000000..6dfc181 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/references/layout-guides/failed.png differ diff --git a/.codex-pet-runs/iroha-doctoral/references/layout-guides/idle.png b/.codex-pet-runs/iroha-doctoral/references/layout-guides/idle.png new file mode 100644 index 0000000..c8e68ee Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/references/layout-guides/idle.png differ diff --git a/.codex-pet-runs/iroha-doctoral/references/layout-guides/jumping.png b/.codex-pet-runs/iroha-doctoral/references/layout-guides/jumping.png new file mode 100644 index 0000000..606d805 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/references/layout-guides/jumping.png differ diff --git a/.codex-pet-runs/iroha-doctoral/references/layout-guides/review.png b/.codex-pet-runs/iroha-doctoral/references/layout-guides/review.png new file mode 100644 index 0000000..c8e68ee Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/references/layout-guides/review.png differ diff --git a/.codex-pet-runs/iroha-doctoral/references/layout-guides/running-left.png b/.codex-pet-runs/iroha-doctoral/references/layout-guides/running-left.png new file mode 100644 index 0000000..6dfc181 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/references/layout-guides/running-left.png differ diff --git a/.codex-pet-runs/iroha-doctoral/references/layout-guides/running-right.png b/.codex-pet-runs/iroha-doctoral/references/layout-guides/running-right.png new file mode 100644 index 0000000..6dfc181 Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/references/layout-guides/running-right.png differ diff --git a/.codex-pet-runs/iroha-doctoral/references/layout-guides/running.png b/.codex-pet-runs/iroha-doctoral/references/layout-guides/running.png new file mode 100644 index 0000000..c8e68ee 
Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/references/layout-guides/running.png differ diff --git a/.codex-pet-runs/iroha-doctoral/references/layout-guides/waiting.png b/.codex-pet-runs/iroha-doctoral/references/layout-guides/waiting.png new file mode 100644 index 0000000..c8e68ee Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/references/layout-guides/waiting.png differ diff --git a/.codex-pet-runs/iroha-doctoral/references/layout-guides/waving.png b/.codex-pet-runs/iroha-doctoral/references/layout-guides/waving.png new file mode 100644 index 0000000..89c008d Binary files /dev/null and b/.codex-pet-runs/iroha-doctoral/references/layout-guides/waving.png differ diff --git a/.gitignore b/.gitignore index 6d14afd..877b69f 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,6 @@ scratch/ .tasks/ .runtime-tasks/ .worktrees/ +worktrees/ +*.out +coverage.* diff --git a/AGENTS.md b/AGENTS.md index 7ddd4d7..f99ff67 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,62 +1,64 @@ - + -# iroha-code +# iroha (go-claude) ## Purpose -An interactive AI Agent CLI built in Go, powered by 7 LLM providers (GLM, OpenAI, Claude, DeepSeek, Kimi, SiliconFlow, Gemini) with a Bubble Tea TUI, human-in-the-loop tool-use permissions, hook system, cross-session memory, task DAG planning, team coordination, MCP plugin routing, and autonomous execution. Designed as a Claude Code-inspired agent for the terminal. +An interactive, terminal-native AI coding agent CLI (binary: `iroha`). Bridges Google Genkit / Google ADK for multi-provider LLM orchestration (Gemini, Claude, OpenAI, DeepSeek, GLM) with Charm's Bubble Tea TUI framework for the user interface. Features human-in-the-loop permission approvals, hook pipelines, cross-session persistent memory, structured task tracking, team coordination, MCP plugin routing, and autonomous execution modes. Designed as a Claude Code-inspired agent for the terminal. 
## Key Files | File | Description | |------|-------------| -| `go.mod` | Go module definition (go 1.26.1, Charm stack, Google ADK/GenAI, Firebase Genkit) | -| `go.sum` | Dependency checksums | -| `.gitignore` | Excludes binary (`/iroha`), `.omc/`, `.iroha/`, `scratch/` | -| `system_prompt.md` | Default system prompt template for the agent | +| `go.mod` | Module `iroha`, Go 1.26.1, direct deps: Charm stack, Firebase Genkit, Google ADK/GenAI, UUID, yaml | +| `system_prompt.md` | Default system prompt template loaded by the agent at runtime | | `.golangci.yml` | Linter config (errcheck, govet, revive, staticcheck) | | `.goreleaser.yml` | GoReleaser build and release configuration | -| `install.sh` | Installation script | +| `install.sh` | One-line installer script (curl pipe sh) | +| `DESIGN.md` | Product design doc: brand, visual language, component inventory, interaction states | +| `README.md` | User-facing docs: features, quick start, slash commands, permission modes | +| `CONTRIBUTING.md` | Contribution guide and dev environment setup | ## Subdirectories | Directory | Purpose | |-----------|---------| -| `cmd/` | Application entry points (see `cmd/AGENTS.md`) | +| `cmd/` | CLI entry points (see `cmd/AGENTS.md`) | | `pkg/` | Core library packages (see `pkg/AGENTS.md`) | -| `docs/` | Project documentation and analysis | -| `.github/` | CI workflows, issue/PR templates | -| `scratch/` | Debug and experimental scripts | +| `docs/` | Project documentation and roadmap (see `docs/AGENTS.md`) | +| `scratch/` | Debug scripts and experimental throwaway code (gitignored) | +| `test/` | Integration test suites (see `test/AGENTS.md`) | ## For AI Agents ### Working In This Directory -- Run `go build -o iroha ./cmd/agent-cli` to compile the binary -- Run `go test ./...` to execute all tests -- The binary output is `./iroha` at repo root -- Config is stored at `~/.iroha.json` (outside repo) -- Project-local state lives in `./.iroha/` (gitignored) -- Auto-migrates from legacy 
`~/.go-claude.json` path +- Build: `go build -o iroha ./cmd/agent-cli` +- Test all: `go test ./...` +- Test specific packages: `go test ./pkg/tui/ ./pkg/llm/ ./pkg/agent/` +- Tidy modules: `go mod tidy` +- Binary output is `./iroha` at repo root +- User config stored at `~/.iroha.json` (outside repo) +- Project-local state in `./.iroha/` (gitignored) ### Testing Requirements -- Unit tests live alongside source files (`*_test.go`) -- Run `go test ./pkg/...` for all package tests -- Test coverage: ~25% (3,633 test lines / ~16,000 source lines) -- Key gaps: `tools.go` / `tools_*.go` have no dedicated tests +- Unit tests live alongside source (`*_test.go`) +- Key tested packages: `pkg/tui`, `pkg/llm`, `pkg/agent` +- Test gaps: `tools.go` / `tools_*.go` lack dedicated tests ### Common Patterns -- Standard Go project layout: `cmd/` for binaries, `pkg/` for libraries -- Google ADK (`google.golang.org/adk`) for agent framework -- Firebase Genkit (`github.com/firebase/genkit/go`) for Gemini/Claude SDK bridging -- Charm stack (Bubble Tea, Lipgloss, Glamour, Bubbles) for TUI -- English-language user-facing strings throughout (migrated from Chinese) +- Standard Go layout: `cmd/` for binaries, `pkg/` for libraries +- Google ADK (`google.golang.org/adk`) as agent framework +- Firebase Genkit for Gemini/Claude SDK bridging and OpenTelemetry tracing +- Charm stack (Bubble Tea, Lipgloss, Glamour) for TUI +- Three-level permission model: Default (confirm), Plan (read-only), Auto (silent) +- Hook system: `PreToolUse`, `PostToolUse`, `SessionStart` lifecycle hooks ## Dependencies ### External -- `github.com/charmbracelet/bubbletea` v1.3.10 — TUI framework - `github.com/charmbracelet/lipgloss` v1.1.1 — Terminal styling - `github.com/charmbracelet/glamour` v1.0.0 — Markdown rendering -- `github.com/charmbracelet/bubbles` v1.0.0 — TUI components -- `google.golang.org/adk` v1.2.1 — Agent development kit +- `github.com/charmbracelet/x/ansi` v0.11.6 — ANSI utilities +- 
`github.com/firebase/genkit/go` v1.8.0 — Firebase Genkit Go SDK (LLM orchestration) +- `github.com/google/uuid` v1.6.0 — UUID generation +- `google.golang.org/adk` v1.2.1 — Google Agent Development Kit - `google.golang.org/genai` v1.57.0 — Generative AI types -- `github.com/firebase/genkit/go` — Firebase Genkit Go SDK - - +- `golang.org/x/term` v0.43.0 — Terminal control +- `gopkg.in/yaml.v3` v3.0.1 — YAML parsing diff --git a/cmd/AGENTS.md b/cmd/AGENTS.md index 9011189..3a7f1d0 100644 --- a/cmd/AGENTS.md +++ b/cmd/AGENTS.md @@ -1,5 +1,5 @@ - + # cmd @@ -9,14 +9,14 @@ Application entry points. Each subdirectory is a standalone binary with its own ## Subdirectories | Directory | Purpose | |-----------|---------| -| `agent-cli/` | Primary CLI binary — config resolution, runner init, TUI launch (see `agent-cli/AGENTS.md`) | +| `agent-cli/` | Primary CLI binary — flag parsing, config resolution, runner init, TUI launch (see `agent-cli/AGENTS.md`) | ## For AI Agents ### Working In This Directory - Each subdirectory produces one binary via `go build -o iroha ./cmd/agent-cli` - Keep `main.go` files thin — delegate to `pkg/` packages -- The main.go in agent-cli is ~203 lines, all orchestration logic is in `pkg/` +- The main.go in agent-cli is ~214 lines, all orchestration logic is in `pkg/` ### Testing Requirements - No unit tests for entry points; tested via integration/manual testing @@ -25,6 +25,4 @@ Application entry points. 
Each subdirectory is a standalone binary with its own ### Common Patterns - Flag parsing for provider, model, API key, base URL, API format, session, permission mode - Config file resolution with CLI flag > env var > config file > wizard priority chain -- Auto-migration from legacy `~/.go-claude.json` to `~/.iroha.json` - - +- Teammate mode for multi-agent IPC via `--teammate` and `--socket` flags diff --git a/cmd/agent-cli/AGENTS.md b/cmd/agent-cli/AGENTS.md index b120d18..7cf0870 100644 --- a/cmd/agent-cli/AGENTS.md +++ b/cmd/agent-cli/AGENTS.md @@ -1,28 +1,26 @@ - + # agent-cli ## Purpose -Primary CLI entry point. Resolves configuration (flags > env vars > config file > wizard), initializes the agent runner with LLM adapter and 30+ tools, and launches the Bubble Tea TUI program with alt screen and mouse support. +Primary CLI entry point. Resolves configuration (flags > env vars > config file > wizard), initializes the agent runner with LLM adapter and tools, and launches the Bubble Tea TUI. Also supports teammate mode for multi-agent IPC. 
## Key Files | File | Description | |------|-------------| -| `main.go` | Binary entry point — flag parsing, config resolution, runner init, TUI launch (~203 lines) | +| `main.go` | Binary entry point — flag parsing, config resolution, session management, runner init, TUI launch (~214 lines) | ## For AI Agents ### Working In This Directory - This is the only file that ties all `pkg/` packages together - Config priority: CLI flags > environment variables > `~/.iroha.json` > interactive wizard > provider defaults -- Supported env vars: `ZHIPU_API_KEY`, `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GEMINI_API_KEY`, `DEEPSEEK_API_KEY`, `MOONSHOT_API_KEY`, `SILICONFLOW_API_KEY` -- Key flags: `--provider`, `--model`, `--apikey`, `--baseurl`, `--api-format`, `--config`, `--resume`, `--last`, `--session`, `--fork`, `--yes`, `--plan`, `--default` -- The `--config` flag forces the interactive setup wizard -- `--yes` sets auto-permission mode, `--plan` sets plan-only mode -- Auto-migrates config from legacy `~/.go-claude.json` -- Initializes Genkit only for Gemini/Claude providers (not needed for OpenAI-compatible) -- Configures global singletons and starts CronScheduler before TUI launch +- Two runtime modes: (1) normal TUI mode, (2) teammate mode via `--teammate --socket <path>` for child agent IPC +- Session management flags: `--resume` (picker), `--last` (auto-resume recent), `--session <id>`, `--fork <id>` +- Permission mode flags: `--yes`/`-y` (auto), `--plan`/`-p` (read-only), `--default`/`-d` (ask) +- Trailing CLI args are joined as a startup prompt sent to the agent immediately +- Supported providers: gemini, claude, openai, glm, deepseek, kimi, siliconflow ### Testing Requirements - No unit tests here; tested via integration/manual testing @@ -30,9 +28,10 @@ Primary CLI entry point. 
Resolves configuration (flags > env vars > config file ## Dependencies ### Internal -- `go-claude/pkg/agent` — Runner creation -- `go-claude/pkg/config` — Config loading and wizard -- `go-claude/pkg/llm` — Provider type constants, adapter creation -- `go-claude/pkg/tui` — TUI model and program +- `iroha/pkg/agent` — Runner creation, session service, teammate mode, permission modes +- `iroha/pkg/config` — Config loading and interactive wizard +- `iroha/pkg/llm` — Provider type constants, API format enum +- `iroha/pkg/tui` — TUI model and program (Bubble Tea) - +### External +- `github.com/google/uuid` — Session ID generation diff --git a/cmd/agent-cli/main.go b/cmd/agent-cli/main.go index 24bfe3c..53c7ec9 100644 --- a/cmd/agent-cli/main.go +++ b/cmd/agent-cli/main.go @@ -12,7 +12,6 @@ import ( "iroha/pkg/llm" "iroha/pkg/tui" - tea "github.com/charmbracelet/bubbletea" "github.com/google/uuid" ) @@ -207,17 +206,8 @@ func main() { initialMode = agent.ModeDefault } - // 5. Create the TUI model - m := tui.NewModel(runner, sessionID, startInSessionPicker, initialMode, startupPrompt) - - // 6. Create the Bubble Tea Program - p := tea.NewProgram(m, tea.WithAltScreen()) - - // Inject the program reference back into the model via ProgramRef pointer - m.ProgramRef.P = p - - // 7. Run the TUI Program - if _, err := p.Run(); err != nil { + // 5. Run the modern decoupled component-based raw TUI loop + if err := tui.RunApp(runner, sessionID, startInSessionPicker, initialMode, startupPrompt); err != nil { fmt.Printf("\x1b[31m[TUI runtime error] %v\x1b[0m\n", err) os.Exit(1) } diff --git a/docs/claude-code-architecture/README.md b/docs/claude-code-architecture/README.md new file mode 100644 index 0000000..8af7b6b --- /dev/null +++ b/docs/claude-code-architecture/README.md @@ -0,0 +1,57 @@ +# Claude Code Architecture — Master Spec & 1:1 Replica Plan + +> Produced 2026-06-14. 
Research: 16-dimension deep-dive into real Claude Code (v2.1.x, mid-2026), sourced via anysearch against `docs.claude.com`, `code.claude.com`, `anthropic.com`, and `github.com/anthropics/*`. Audit: 6-area read-through of the current `iroha` (go-claude) codebase. Adversarial verification of 38 load-bearing claims (13/16 dimensions covered). Method: ultracode multi-agent workflow. + +## How to read this + +| Doc | What it is | +|-----|------------| +| **[gap-analysis.md](gap-analysis.md)** | iroha-current-state vs Claude Code, per cluster. The delta. | +| **[refactor-plan.md](refactor-plan.md)** | The phased plan to reach 1:1 fidelity. The decisions + roadmap. | +| **[research/](research/)** | 16 detailed Claude Code architecture specs (the reference implementers copy from). | +| **[audit/](audit/)** | 6 honest capability inventories of the current iroha code (+ ADK-coupling maps). | +| **[verify-verdicts.md](verify-verdicts.md)** | Adversarial fact-check results — confirmed / refuted / uncertain, with corrections. | + +--- + +## Executive summary — the one finding that decides everything + +**iroha has no native agent loop.** Its `Execute()` is a thin event-forwarder around Google ADK's internal `Flow.Run` (`for { runOneStep }`). Real Claude Code owns its loop — a single async generator `query()` → `queryLoop()` (~1,730 lines, one code path) that every caller (REPL, SDK, sub-agents, headless `-p`, compaction) funnels through. + +The audit is unambiguous: **decoupling is not incremental** — *"the agent loop itself is outsourced to ADK, so a native refactor means replacing the loop driver, not just swapping types."* + +This single fact reframes the whole project. The peripheral managers (task / todo / cron / background / worktree / skills / plugin / team-inbox / memory / session-JSON) are **~85% already framework-free** and port almost verbatim. The tool **handlers** are ~90% decoupling-ready (they only consume `context.Context` + a workdir key). 
The ADK coupling is concentrated in a small, well-defined core: `runner.go`, `tools.go` (registry), `mcp.go` (the `DynamicMCPTool` wrapper), `subagent.go`/`pool.go`, the 3 `pkg/llm` adapters, and the `genai` wire types. + +**Therefore the project is a native engine rewrite with a large reusable periphery — not a greenfield rewrite, and not a patch.** + +## Claude Code architecture at a glance (verified) + +- **Agent loop** — one iteration = one model call: assemble context (system prompt + tool defs + history, **prompt-cached**) → stream response → if any `tool_use` blocks, execute and feed `tool_result` back as a `user` message → repeat. Yields to caller **only** on a tool-free response (`end_turn`) with no stop-hook continuation and no budget continuation. `max_turns` counts **only tool-use turns**. Read-only tools (Read/Glob/Grep/MCP `readOnlyHint`) run in parallel; stateful tools (Edit/Write/Bash) run sequentially. +- **5 SDK message types** — `SystemMessage` (`init` / `compact_boundary`), `AssistantMessage`, `UserMessage` (carries tool_result), `StreamEvent` (raw SSE, opt-in), `ResultMessage` (terminal; `success` / `error_max_turns` / `error_max_budget_usd`; carries `total_cost_usd`, `usage`, `num_turns`, `session_id`, `stop_reason`). +- **Streaming** — layered: Messages-API SSE (`message_start` → `content_block_*` → `message_delta` → `message_stop`) wrapped in SDK `StreamEvent`s; headless `--output-format stream-json` terminates with a top-level `type:"result"` event (**not** `message_stop`). +- **Session transcript** — append-only JSONL at `$CLAUDE_CONFIG_DIR/projects/<project>/<session-id>.jsonl`; each line has `uuid` + `parentUuid` (DAG/linked-list); compaction writes a `compact_boundary` (`parentUuid:null`, logicalParentUuid) followed by a user message with `isCompactSummary:true`. 
+- **Context/compaction** — API microcompact (`clear_tool_uses_20250919`: trigger 180k input tokens, target 40k) + `clear_thinking_20251015`; token-budget auto-continue (`COMPLETION_THRESHOLD=0.9`, `DIMINISHING_THRESHOLD=500`); real Anthropic token counting. +- **System prompt** — per-turn assembled array of blocks. **CLAUDE.md is NOT in the system prompt** — it is read and injected as a **user message** (project context); only the base agent prompt, tool descriptions, and env-info live in the system prompt (prompt-cached via `cache_control` breakpoints). *(Verified — this is the most commonly mis-stated fact.)* +- **Memory/CLAUDE.md** — `CLAUDE.md` cascade: managed (highest) → CLI args → local → project → user; `@import` expansion; the `#` memory quick-add; the `memory` tool writes typed `.md` files with an index. +- **Permissions** — 6 modes (default/acceptEdits/plan/bypassPermissions + auto/dontAsk); rules in `settings.json` `permissions.{allow,deny,ask}` evaluated **deny → ask → allow** (first match wins); Bash word-boundary glob gotcha (`Bash(ls *)` vs `Bash(ls*)`); path anchors differ per tool. +- **Hooks** — events: `PreToolUse` (uses `hookSpecificOutput.permissionDecision`, fires **before** permission-mode checks, can deny even in `bypassPermissions`), `PostToolUse`, `UserPromptSubmit`, `Stop`, `SubagentStop`, `SessionStart`, `SessionEnd`, `PreCompact`; command-hook stdin-JSON / stdout-JSON / exit-code protocol. +- **MCP** — 4 transports (stdio / SSE / streamable-HTTP / WebSocket); protocol **2025-06-18** (not iroha's pinned `2024-11-05`); tools namespaced `mcp__server__tool`; OAuth; `MAX_MCP_OUTPUT_TOKENS` default 25000 (warning at 10000); oversized results persist to disk with a file reference. 
+- **Subagents/Task** — single model-facing `Agent` tool (legacy alias `Task`); `.claude/agents/*.md` frontmatter (`name/description/tools/model`); parent receives **only the subagent's final message** as the tool_result (no intermediate calls); built-in `Explore`/`Plan` are one-shot (no `agentId`). +- **Skills** — `SKILL.md` with frontmatter; **progressive disclosure** (model decides when to expand the body); plugin namespace `plugin-name:skill-name`. +- **Slash commands + plan mode** — built-in + custom `.claude/commands/*.md` (`$ARGUMENTS`, `$1`, `!` bash, `@file`); `ExitPlanMode` presents 5 options (auto / acceptEdits / default / keep-planning / refine). +- **TUI** — **TypeScript React (Concurrent/Ink), not a Model/Update/View loop**; settings hierarchy: enterprise managed → user `~/.claude/settings.json` → project `.claude/settings.json` → local `settings.local.json`; IDE integration (VS Code/JetBrains). +- **Sandbox/security** — defense-in-depth: Bash sandbox (network/filesys/command deny), allow/deny patterns, macOS Seatbelt + Linux landlock/namespaces via a dedicated binary. + +Full detail per dimension: see [`research/`](research/). Corrections from the verify pass: see [`verify-verdicts.md`](verify-verdicts.md). + +## Current iroha state at a glance (audited) + +~24,900 lines of non-test Go, 40+ tools, 7 LLM providers, 6 permission modes, 12 hook events, real OS-level sandbox (mac `sandbox-exec` / linux `bwrap`), durable task/cron/background/worktree/skills/memory stores, a hand-rolled TUI with differential renderer + glamour markdown. **Functionally broad; architecturally mis-aligned at the core.** + +Capability status across audited areas: 91 implemented / 11 partial / 2 stub / 11 missing. The single `[missing]` that matters most: **the agent loop driver itself**. 
+ +## The decision (see refactor-plan.md for detail) + +**Build a native `AgentLoop` that owns the model→tool→model iteration, with Anthropic-native content-block messages + a real tokenizer, decoupling from Google ADK/Genkit.** Reuse the ~85% framework-free periphery. Fix the behavioral divergences (auto-commit, fixed persona, global circuit breaker, orphaned HTTP/OAuth MCP, stale MCP protocol, forced-cheap subagents, etc.). + +The plan is phased (Phase 0 foundation → Phase 4 verify) so the system stays buildable at each step. diff --git a/docs/claude-code-architecture/_extracted-results.json b/docs/claude-code-architecture/_extracted-results.json new file mode 100644 index 0000000..4d671a8 --- /dev/null +++ b/docs/claude-code-architecture/_extracted-results.json @@ -0,0 +1 @@ +{"research": {"agent-loop": {"asOfDate": "2026-06", "claimsToVerify": ["maxTurns counts ONLY tool-use turns, not all turns \u2014 a final text-only response is not counted toward the cap (so the 4-turn 'fix auth.ts' example has 3 tool turns; max_turns=2 stops 'before the edit step'). Source: docs.claude.com agent-loop + streaming-vs-single-mode confirms single-message query() with maxTurns:1 still yields a result.", "Auto-compact fires at (contextWindow - min(modelMaxOutput,20000)) - 13000 tokens and hard-blocks at that effective window - 3000 tokens; after 3 consecutive failures the circuit breaker permanently disables auto-compact (constant MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES=3). Source: claude-code-from-source.com ch05.", "Token-budget auto-continue: COMPLETION_THRESHOLD=0.9 (stop at >=90% used) and DIMINISHING_THRESHOLD=500 tokens \u2014 early stop requires >=3 continuations AND both current+previous deltas <500. Subagents ALWAYS stop (budget is top-level only). The nudge is an isMeta user message. 
Source: claude-code-from-source.com ch05 + inematds/claudecode-manual 04-query-engine.md.", "CallModel/retry wraps every API call in withRetry() with DEFAULT_MAX_RETRIES=10, exponential backoff BASE_DELAY*2^(attempt-1) capped at maxDelayMs=32000 with 25% jitter, honoring Retry-After; after 3 consecutive 529s on a non-custom Opus model it throws FallbackTriggeredError to switch to fallbackModel. Source: inematds/claudecode-manual 04-query-engine.md.", "SDK stop condition is not a bare stop_reason=='end_turn' check \u2014 the loop yields control ONLY when the assistant response contains ZERO tool_use blocks AND stop hooks do not inject blocking errors; a stop hook can force another full iteration. Source: docs.claude.com agent-loop + claude-code-from-source.com ch05 (Terminal vs Continue states)."], "components": [{"config": "options.max_turns (Python) / maxTurns (TS) \u2014 int, no default limit. options.max_budget_usd (Python) / maxBudgetUsd (TS) \u2014 no default limit. options.effort in {\"low\",\"medium\",\"high\",\"xhigh\",\"max\"} (xhigh recommended on Opus 4.7+/Fable 5). options.model e.g. \"claude-sonnet-4-6\", \"claude-opus-4-8\". options.permission_mode / permissionMode in {default, acceptEdits, plan, dontAsk, auto, bypassPermissions}. options.include_partial_messages (Py) / includePartialMessages (TS) bool \u2014 gates StreamEvent emission.", "dataModel": "Python dataclasses: SystemMessage (subtype 'init'|'compact_boundary', data nested w/ session_id), AssistantMessage (content blocks), UserMessage (tool result content), ResultMessage (subtype, result, usage, total_cost_usd, num_turns, session_id, stop_reason), StreamEvent (uuid, session_id, event:dict, parent_tool_use_id). 
TS equivalents: SDKAssistantMessage.type='assistant', SDKUserMessage.type='user', SDKResultMessage.type='result', SDKSystemMessage.type='system' subtype 'init', SDKCompactBoundaryMessage.type='compact_boundary' (NOT a SystemMessage subtype in TS), SDKPartialAssistantMessage.type='stream_event'. SDKMessage union also includes SDKUserMessageReplay, SDKStatusMessage, SDKLocalCommandOutputMessage, SDKHookStartedMessage, SDKHookProgressMessage.", "mechanism": "query() is an async generator (Python `async for message in query(...)`; TS `for await (const message of query({...}))`). It yields messages in this lifecycle order: (1) SystemMessage subtype='init' with session metadata (session_id, tools, models, agent info); (2) per turn: AssistantMessage (text + tool_use blocks) \u2192 UserMessage (tool_result content); (3) repeat; (4) final AssistantMessage with text-only (no tool_use); (5) ResultMessage with final text, token usage, cost (total_cost_usd), num_turns, session_id, stop_reason. Default (non-streaming) yields complete AssistantMessage after each model response completes; with include_partial_messages/includePartialMessages=true it also yields StreamEvent (TS: SDKPartialAssistantMessage, type 'stream_event') carrying raw API SSE events (message_start, content_block_start, content_block_delta with text_delta/input_json_delta, content_block_stop, message_delta, message_stop). IMPORTANT: a small number of trailing system events (e.g. prompt_suggestion) can arrive AFTER ResultMessage \u2014 callers must drain the stream to completion, not break on the result. 
check stop_reason === 'refusal' to detect refusals.", "name": "SDK query() entry point + message protocol", "purpose": "The public surface of the agent loop: a single async generator function that drives the entire turn cycle and yields typed messages."}, {"config": "Internal (source-level, not public API): MAX_OUTPUT_TOKENS_RECOVERY_LIMIT=3, MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES=3, hasAttemptedReactiveCompact one-shot, stopHookActive flag, turnCount monotonic counter, maxOutputTokensOverride (64K during escalation, cleared after).", "dataModel": "Terminal discriminated union: {reason: 'blocking_limit'|'image_error'|'model_error'|'aborted_streaming'|'prompt_too_long'|'completed'|'stop_hook_prevented'|'aborted_tools'|'hook_stopped'|'max_turns'}. Continue transition.reason: 'next_turn'|'collapse_drain_retry'|'reactive_compact_retry'|'max_output_tokens_escalate'|'max_output_tokens_recovery'|'stop_hook_blocking'|'token_budget_continuation'. LoopState carries messages, toolUseContext, turnCount, transition, autoCompactTracking, maxOutputTokensRecoveryCount, hasAttemptedReactiveCompact, maxOutputTokensOverride, pendingToolUseSummary (background Haiku summary promise), stopHookActive.", "mechanism": "Skeleton: init state \u2192 while(true){ run context-management pipeline \u2192 callModel via withRetry (streaming) \u2192 for each streamed AssistantMessage check for tool_use blocks (sets needsFollowUp) \u2192 if any tool_use: execute tools (StreamingToolExecutor runs concurrency-safe tools during streaming, sequential for stateful), append tool_result blocks, reconstruct NEW State object with transition.reason='next_turn', continue \u2192 if NO tool_use: run prompt-too-long recovery, max-output-token escalation/recovery, then stop hooks, then token-budget check \u2192 return Terminal }. Every continue site reconstructs a complete new immutable State object (not field mutation). 
Errors are WITHHELD from the yield stream during recovery (isWithheldPromptTooLong, isWithheldMaxOutputTokens) so SDK consumers that disconnect on any error field keep listening; withheld errors are pushed to internal assistantMessages so downstream recovery can find them, surfaced only if ALL recovery fails.", "name": "queryLoop() \u2014 the while(true) core (query.ts)", "purpose": "The single internal generator that every caller (REPL, SDK, sub-agents, headless -p, compact agent) delegates to. ~1,730 lines, one code path."}, {"config": "DEFAULT_MAX_RETRIES=10. maxDelayMs=32000. Persistent mode UNATTENDED_RETRY: 30-min backoff cap, heartbeat every 30s. feature('HISTORY_SNIP'), feature('TOKEN_BUDGET'), feature('CONTEXT_COLLAPSE') gates evaluated at bundle time.", "dataModel": "callModel yields AssistantMessage (type 'assistant', .message.content with text/tool_use/thinking blocks, optional .error field) and StreamEvent. withRetry yields SystemAPIErrorMessage before each sleep. On FallbackTriggeredError, currentModel=fallbackModel and signature/thinking blocks stripped (they are model-bound \u2014 replaying across models => 400). Orphaned partial AssistantMessages are tombstoned: yielded as {type:'tombstone', message} so UI/transcript removes them (prevents 'thinking blocks cannot be modified' error).", "mechanism": "queryModel is an async function* calling Anthropic messages.create(stream=true) wrapped in withRetry() (DEFAULT_MAX_RETRIES=10, exponential backoff base*2^(attempt-1) capped maxDelayMs=32000 + 0-25% jitter, honors Retry-After header). SSE sequence reconstructed into AssistantMessage objects: message_start \u2192 (content_block_start \u2192 content_block_delta* \u2192 content_block_stop)* \u2192 message_delta (carries final usage + stop_reason) \u2192 message_stop. Usage mutated in-place on last message only when message_delta arrives. 
Retry decision rules: 529 overloaded \u2192 only foreground query sources retry (background bails to avoid cascade); after 3 consecutive 529s on non-custom Opus model \u2192 throw FallbackTriggeredError \u2192 queryLoop switches to fallbackModel; OAuth 401 \u2192 handleOAuth401Error token refresh; context-overflow 400 \u2192 parse token counts, compute maxTokensOverride; ECONNRESET/EPIPE \u2192 disableKeepAlive then retry; persistent UNATTENDED_RETRY mode retries indefinitely with 30-min cap + 30s heartbeat.", "name": "callModel / queryModel \u2014 API streaming + retry ladder", "purpose": "Make the streaming Anthropic API call with model fallback and recover from transient failures."}, {"config": "tool() helper accepts annotations.readOnlyHint (default false) to opt custom tools into parallel execution. Built-in read-only: Read, Glob, Grep, MCP tools marked readOnly. Stateful (always sequential): Edit, Write, Bash. PreToolUse hook can short-circuit: reject \u2192 tool skipped, Claude gets rejection tool_result instead. Deny via permission \u2192 Claude typically tries another approach or reports it couldn't proceed.", "dataModel": "Request: {type:'tool_use', id:'toolu_', name, input}. Response: {type:'tool_result', tool_use_id, content: str | content_block[], is_error?: bool}. yieldMissingToolResultBlocks fires in 3 abort/error paths (outer error, fallback mid-stream, user abort) creating synthetic error tool_results for every tool_use lacking a result \u2014 prevents next-call protocol errors.", "mechanism": "Each assistant response may contain multiple tool_use blocks. Parallel execution is decided by tool type: read-only tools (Read, Glob, Grep, MCP readOnlyHint=true tools) run concurrently; stateful tools (Edit, Write, Bash, custom tools default) run sequentially. 
StreamingToolExecutor (gated feature streamingToolExecution) starts executing concurrency-safe tools as soon as their tool_use block's input is complete during streaming \u2014 before the full response finishes. 14-step execution pipeline per tool: Zod validation \u2192 input backfill (e.g. expand path) \u2192 PreToolUse hook \u2192 permission check (canUseTool callback) \u2192 execute \u2192 PostToolUse hook \u2192 format result. A background Haiku summary of tool results is kicked off (pendingToolUseSummary) and resolved/overlapped during the NEXT iteration's streaming (yielded as ToolUseSummaryMessage). Permission denial returns a rejection tool_result to Claude.", "name": "Tool execution + round trips", "purpose": "Execute requested tool_use blocks and feed tool_result blocks back so the loop continues."}, {"config": "max_turns/maxTurns, max_budget_usd/maxBudgetUsd (no defaults). ResultMessage subtype values: success, error_max_turns, error_max_budget_usd, error_during_execution, error_max_structured_output_retries. API stop_reason values the loop inspects: end_turn, tool_use, max_tokens, pause_turn, refusal, model_context_window_exceeded, stop_sequence.", "dataModel": "ResultMessage.subtype discriminated union above; .result field ONLY present on 'success'. .stop_reason (string|null) from last assistant response. All subtypes carry total_cost_usd, usage, num_turns, session_id (Python: total_cost_usd/usage typed Optional, guard None on error paths).", "mechanism": "PRIMARY stop condition = assistant response with zero tool_use blocks (model produced text only) AND no stop-hook blocking errors AND token budget says stop. Caps: max_turns/maxTurns counts ONLY tool-use turns (the final text-only response is NOT counted \u2014 so max_turns=2 in a 3-tool-turn task stops before the 3rd tool). max_budget_usd/maxBudgetUsd stops on spend threshold. Hitting either \u2192 ResultMessage.subtype = error_max_turns | error_max_budget_usd (result field absent). 
Other ResultMessage.subtypes: success (result present), error_during_execution (API failure/cancel), error_max_structured_output_retries. Normal completion \u2192 subtype 'success' + result text. stop_hook_prevented is its own Terminal reason but still surfaces via ResultMessage. API stop_reason on the final turn: end_turn (normal), max_tokens (truncated; triggers escalation/recovery ladder), refusal (declined \u2014 detect via stop_reason=='refusal'), pause_turn (server-tool sampling-loop iteration limit, default 10 \u2014 handle by appending assistant response and re-requesting), model_context_window_exceeded.", "name": "Stop conditions + ResultMessage subtypes", "purpose": "Decide when the loop yields control back to the user and report why."}, {"config": "Check via `message.type === 'result'` (TS) or isinstance(message, ResultMessage) (Python). For streaming check isinstance(message, StreamEvent) then message.event.get('type').", "dataModel": "StreamEvent: {uuid: str, session_id: str, event: dict[str,Any] (raw API SSE), parent_tool_use_id: str|None}. TS SDKPartialAssistantMessage.type === 'stream_event'.", "mechanism": "When include_partial_messages/includePartialMessages=true the generator interleaves StreamEvent (raw API SSE wrapped) between the buffered messages: message_start \u2192 content_block_start(text) \u2192 content_block_delta(text_delta)* \u2192 content_block_stop \u2192 content_block_start(tool_use) \u2192 content_block_delta(input_json_delta)* \u2192 content_block_stop \u2192 message_delta \u2192 message_stop \u2192 buffered AssistantMessage \u2192 [tool exec] \u2192 next turn's stream events \u2192 ResultMessage. Text is in delta.type=='text_delta'.delta.text; tool input accumulates from delta.type=='input_json_delta'.partial_json. 
Known limitation: structured-output JSON does NOT stream \u2014 only appears in final ResultMessage.structured_output.", "name": "Streaming vs buffered turn modes", "purpose": "Two output delivery modes: buffered (complete AssistantMessage per turn) vs streaming (raw SSE deltas as they arrive)."}], "confidence": "high", "dimension": "agent-loop", "externalInterfaces": ["Python: from claude_agent_sdk import query, ClaudeAgentOptions, AssistantMessage, UserMessage, ResultMessage, SystemMessage; from claude_agent_sdk.types import StreamEvent, AgentDefinition, TaskBudget, HookEvent", "TypeScript: import { query, tool, createSdkMcpServer, startup, listSessions, getSessionMessages } from '@anthropic-ai/claude-agent-sdk'; SDKMessage union of SDKAssistantMessage|SDKUserMessage|SDKUserMessageReplay|SDKResultMessage|SDKSystemMessage|SDKPartialAssistantMessage|SDKCompactBoundaryMessage|SDKStatusMessage|SDKLocalCommandOutputMessage|SDKHookStartedMessage|SDKHookProgressMessage|...", "query() returns AsyncGenerator (TS) / async iterator (Python). CLI binary bundled as optional dep @anthropic-ai/claude-agent-sdk-<platform>-<arch>.", "Anthropic Messages API: model (e.g. claude-opus-4-8), messages[], system, tools[], max_tokens, stream=true, beta headers e.g. context-1m-2025-08-07, task-budgets-2026-03-13", "Transcript: JSONL, one entry per message incl. isMeta nudge messages; user msg persisted before API call for resume", "Hooks: PreToolUse, PostToolUse, PostToolUseFailure, UserPromptSubmit, Stop, SubagentStop, PreCompact, Notification, SubagentStart, PermissionRequest"], "keyBehaviors": ["maxTurns counts ONLY tool-use turns \u2014 the final text-only response is not counted. max_turns=2 in a 3-tool-turn task stops before the 3rd tool. 
This is the single most commonly mis-stated fact about the loop.", "Withholding pattern: recoverable errors (prompt_too_long from context collapse/reactive compact, max_output_tokens) are NOT yielded to the stream during recovery because SDK consumers (Cowork, desktop app) terminate the session on any message carrying an error field. They are pushed to internal assistantMessages and surfaced only if recovery fails.", "Empty-response gotcha (API-level): adding a text block immediately AFTER a tool_result teaches Claude to expect user input after every tool use and yields empty responses (2-3 tokens, stop_reason end_turn). Correct: send tool_result directly with no trailing text. The agent loop in Claude Code handles this internally \u2014 re-implementors must format tool_result user messages without extra text.", "Context window never resets within a session \u2014 accumulates system prompt + tool defs + CLAUDE.md + conversation + tool I/O across turns. Static prefixes (system prompt, tool defs, CLAUDE.md) are prompt-cached so only the first request pays full cost.", "Subagents get a FRESH conversation (no parent turns) \u2014 only their final response returns to the parent as a tool_result. Subagents ALWAYS stop on token budget (budget is top-level only).", "Streaming input mode (default, recommended) supports images, queued messages, real-time interruption, full tool access, mid-loop user input via async generator yielding SDKUserMessage. Single-message mode does NOT support images/queueing/interruption and raises on error results (e.g. error_max_turns) \u2014 wrap in try block.", "pause_turn handling: when using server tools (web_search_20250305, web fetch) and the server-side sampling loop hits its 10-iteration default limit, the response may contain a server_tool_use without a matching server_tool_result. 
Agent loop must append the assistant response and re-request to let Claude finish.", "Trailing events after ResultMessage: a few system events (prompt_suggestion etc.) can arrive AFTER ResultMessage \u2014 iterate the stream to completion, do NOT break on the result message.", "Stop hooks can force another iteration: when the model produces text-only (thinks it's done) but a stop hook returns blocking errors, the errors are appended as a user message and the loop continues with stopHookActive=true (prevents re-running same hooks). preventContinuation \u2192 Terminal reason 'stop_hook_prevented'. Stop hooks are SKIPPED when the last assistant message is an API error \u2014 prevents death spiral (error\u2192hook blocking\u2192retry\u2192error).", "Effort vs extended-thinking are independent: effort in {low,medium,high,xhigh,max} controls reasoning depth per response; extended thinking produces visible chain-of-thought blocks. You can combine effort='low' with extended thinking on, or effort='max' without it.", "thinking/redacted_thinking blocks have 3 inviolable rules: (1) a message with a thinking block must be in a query with max_thinking_length>0; (2) a thinking block may never be the last block in a message; (3) thinking blocks must be preserved for the whole assistant trajectory. Violations \u2192 opaque API 400s. Model fallback must STRIP signature blocks (they are model-bound).", "Orphaned tool_use safety net: yieldMissingToolResultBlocks synthesizes error tool_results for every tool_use lacking a result \u2014 fires on model crash, fallback mid-stream, and user abort. 
Without it the next API call 400s on the protocol violation.", "Abort has two distinct paths: abort-during-streaming (executor drains queued results or synthesizes them; signal.reason distinguishes hard Ctrl+C from submit-interrupt which skips the interruption message since the queued user msg provides context) vs abort-during-tool-execution (interruption message carries toolUse:true flag).", "compact_boundary message: Python emits SystemMessage subtype='compact_boundary'; TS emits a SEPARATE SDKCompactBoundaryMessage type (not a SystemMessage subtype). Compaction replaces older messages with a summary \u2014 early instructions may be lost; persistent rules belong in CLAUDE.md (re-injected each request)."], "openQuestions": ["Exact public option key for the +500k-style token-budget auto-continue on the SDK surface vs the internal output_config.task_budget (task-budgets-2026-03-13 beta) \u2014 the source dives describe the internal feature flag TOKEN_BUDGET but the public ClaudeAgentOptions field name for per-turn token budget is not pinned in the fetched docs.", "Precise current default value of the server-side sampling-loop iteration limit that triggers pause_turn (docs say 'default 10' \u2014 verify it hasn't changed for the newest server tools).", "Whether the StreamingToolExecutor gate `config.gates.streamingToolExecution` is on by default in the latest shipped CLI binary, or still feature-flagged \u2014 affects whether tools begin executing before the assistant response completes.", "Exact behavior of permission_mode='auto' (TS-only, model classifier) availability across models in mid-2026 \u2014 docs mark it as conditional."], "sources": [{"title": "How the agent loop works \u2014 Claude Code Docs", "url": "https://code.claude.com/docs/en/agent-sdk/agent-loop", "why": "Official authoritative spec of the turn cycle, message types (SystemMessage/AssistantMessage/UserMessage/ResultMessage), max_turns semantics (counts tool-use turns only), ResultMessage 
subtypes, permission modes, effort levels, parallel tool execution, context window + auto-compaction."}, {"title": "Stream responses in real-time \u2014 Claude Code Docs", "url": "https://code.claude.com/docs/en/agent-sdk/streaming-output", "why": "Official spec of include_partial_messages/includePartialMessages, StreamEvent dataclass fields, raw SSE event ordering (message_start, content_block_start/delta/stop, message_delta, message_stop), text_delta vs input_json_delta, known structured-output limitation."}, {"title": "Streaming Input vs Single Message \u2014 Claude Code Docs", "url": "https://code.claude.com/docs/en/agent-sdk/streaming-vs-single-mode", "why": "Official distinction between persistent streaming-input mode (images, queued msgs, interruption) and one-shot single-message mode; SDKUserMessage generator shape; single-message raises on error results."}, {"title": "Stop reasons and fallback \u2014 Claude API Docs", "url": "https://platform.claude.com/docs/en/build-with-claude/handling-stop-reasons", "why": "Authoritative enumeration of API stop_reason values (end_turn, max_tokens, stop_sequence, tool_use, pause_turn, refusal, model_context_window_exceeded), the empty-response-after-tool_result gotcha, pause_turn default 10-iteration limit, streaming stop_reason appears only in message_delta."}, {"title": "Ch 5. 
The Agent Loop \u2014 Claude Code from Source", "url": "https://claude-code-from-source.com/ch05-agent-loop/", "why": "Source-level reverse engineering of query.ts (~1730 lines): why async generator (backpressure, typed Terminal return, yield*), 10-field LoopState, immutable state reconstruction, 4-layer context compression (snip/microcompact/context collapse/auto-compact), withholding pattern, escalation ladder, 10 Terminal + 7 Continue reasons, exact thresholds (13k/3k buffers, MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES=3, MAX_OUTPUT_TOKENS_RECOVERY_LIMIT=3), token-budget diminishing-returns rules, thinking-block rules, orphaned tool_result safety net."}, {"title": "Lesson 04 \u2014 Query Engine & LLM API (source deep dive)", "url": "https://github.com/inematds/claudecode-manual/blob/main/01-core-architecture/04-query-engine.md", "why": "Independent source-level confirmation of QueryEngine.submitMessage \u2192 query() \u2192 queryLoop() \u2192 queryModel/callModel \u2192 stop hooks, transcript-first persistence, SSE\u2192AssistantMessage reconstruction, withRetry() internals (DEFAULT_MAX_RETRIES=10, getRetryDelay formula, 529 routing, Opus 3x529\u2192FallbackTriggeredError, OAuth 401 refresh, context-overflow token parse), exact token-budget constants (COMPLETION_THRESHOLD=0.9, DIMINISHING_THRESHOLD=500, continuationCount>=3), stop-hook categories and fire-and-forget background tasks."}, {"title": "Agent SDK reference \u2014 TypeScript \u2014 Claude Code Docs", "url": "https://code.claude.com/docs/en/agent-sdk/typescript", "why": "Authoritative TypeScript wire format: SDKMessage discriminated union (type field values 'assistant'|'user'|'result'|'system'|'stream_event'|'compact_boundary'|...), query() signature, startup() pre-warm, tool()/ToolAnnotations (readOnlyHint gates parallel exec), SessionMessage shape from transcripts."}, {"title": "claude-agent-sdk-python types.py", "url": 
"https://github.com/anthropics/claude-agent-sdk-python/blob/main/src/claude_agent_sdk/types.py", "why": "Authoritative Python wire format and config: PermissionMode literal, EffortLevel literal, AgentDefinition fields (maxTurns, effort, model, permissionMode), TaskBudget (output_config.task_budget with task-budgets-2026-03-13 beta), full HookEvent literal, ToolPermissionContext/PermissionResult, permission update protocol (addRules/replaceRules/setMode destinations)."}, {"title": "Agent SDK \u2014 Claude Wiki (message categories)", "url": "https://claude-wiki.com/agent-sdk.html", "why": "Corroborating summary of SDKMessage stream categories and that SDKAssistantMessage may carry an error field (basis for the withholding-pattern behavior)."}], "summary": "Claude Code's agent loop is a single async generator (`query()` \u2192 `queryLoop()` in `query.ts`) that every caller (REPL, SDK, sub-agents, headless `-p`, compact agent) funnels through. One iteration = one model API call: gather context (system prompt + tool defs + conversation history, prompt-cached), stream a response, and if the response contains any `tool_use` content blocks, execute those tools and feed the `tool_result` blocks back as a `user` message, then loop. The loop yields control back to the caller ONLY when the assistant produces a response with zero `tool_use` blocks (i.e. `stop_reason: \"end_turn\"` or text-only) AND no stop-hook forces continuation AND no token-budget continuation fires. The loop yields 5 core SDK message types: `SystemMessage` (subtype `\"init\"` at start, `\"compact_boundary\"` after compaction), `AssistantMessage` (after each model response, incl. 
final text-only one), `UserMessage` (after each tool execution, carrying tool_result content), `StreamEvent` (only when `include_partial_messages`/`includePartialMessages` is enabled \u2014 raw API SSE events like `content_block_delta` with `text_delta`/`input_json_delta`), and `ResultMessage` (terminal, carries final text + `usage` + `total_cost_usd` + `session_id` + `stop_reason` + `num_turns`). A turn counts ONLY tool-use round trips; `max_turns`/`maxTurns` and `max_budget_usd`/`maxBudgetUsd` cap the loop and surface as `ResultMessage.subtype` = `error_max_turns` / `error_max_budget_usd`. Read-only tools (Read, Glob, Grep, MCP readOnlyHint) execute in parallel within a turn; stateful tools (Edit, Write, Bash) run sequentially."}, "tools-canonical": {"asOfDate": "2026-06", "claimsToVerify": ["TodoWrite was DISABLED by default as of Claude Code v2.1.142 in favor of TaskCreate/TaskGet/TaskList/TaskUpdate, and CLAUDE_CODE_ENABLE_TASKS=0 re-enables the legacy TodoWrite tool", "MultiEdit was REMOVED in Claude Code v2.0 (it existed in v1.x as a ~70-line batch-edit tool) and is NOT present in the current v2.1.x built-in tool set \u2014 replicas must implement multiple parallel Edit calls instead, NOT a MultiEdit tool", "Bash tool defaults: 120000ms timeout (max 600000ms), 30000 character output truncation (hard ceiling 150000 via BASH_MAX_OUTPUT_LENGTH); when exceeded, full output is saved to a session file and Claude gets path + short preview", "Read tool returns content in cat -n format with 1-indexed line numbers, prefix format is 'spaces + line_number + tab + content', default first 2000 lines, lines truncated at 2000 chars", "Edit requires old_string to appear exactly ONCE (else error) unless replace_all:true; enforces read-before-edit; Bash cat/head/tail/sed -n 'X,Yp'/grep/egrep/fgrep on a single file with no pipes satisfies read-before-edit but piped output does not"], "components": [{"config": "Required: file_path. 
Optional: offset (1-indexed line number to start), limit (line count, default 2000). No path = error.", "dataModel": "Params: {file_path: string (required), offset?: number, limit?: number}. additionalProperties:false. Result: tool_result with text content. For >10-page PDFs the `pages` param is required.", "mechanism": "Returns file contents with 1-indexed line numbers in `cat -n` format. Line-number prefix format: `spaces + line_number + tab + content`. Default reads first 2000 lines from the start; each line truncated at 2000 chars. If a whole-file read exceeds token limit, returns first page + a `PARTIAL view` notice telling the model how to read more with offset/limit. A read that explicitly passes offset/limit and STILL exceeds the limit returns an error. Multimodal: images (PNG/JPG) returned as visual content (resized/recompressed to model limits); PDFs read whole if <=10 pages, else paged via `pages` param like \"1-5\" up to 20 pages; .ipynb returns all cells with outputs. Reads files only, NOT directories (use Bash `ls`). Absolute paths enforced.", "name": "Read", "purpose": "Read file contents with line numbers; multimodal (text, images, PDFs, .ipynb)."}, {"config": "Required: file_path, content. No optional fields.", "dataModel": "Params: {file_path: string (required), content: string (required)}. additionalProperties:false.", "mechanism": "Creates a new file or fully overwrites an existing one. Does NOT append or merge \u2014 atomically writes the complete content. Enforces READ-BEFORE-WRITE: if target exists, the model must have read it in the current conversation at least once or the call FAILS with an error. New files are exempt. Same Bash-read satisfaction rules as Edit (cat/head/tail/sed -n X,Yp/grep/egrep/fgrep on a single file, no pipes). For partial changes, the model is instructed to use Edit instead. 
Absolute paths only.", "name": "Write", "purpose": "Create new file or fully overwrite existing file."}, {"config": "Required: file_path, old_string, new_string. Optional: replace_all (default false). new_string MUST differ from old_string.", "dataModel": "Params: {file_path, old_string, new_string (all required); replace_all?: boolean (default false)}. additionalProperties:false.", "mechanism": "EXACT string replacement \u2014 no regex, no fuzzy matching. Three checks run in order: (1) READ-BEFORE-EDIT (must have read file this conversation AND file unchanged on disk since) \u2014 runs FIRST before matching; (2) MATCH (old_string must appear exactly, including indentation/whitespace); (3) UNIQUENESS \u2014 old_string must appear EXACTLY ONCE, otherwise the edit fails; to disambiguate, supply more surrounding context, or set replace_all:true to replace all occurrences. Absolute paths. Read-before-edit is ALSO satisfied when Bash ran cat/head/tail/sed -n 'X,Yp'/grep/egrep/fgrep on a SINGLE file with no pipes/redirects \u2014 piped output and other commands do NOT count. NOTE: read-before-edit satisfaction set != deny-rule-checked set (egrep/fgrep count for read-before-edit but not Read deny rules).", "name": "Edit", "purpose": "Precise surgical string replacement in a file via exact matching."}, {"config": "CLAUDE_CODE_GLOB_NO_IGNORE=false makes Glob respect .gitignore (default ignores the ignore file).", "dataModel": "Params: {pattern: string (required), path?: string}. additionalProperties:false. Result: list of file paths + truncation flag.", "mechanism": "Finds files by NAME pattern using standard glob syntax: `*` (single dir level), `**` (recursive), `?`, `{a,b}` alternation, `[abc]`/`[a-z]`/`[!abc]`. Examples: `**/*.js`, `src/**/*.ts`, `*.{json,yaml}`. Results sorted by modification time (most recent first), capped at 100 files; hitting the cap returns a truncation flag so the model can narrow. 
Does NOT respect .gitignore by default (finds gitignored files) \u2014 DIFFERS from Grep which does respect .gitignore. Set CLAUDE_CODE_GLOB_NO_IGNORE=false to make it respect .gitignore.", "name": "Glob", "purpose": "Fast file-by-name pattern matching."}, {"config": "output_mode default files_with_matches. -A/-B/-C/-n only honored with output_mode=content. multiline default false. head_limit works in all modes.", "dataModel": "Params: {pattern (required), path?, output_mode?: 'content'|'files_with_matches'|'count' (default files_with_matches), glob?, type?, '-i'?, '-n'?, '-A'?, '-B'?, '-C'?, multiline?: boolean (default false), head_limit?: number}. additionalProperties:false. Note the literal flag names -i/-n/-A/-B/-C as JSON keys.", "mechanism": "Searches file CONTENTS. Built on ripgrep (uses ripgrep regex, NOT POSIX grep \u2014 literal braces need escaping: `interface\\{\\}` to find Go `interface{}`). Three output modes: files_with_matches (paths only, DEFAULT), content (matching lines + file + line number, supports -A/-B/-C context and -n), count (per-file match count). Scope by `glob` (e.g. `**/*.tsx`) or `type` (e.g. `py`, `rust`). Default single-line match; multiline:true spans lines (rg -U --multiline-dotall). head_limit caps first N entries across all modes. Respects .gitignore (skips gitignored files); to search a gitignored file pass its path directly. The literal JSON keys `-i`, `-n`, `-A`, `-B`, `-C`, `multiline`, `head_limit` mirror rg flags.", "name": "Grep", "purpose": "Search file contents using ripgrep regex."}, {"config": "Required: notebook_path, new_source. Optional: cell_id, cell_type (required for insert), edit_mode (default replace).", "dataModel": "Params: {notebook_path (required, absolute), new_source (required), cell_id?, cell_type?: 'code'|'markdown', edit_mode?: 'replace'|'insert'|'delete' (default replace)}. 
additionalProperties:false.", "mechanism": "Edits ONE cell at a time, targeted by `cell_id` (NOT string replacement across the notebook like Edit). Modes: replace (overwrite cell source, DEFAULT), insert (add new cell AFTER target; with no cell_id goes at the START; requires cell_type=code|markdown), delete (remove target cell). notebook_path must be ABSOLUTE. Permission rules use the Edit(...) path format \u2014 e.g. `Edit(notebooks/**)` covers NotebookEdit in that dir.", "name": "NotebookEdit", "purpose": "Modify Jupyter notebook cells by cell_id."}, {"config": "timeout default 120000 (BASH_DEFAULT_TIMEOUT_MS overrides default, BASH_MAX_TIMEOUT_MS overrides ceiling). Output cap 30000 (BASH_MAX_OUTPUT_LENGTH raises it, hard ceiling 150000). CLAUDE_BASH_MAINTAIN_PROJECT_WORKING_DIR=1 disables cwd carry-over. CLAUDE_ENV_FILE for env var persistence. Sources ~/.zshrc/~/.bashrc/~/.profile.", "dataModel": "Params: {command: string (required), description?: string, timeout?: number (max 600000), run_in_background?: boolean (default false)}. additionalProperties:false. Result text includes stdout, stderr, and `Exit code N`.", "mechanism": "Runs each command in a SEPARATE process (not one persistent shell) but emulates persistence: `cd` carries to later commands ONLY if it stays in the project dir or an added working dir (else resets to project dir + appends `Shell cwd was reset to {dir}`). Env vars do NOT persist across commands (export in one is gone in the next). Aliases/functions/options DO persist \u2014 at session start Claude Code sources ~/.zshrc/~/.bashrc/~/.profile, captures aliases/functions/options, applies to every command. Subagent sessions never carry cwd changes. 
Limits: default timeout 120000ms (2 min), model can request up to 600000ms (10 min) via timeout param; output truncated at 30000 chars by default \u2014 when exceeded, full output saved to a file in the session dir and the model gets the file path + short preview (raise via BASH_MAX_OUTPUT_LENGTH up to hard 150000). run_in_background:true detaches; never use it for `sleep` (returns immediately). Model is told to avoid Bash for cat/head/tail/grep/find/sed/awk/echo and to prefer Read/Grep/Glob; independent commands go as parallel Bash calls, dependent ones chained with && (not newlines). Background task output files have no size limit and are not auto-cleaned. Git safety: never update git config, never destructive git ops unless explicit, never skip hooks, never force-push main/master.", "name": "Bash", "purpose": "Execute shell commands; general-purpose escape hatch."}, {"config": "Required: command. No args passed (args go in the skill itself).", "dataModel": "Params: {command: string (required) \u2014 skill name only, no args}. additionalProperties:false.", "mechanism": "Loads a skill by name. Skill names without leading slash. Plugin-namespaced skills use `plugin:skill` form. When invoked, shows `{name} skill is loading` then expands the skill prompt. Only skills in the available list may be invoked; cannot invoke a skill already running; not for built-in CLI commands (/help, /clear). Runs through the existing Skill tool rather than adding a new tool entry. Note: the separate SlashCommand tool handles user-authored `/commands`.", "name": "Skill", "purpose": "Execute a skill within the main conversation."}, {"config": "Required: plan. Use only for implementation tasks, not research.", "dataModel": "Params: {plan: string (required, supports markdown)}. additionalProperties:false.", "mechanism": "Called only while in plan mode, after the model has presented its plan and is ready to code. Presents the plan to the user for approval and exits plan mode. 
ONLY for implementation/code-writing tasks \u2014 explicitly NOT for research/exploration. If ambiguous, the model is told to resolve via AskUserQuestion first. Permission: Yes (entering/exiting plan mode is gated).", "name": "ExitPlanMode", "purpose": "Present a plan for approval and exit plan mode."}, {"config": "1-4 questions; 2-4 options each; header max 12 chars; label 1-5 words; multiSelect required field.", "dataModel": "Params: {questions: array (minItems 1, maxItems 4) of {question, header (max 12 chars), multiSelect: boolean (required), options: array (minItems 2, maxItems 4) of {label, description}}; answers?: object (populated by permission component)}. additionalProperties:false.", "mechanism": "Structured multiple-choice prompt. 1-4 questions per call, 2-4 options per question, header is a very short label (max 12 chars), each option has label (1-5 words) + description. Users can always select 'Other' for custom text (auto-added \u2014 model must NOT include an 'Other' option). multiSelect must be specified. Used for gathering preferences, clarifying ambiguity, deciding implementation direction.", "name": "AskUserQuestion", "purpose": "Ask multiple-choice clarifying questions."}, {"config": "Required: query (min 2 chars). allowed_domains XOR blocked_domains (not both). No specifier in permission rules.", "dataModel": "Params: {query: string (required, minLength 2), allowed_domains?: string[], blocked_domains?: string[]}. additionalProperties:false.", "mechanism": "Runs query against Anthropic's server-side web search backend, returns result TITLES and URLs only (does NOT fetch pages \u2014 follow up with WebFetch). May issue up to EIGHT backend searches per call, refining internally before returning. Scope with allowed_domains (include only) or blocked_domains (exclude) \u2014 the two lists CANNOT be combined in one call. Backend not configurable (use MCP for other providers). 
Permission rules take NO specifier \u2014 bare `WebSearch` in allow/deny only. US-only. Availability varies by provider (works on Claude API + MS Foundry; on Vertex AI with Claude 4 models; NOT on Bedrock).", "name": "WebSearch", "purpose": "Server-side web search returning titles+URLs."}, {"config": "Required: url, prompt. 15-min cache. HTTP auto->HTTPS. User-Agent: Claude-User*.", "dataModel": "Params: {url: string (required, format: uri), prompt: string (required)}. additionalProperties:false.", "mechanism": "Fetches URL, converts HTML to Markdown (not configurable), runs the prompt against content using a SMALL FAST model, returns that model's answer (NOT raw page) \u2014 lossy by design. HTTP auto-upgraded to HTTPS. Large pages truncated to a fixed char limit before processing. 15-minute self-cleaning cache. On cross-host redirect, returns a text result naming original + redirect target (does NOT follow); model issues a second WebFetch. User-Agent begins with `Claude-User`; Accept header prefers Markdown over HTML. In default/acceptEdits modes, prompts on first reach of a new domain EXCEPT a built-in preapproved docs-domain set; add `WebFetch(domain:example.com)` to pre-allow. An explicit WebFetch(domain:...) in deny/ask/allow OVERRIDES the preapproved set. auto/bypassPermissions modes skip the prompt.", "name": "WebFetch", "purpose": "Fetch a URL, convert to Markdown, extract per prompt via small model."}, {"config": "Required: prompt. Optional: description, subagent_type, model, resume.", "dataModel": "Params: {description: string (3-5 words, required in older schema), prompt: string (required), subagent_type: string (required), model?: 'haiku'|'sonnet'|'opus', resume?: string (agent id)}. additionalProperties:false.", "mechanism": "Spawns a subagent in a SEPARATE context window that works autonomously and returns ONE final text result; parent never sees intermediate tool calls/outputs. 
Named types: general-purpose (all tools), Explore (Glob/Grep/Read/Bash, with thoroughness quick|medium|very thorough), plus setup agents. `tools`/`disallowedTools` frontmatter on the subagent definition controls tool set: neither=inherit all; tools only=just those; disallowedTools only=all except those; both set=disallowedTools wins. Foreground subagents show live permission prompts; background subagents auto-deny any prompting call and continue. Launching itself needs no permission. maxTurns caps turn count. Fork mode: a fork inherits the full parent conversation, always runs in background, surfaces prompts in terminal. Note: docs table lists the tool as `Agent`; older schema/system-prompt name is `Task` \u2014 same tool. Deprecated TaskOutput is replaced by Read on the task's output file path.", "name": "Task (a.k.a. Agent)", "purpose": "Spawn a subagent with its own context to handle a task autonomously."}, {"config": "Disabled by default since v2.1.142. Set CLAUDE_CODE_ENABLE_TASKS=0 to re-enable TodoWrite.", "dataModel": "TodoWrite params: {todos: array of {content (minLength 1), status: 'pending'|'in_progress'|'completed', activeForm (minLength 1)}}. additionalProperties:false on items.", "mechanism": "Replaces the ENTIRE todo list each call (not incremental). Exactly ONE item should be in_progress at a time. Item shape: {content: imperative-form string, status: 'pending'|'in_progress'|'completed', activeForm: present-continuous string}. Use for 3+ step complex tasks; skip for trivial/conversational. VERSION CHANGE: TodoWrite is DISABLED BY DEFAULT as of v2.1.142 in favor of the granular TaskCreate/TaskGet/TaskList/TaskUpdate quartet. To re-enable the legacy TodoWrite tool, set CLAUDE_CODE_ENABLE_TASKS=0. (Note: the Tasks feature itself was gated behind CLAUDE_CODE_ENABLE_TASKS=1 during its earlier opt-in rollout.) 
A 2026 system-prompt change swaps the hardcoded TodoWrite reference for one that resolves to TaskCreate or TodoWrite depending on whether tasks are enabled.", "name": "TodoWrite (LEGACY / disabled by default)", "purpose": "Manage the session checklist (whole-list replace)."}, {"config": "No permission required. New ID-based (vs old positional).", "dataModel": "TaskCreate: {subject, description, activeForm?, metadata?}. TaskUpdate: {taskId, status?, subject?, description?, activeForm?, owner?, addBlockedBy?, addBlocks?, metadata?}. TaskGet: {taskId}. TaskList: {} (returns summary).", "mechanism": "The modern replacement (introduced ~v2.1.16, became default in v2.1.142). Granular CRUD: TaskCreate (new pending task, auto-assigned ID), TaskGet (full details by ID), TaskList (all tasks summary), TaskUpdate (status pending->in_progress->completed, owner assignment, blockedBy/blocks dependencies, or deleted). Replaces the whole-list-replace TodoWrite with ID-based per-task updates and dependency graphs. State persists in ~/.claude/tasks// for team contexts.", "name": "TaskCreate / TaskGet / TaskList / TaskUpdate", "purpose": "Granular ID-based task management (replaces TodoWrite)."}, {"config": "Conditions: SendMessage/TeamCreate/TeamDelete need CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1. Monitor/RemoteTrigger/ScheduleWakeup/PushNotification unavailable on Bedrock/Vertex/Foundry. PowerShell needs CLAUDE_CODE_USE_POWERSHELL_TOOL=1 (off-C Windows). LSP needs a code-intelligence plugin. 
ToolSearch only when tool-search enabled.", "dataModel": "Various; see docs table.", "mechanism": "These are real, current tools but secondary to the core file/exec/agent set: Monitor (v2.1.98+, runs a watcher in background, reuses Bash permission rules, not on Bedrock/Vertex/Foundry); LSP (code intelligence, inactive until a code-intelligence plugin is installed; operations goToDefinition/findReferences/hover/documentSymbol/workspaceSymbol/goToImplementation/prepareCallHierarchy/incomingCalls/outgoingCalls); PowerShell (native, CLAUDE_CODE_USE_POWERSHELL_TOOL=1, spawns pwsh with -ExecutionPolicy Bypass process-scope); EnterPlanMode/ExitPlanMode (plan mode lifecycle); EnterWorktree/ExitWorktree (git worktree sessions under .claude/worktrees/); CronCreate/CronList/CronDelete (session-scoped scheduled prompts); ScheduleWakeup (reschedules a /loop iteration, 1min-1hr out); PushNotification (desktop + phone via Remote Control); SendMessage/TeamCreate/TeamDelete (agent teams, CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1); Workflow (dynamic multi-subagent orchestration); ShareOnboardingGuide; RemoteTrigger (claude.ai Routines behind /schedule); ListMcpResourcesTool/ReadMcpResourceTool/WaitForMcpServers/ToolSearch (MCP integration + deferred tool loading); TaskOutput (DEPRECATED \u2014 prefer Read on the task output file path); TaskStop (kill background task). 
Older/internal-only tools NOT in current v2 docs: BashOutput (read background shell output by bash_id, only NEW output since last check, optional regex filter that permanently drops non-matching lines) and KillShell (kill by shell_id) \u2014 these predate the run_in_background/task-id model.", "name": "Monitor / LSP / PowerShell / plan-mode / worktree / cron / agent-team / workflow / MCP / background-task tools", "purpose": "Extended built-in tools beyond the core file/exec/agent set."}], "confidence": "high", "dimension": "tools-canonical", "keyBehaviors": ["Read output uses `cat -n` 1-indexed line numbers with prefix `spaces + line_number + tab + content`; default first 2000 lines, each line truncated at 2000 chars; a whole-file read that exceeds the token limit returns a `PARTIAL view` notice (NOT an error), but a read that explicitly passes offset/limit and still exceeds returns an ERROR.", "Edit's THREE ordered checks: (1) read-before-edit (file read this conversation + unchanged on disk since) runs FIRST, (2) exact match, (3) uniqueness \u2014 old_string must appear EXACTLY ONCE or the edit FAILS (use replace_all:true or more context). Whitespace/indentation must match exactly.", "Read-before-edit / read-before-write is ALSO satisfied by Bash `cat`/`head`/`tail`/`sed -n 'X,Yp'`/`grep`/`egrep`/`fgrep` on a SINGLE file with NO pipes/redirects \u2014 but the deny-rule-checked command set differs (egrep/fgrep count for read-before-edit but NOT for Read deny rules). Piped output does NOT satisfy read-before-edit.", "Bash: 30,000 char output truncation default; when exceeded, FULL output is saved to a file in the session dir and the model receives the file path + a short preview from the start (raise cap via BASH_MAX_OUTPUT_LENGTH up to hard 150,000). 
Background task `.output` files have NO size limit and are never auto-cleaned.", "Bash `cd` carries to later commands ONLY within the project dir / added working dirs; landing outside resets to project dir and appends `Shell cwd was reset to {dir}`. Env vars do NOT persist across commands (export is gone next call); aliases/functions/options DO persist (sourced from ~/.zshrc/~/.bashrc/~/.profile at session start). CLAUDE_BASH_MAINTAIN_PROJECT_WORKING_DIR=1 disables carry-over; CLAUDE_ENV_FILE enables env persistence.", "Glob does NOT respect .gitignore by default (finds gitignored files) \u2014 DIFFERS from Grep which DOES respect .gitignore. Glob results sorted by mtime (recent first), capped at 100 files with a truncation flag. Set CLAUDE_CODE_GLOB_NO_IGNORE=false to make Glob respect .gitignore.", "Grep uses RIPGREP regex not POSIX grep (literal braces need escaping: `interface\\{\\}`); output_mode default is `files_with_matches` (paths only); -A/-B/-C/-n context flags only honored when output_mode=content; multiline default false; literal JSON keys `-i`/`-n`/`-A`/`-B`/`-C` mirror rg flags.", "TodoWrite is DISABLED BY DEFAULT as of v2.1.142 \u2014 replaced by TaskCreate/TaskGet/TaskList/TaskUpdate. Re-enable legacy TodoWrite with CLAUDE_CODE_ENABLE_TASKS=0. TodoWrite replaces the WHOLE list each call; Task* tools are ID-based and granular with dependency graphs.", "MultiEdit (batch edits, one file, `edits: [{old_string,new_string,replace_all}]`) was REMOVED in Claude Code v2.0 and is NOT in the current built-in tool set \u2014 replicas should implement parallel Edit calls instead of a MultiEdit tool.", "WebFetch is LOSSY by design: HTML->Markdown (not configurable), processed by a small fast model per the prompt (model gets the answer, not raw page), 15-min cache, HTTP auto->HTTPS, cross-host redirect returns original+target (no follow) requiring a second call. 
User-Agent starts with `Claude-User`.", "WebSearch returns TITLES + URLs only (no page fetch \u2014 follow up with WebFetch); may issue up to 8 backend searches per call; allowed_domains and blocked_domains CANNOT be combined in one call; permission rule takes NO specifier (bare `WebSearch` only); US-only; NOT on Bedrock.", "Agent/Task subagents: parent sees ONLY the final result, never intermediate tool calls; launching needs no permission but each subagent tool call is checked against session permission rules (background subagents auto-deny any prompting call); disallowedTools takes precedence over tools when both frontmatter fields set.", "All file tools require ABSOLUTE paths (relative rejected); NotebookEdit targets cells by cell_id not by index and not by string replacement; permission rules: Read/Grep/Glob/LSP use `Read(path)` format, Edit/Write/NotebookEdit use `Edit(path)` format (an Edit allow also grants read to same path), Bash/Monitor use `Bash(cmd pattern)`, WebFetch uses `WebFetch(domain:...)`, Agent uses `Agent(type)`, Skill uses `Skill(name)`."], "openQuestions": ["Exact current schema of the Task/Agent tool's optional `model` and `resume` fields and whether `description`/`subagent_type` remain strictly required in the latest v2.1.16x prompt (community schemas conflict slightly on required-ness).", "Whether TaskOutput is fully removed or merely deprecated in the very latest version (docs mark it deprecated, prefer Read on output file path).", "Exact composition of the built-in preapproved WebFetch documentation-domain set that skips the first-time domain prompt.", "Exact internal JSON result envelope shape for each tool (the model-facing text content is well documented, but the structured tool_result field names Claude Code itself emits for the API differ slightly and are not officially published)."], "sources": [{"title": "Tools reference - Claude Code Docs (official)", "url": "https://code.claude.com/docs/en/tools-reference", "why": "PRIMARY 
source. Full official table of every built-in tool name + permission requirement + per-tool behavior sections (Read cat -n, Edit unique-match, Bash persistence/limits, Glob/Grep, NotebookEdit, WebFetch/WebSearch, Write, Agent, TodoWrite v2.1.142 deprecation, Task tools, Monitor/LSP/PowerShell/worktree/cron/workflow)."}, {"title": "Internal claude code tools implementation (gist by bgauryy)", "url": "https://gist.github.com/bgauryy/0cdb9aa337d01ae5bd0c803943aa36bd", "why": "Reverse-engineered EXACT JSON schemas (draft-07) and parameter interfaces for Read/Write/Edit/Glob/Grep/NotebookEdit/Bash/BashOutput/KillShell/Task/Skill/SlashCommand/TodoWrite/ExitPlanMode/AskUserQuestion/WebFetch/WebSearch/getDiagnostics/executeCode \u2014 the load-bearing field names and types for a replica."}, {"title": "Claude Code Tool Input Schemas (kaidhar/claude-code-permissions-hook)", "url": "https://github.com/kaidhar/claude-code-permissions-hook/blob/main/docs/tool-input-schemas.md", "why": "Cross-referenced tool_input JSON shapes (verified against actual hook inputs) used by PreToolUse hooks \u2014 confirms MultiEdit schema (edits[] array), Task model/resume fields, LS tool (path+ignore), and MCP naming mcp__{server}__{tool}."}, {"title": "Claude Code 2.0 System Prompt Changes (Mikhail Shilkov)", "url": "https://mikhail.io/2025/09/sonnet-4-5-system-prompt-changes/", "why": "Authoritative confirmation that MultiEdit was REMOVED in Claude Code v2.0 (existed as a ~70-line tool in v1.x), driving the decision NOT to reimplement a MultiEdit tool."}, {"title": "Tasks API vs TodoWrite (DeepWiki) + Reddit r/ClaudeAI", "url": "https://deepwiki.com/FlorianBruniaux/claude-code-ultimate-guide/8.1-tasks-api-vs-todowrite", "why": "Confirms the v2.1.16 Tasks API introduction and the v2.1.142 default-disable of TodoWrite, plus the CLAUDE_CODE_ENABLE_TASKS env var semantics during rollout."}, {"title": "anthropics/claude-code Issue #19901 (Bash output limits)", "url":
"https://github.com/anthropics/claude-code/issues/19901", "why": "Official-tracked confirmation that Bash captures max 30,000 chars by default and spills full output to a session file with path+preview when exceeded."}, {"title": "Claude Code changelog (official)", "url": "https://code.claude.com/docs/en/changelog", "why": "Version-specific Bash behavior changes (background shell stopped ~5s after result when stdin closes; $()/$VAR subshell pattern matching) and the CLAUDE_CODE_ENABLE_TASKS gating timeline."}, {"title": "Piebald-AI claude-code-system-prompts CHANGELOG", "url": "https://github.com/Piebald-AI/claude-code-system-prompts/blob/main/CHANGELOG.md", "why": "Tracks the system-prompt swap that resolves the TodoWrite tool reference to TaskCreate or TodoWrite depending on whether tasks are enabled \u2014 confirms the dual-resolution mechanism."}], "summary": "Claude Code (as of v2.1.x, mid-2026) exposes a fixed canonical set of built-in tools to the model. The core file/exec/agent tools are Read, Write, Edit, Glob, Grep, Bash, NotebookEdit, Task (a.k.a. Agent), TodoWrite, WebFetch, WebSearch, AskUserQuestion, ExitPlanMode, Skill. The official docs table now lists ~50 tools including newer ones: TaskCreate/TaskGet/TaskList/TaskUpdate (which REPLACE TodoWrite as of v2.1.142), NotebookEdit, LSP, Monitor, PowerShell, EnterPlanMode/ExitPlanMode, EnterWorktree/ExitWorktree, CronCreate/CronList/CronDelete, ScheduleWakeup, SendMessage, TeamCreate/TeamDelete, Workflow, ShareOnboardingGuide, RemoteTrigger, PushNotification, ListMcpResourcesTool/ReadMcpResourceTool, WaitForMcpServers, ToolSearch, plus deprecated BashOutput/KillShell/TaskOutput. CRITICAL VERSION FACT: MultiEdit was REMOVED in Claude Code v2.0 (it existed in v1.x for batch atomic edits in a single file) and is NOT in the current tool set; the model achieves the same via multiple parallel Edit calls. 
TodoWrite is DISABLED BY DEFAULT as of v2.1.142 in favor of the Task* quartet (re-enable via CLAUDE_CODE_ENABLE_TASKS=0). Each tool has a strict JSON-schema parameter contract; file tools require absolute paths and enforce a read-before-edit/read-before-write session state check; permission rules use the exact tool name as the matcher string."}, "tool-exec-engine": {"asOfDate": "2026-06", "claimsToVerify": ["Default max parallel tool concurrency is 10 via env var CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY; in v2.1.158 read-only is detected via tool annotations.readOnlyHint (mapping to the internal isConcurrencySafe(input) check)", "Within a parallel batch, only Bash (non-zero exit) errors cascade to cancel all in-flight siblings with the synthetic message 'Cancelled: parallel tool call errored'; Read/Grep/Fetch errors are isolated (reported as v2.1.158 bug #64247)", "Permission rule evaluation order is deny -> ask -> allow (first match wins, specificity does not change order); rules format 'Tool' or 'Tool(specifier)' with Bash wildcards where a space before * enforces a word boundary; oversized tool results persist to ~/.claude/tool-results/{hash}.txt and MCP default persist threshold is 25000 chars (hard ceiling 500000 via _meta anthropic/maxResultSizeChars)"], "components": [{"config": "settings.json: permissions.{allow,ask,deny} string arrays; permissions.defaultMode; --permission-mode / --dangerously-skip-permissions CLI flags. ENABLE_TOOL_SEARCH unset|true|auto|auto:N|false controls MCP deferral. MAX_MCP_OUTPUT_TOKENS, MCP_TOOL_TIMEOUT.", "dataModel": "API contract (Anthropic Messages): assistant turn with stop_reason='tool_use' contains 1+ tool_use blocks {id:'toolu_...', name, input}. Client must reply with ONE user message whose content array begins with tool_result blocks {tool_use_id, content?, is_error?} \u2014 text blocks MUST come AFTER all tool_results, else HTTP 400. 
Multiple tool_result blocks for one turn MUST be batched in a single user message (separate messages break future parallel-tool-use prompting). Server tools (web_search, code_execution) execute inside Claude and need no tool_result.", "mechanism": "1) Stream assistant response, parse each tool_use block. 2) For each: look up tool def (alias-fallback to getAllBaseTools for renamed tools in old transcripts), abort-check, Zod safeParse input (on failure append hint to call ToolSearch for deferred tools), semantic validateInput (e.g. FileEdit rejects no-ops, Bash blocks standalone sleep when MonitorTool present). 3) Speculatively start auto-mode classifier for Bash. 4) Backfill derived fields (expand ~/foo) into a CLONED input (original kept for transcript). 5) Run PreToolUse hooks \u2014 can allow/deny/modify/stop; hook allow does NOT bypass deny/ask rules; exit code 2 blocks before rule eval. 6) canUseTool(): if hook decided, final; else deny\u2192ask\u2192allow rule match \u2192 tool.checkPermissions() \u2192 mode default \u2192 interactive prompt or classifier. 7) On deny build error msg + run PermissionDenied hooks. 8) call(input=original). 9) Result budget. 10) PostToolUse hooks (can modify MCP output / block). 11) Append newMessages. 12) classifyToolError for telemetry.", "name": "Tool-call lifecycle (API + in-process)", "purpose": "Translate a model tool_use block into a validated, permission-gated, executed tool_result content block, preserving message-history invariants."}, {"config": "Seven modes: default, acceptEdits (auto-allows edits + mkdir/touch/rm/rmdir/mv/cp/sed in-scope), plan (read-only, denies writes), dontAsk (auto-deny prompts, CI), bypassPermissions (allow all; since v2.1.126 includes protected paths; rm -rf / and rm -rf ~ STILL prompt as circuit breaker; refuses root/sudo outside sandbox), auto (classifier model; v2.1.83+; consecutive 3 or total 20 blocks \u2192 fall back to prompting). Shift+Tab cycles default\u2192acceptEdits\u2192plan. 
disableBypassPermissionsMode / disableAutoMode = 'disable' locks them.", "dataModel": "PermissionRule = { source, ruleBehavior: 'allow'|'deny'|'ask', ruleValue: 'Tool' | 'Tool(specifier)' }. Settings precedence (highest wins): Managed > CLI args > .claude/settings.local.json > .claude/settings.json > ~/.claude/settings.json. A deny at ANY level cannot be overridden.", "mechanism": "Rule string format 'Tool' or 'Tool(specifier)'. Bare deny removes tool from context entirely; scoped deny (Bash(rm *)) leaves tool visible and blocks the matching call. Bash rules: glob '*' (space before * = word boundary; ls* matches lsof, ls * does not); ':*' suffix == trailing ' *'; separators && || ; | |& & newline split compound commands and EACH subcommand must match (max 5 rules saved per compound approval); process wrappers timeout/time/nice/nohup/stdbuf and bare xargs are stripped; read-only set (ls cat echo pwd head tail grep find wc which diff stat du cd + read-only git) never prompts. Read/Edit use gitignore patterns with 4 anchors: //abs, ~/home, /project-rel, ./cwd-rel. WebFetch uses domain: prefix (* matches within a label except leading *. or whole-pattern). MCP rules: mcp__{server}, mcp__{server}__*, mcp__{server}__tool (allow globs only after literal mcp__server__ prefix; unanchored allow globs are warned+skipped). Protected paths (.git, .claude except worktrees, .vscode, .idea, .husky, etc + named rc/config files) never auto-approved except in bypassPermissions.", "name": "Permission resolution chain", "purpose": "Decide allow/deny/ask per tool invocation using deny\u2192ask\u2192allow precedence layered over 7 modes."}, {"config": "CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY (default 10) bounds concurrent batch size. Tools declare interruptBehavior() 'cancel'|'block' (block is default).", "dataModel": "Partition = []Group{ parallel:bool, calls:[]ToolCall }. TrackedTool states: queued|executing|completed|yielded. ToolResult={ data, newMessages?, contextModifier? }. 
AbortController hierarchy: query-level (Ctrl+C) \u2192 sibling-level (Bash-error cascade) \u2192 per-tool.", "mechanism": "partitionToolCalls() walks calls L\u2192R, safeParse input, calls isConcurrencySafe(parsedInput) in try-catch (failure\u2192serial), merges consecutive-safe calls into one concurrent batch, isolates unsafe calls into single-tool serial batches. Concurrent: runToolsConcurrently via bounded async-generator all() with limit. Serial: apply contextModifier immediately. TWO OPTIMIZATIONS: (a) speculative execution \u2014 StreamingToolExecutor.addTool() is fire-and-forget called per parsed tool_use during streaming; processQueue() admits a tool iff noToolsRunning || (newToolSafe && allRunningSafe); (b) batch dispatch after stream completes. RESULTS YIELDED IN SUBMISSION ORDER not completion order \u2014 getCompletedResults() breaks the walk at any executing serial tool (order preservation via buffering). Context modifiers only applied for serial tools; concurrent-batch modifiers queued by tool_use_id and applied in submission order after batch. discard() escape hatch sets discarded=true so retry stream starts fresh.", "name": "Concurrency: partition + streaming executor", "purpose": "Run independent read-only tools in parallel; serialize writes; overlap tool execution with model response streaming."}, {"config": "maxResultSizeChars per tool (Bash 30000, FileEdit 100000, Grep 100000, FileRead Infinity). MCP: MAX_MCP_OUTPUT_TOKENS default 25000, warning at 10000; per-server .mcp.json timeout overrides MCP_TOOL_TIMEOUT; tool can raise limit to 500000 via _meta['anthropic/maxResultSizeChars'].", "dataModel": "Persisted file path ~/.claude/tool-results/{hash}.txt; wrapper replaces in-content.", "mechanism": "Per-tool maxResultSizeChars threshold \u2192 oversize output persisted to ~/.claude/tool-results/{hash}.txt and replaced with preview block (model re-Reads full content). 
ContentReplacementState tracks an aggregate conversation budget (death-by-a-thousand-cuts guard). BashTool detects image output by magic bytes \u2192 emits image content block; FileReadTool emits base64 image blocks, handles PDFs/notebooks/dirs, blocks /dev/zero /dev/random /dev/stdin.", "name": "Result budgeting", "purpose": "Bound tool output size per-call and per-conversation to avoid context overflow."}, {"config": "MAX_MCP_OUTPUT_TOKENS, MCP_TOOL_TIMEOUT, ENABLE_TOOL_SEARCH, .mcp.json (project root, checked into VCS), .claude.json (user scope).", "dataModel": "Tool name mcp__server__tool (chars outside [A-Za-z0-9_-] \u2192 _, capped 64). Plugin form mcp__plugin_X_Y__Z. MCP tool schema = JSON Schema; input validated same as built-ins.", "mechanism": "Spawn server (stdio/SSE/HTTP) \u2192 JSON-RPC 2.0 initialize \u2192 tools/list discovers \u2192 register with mcp__ prefix \u2192 route tools/call transparently. assembleToolPool(): built-ins (deny-filtered, REPL-hidden, isEnabled-checked) sorted alphabetically THEN MCP tools sorted alphabetically, concatenated (built-ins prefix) so a prompt-cache breakpoint sits after the last built-in \u2014 flat-sorted interleaving would bust cache on MCP add/remove. MCP tools go through the SAME 14-step pipeline. Tool search/deferred loading (ENABLE_TOOL_SEARCH default-on for MCP): tools sent with defer_loading=true (name+desc only, no schema); model calls ToolSearchTool to load schema; calling a deferred tool without loading \u2192 Zod string-coercion failure + targeted recovery hint.", "name": "MCP tool routing & registry", "purpose": "Expose external MCP server tools as first-class tools indistinguishable from built-ins to the agent loop."}, {"config": "CLAUDE_CODE_MAX_OUTPUT_TOKENS bounds model output; MaxTokens stop surfaces that error.", "dataModel": "tool_result.is_error=true with natural stderr-style content.
Stop reasons: tool_use (run tools), end_turn, max_tokens, pause_turn, refusal, model_context_window_exceeded, etc.", "mechanism": "classifyToolError() extracts telemetry-safe string (errno, stable name) \u2014 never logs raw msg (minified builds mangle constructor.name). Parallel batch: only Bash non-zero-exit errors cascade (cancel sibling controller \u2192 synthetic 'Cancelled: parallel tool call errored'); Read/Grep/Fetch errors are isolated (no sibling cancel). Dependencies across parallel calls (create-then-update) are NOT pre-detected: dispatch all, if one fails return is_error:true with natural message, model reissues next turn. Orphaned tool_use (interrupted parallel call) must still get a placeholder tool_result or API 400s. MaxTokens stop_reason with partial tool_use: still emit tool_result blocks for the partial calls.", "name": "Error classification & recovery", "purpose": "Convert execution failures into model-actionable tool_result(is_error) without leaking internals, and keep conversation history coherent."}], "confidence": "high", "dimension": "tool-exec-engine", "externalInterfaces": ["Anthropic Messages API: stop_reason='tool_use' with tool_use{id,name,input} blocks; reply user message with tool_result{tool_use_id,content,is_error} blocks (all results in ONE user message, no text before tool_results)", "Internal: checkPermissionsAndCallTool() 14-step pipeline; partitionToolCalls() in toolOrchestration.ts; StreamingToolExecutor{addTool,processQueue,executeTool,getCompletedResults,getRemainingResults,discard}; canUseTool()", "Tool interface: call(input)\u2192ToolResult{data,newMessages,contextModifier}; inputSchema (Zod\u2192JSON Schema); isConcurrencySafe(input); isReadOnly(input); checkPermissions(input); validateInput(); isEnabled(); interruptBehavior(); maxResultSizeChars", "Config files: ~/.claude/settings.json, .claude/settings.json, .claude/settings.local.json (permissions.{allow,ask,deny,defaultMode}); .mcp.json (project MCP), .claude.json 
(user MCP); ~/.claude/tool-results/{hash}.txt (persisted oversize output)", "MCP JSON-RPC 2.0: initialize, tools/list (supports _meta anthropic/maxResultSizeChars up to 500000), tools/call", "CLI flags: --permission-mode, --dangerously-skip-permissions, --allow-dangerously-skip-permissions, --add-dir, --allowedTools, --disallowedTools", "Env vars: CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY(10), MAX_MCP_OUTPUT_TOKENS(25000), MCP_TOOL_TIMEOUT, ENABLE_TOOL_SEARCH, CLAUDE_CODE_MAX_OUTPUT_TOKENS, CLAUDE_CODE_ENABLE_AUTO_MODE"], "keyBehaviors": ["RESULTS ARE YIELDED IN SUBMISSION (tool_use arrival) ORDER, NOT COMPLETION ORDER. Buffer completed results; getCompletedResults() BREAKS the walk at any still-executing serial tool so nothing after it yields early. This is the single hardest correctness invariant to preserve in a reimpl.", "Concurrency safety is PER-INVOCATION, not per-tool. isConcurrencySafe(parsedInput) is called after safeParse; any parse failure or thrown exception \u2192 serial (fail-closed). BashTool parses compound commands via splitCommandWithOperators and returns true only if EVERY non-neutral subcommand is in search/read/list sets.", "Mutual exclusion contract in the streaming executor: a tool can start iff noToolsRunning OR (newToolSafe AND allRunningAreSafe). A single non-concurrent tool in flight blocks everyone.", "Bash errors are the ONLY errors that cascade to sibling cancellation in a parallel batch (synthesize 'Cancelled: parallel tool call errored'). This is confirmed production behavior (v2.1.158, issue #64247) and a known bug source \u2014 Opus 4.8 spirals on the synthetic cancel messages. Read/Grep errors do NOT cancel siblings.", "tool_result blocks for a parallel turn MUST be batched in a single user message and MUST come before any text blocks. 
Splitting results across messages or putting text first 'teaches' the model to stop using parallel tools and can cause HTTP 400.", "Permission rule precedence is deny \u2192 ask \u2192 allow (first match), REGARDLESS of specificity. A matching ask rule prompts even if a more specific allow matches. A deny at ANY settings level is absolute. Hook decisions do not bypass deny/ask rules; hook exit-code-2 blocks before rule eval.", "Bare deny rule (e.g. 'Bash') REMOVES the tool from model context entirely; scoped deny ('Bash(rm *)') keeps the tool visible and blocks only matching calls. Bash wildcard space sensitivity: 'Bash(ls *)' matches 'ls -la' not 'lsof'; 'Bash(ls*)' matches both. ':*' suffix == trailing ' *' but only at pattern end.", "Speculative execution during streaming: StreamingToolExecutor.addTool() is fire-and-forget (does not await processQueue) so response parsing never stalls; tools can finish before the model response completes. Abort-controller hierarchy is 3 levels (query\u2192sibling\u2192per-tool); per-tool abort bubbles to query controller unless reason is a sibling error (so permission denial ends the whole turn).", "FileReadTool is the ONLY built-in with maxResultSizeChars=Infinity (persisting Read output would loop). It self-bounds via token estimation. MCP default output token limit is 25000 (warn at 10000); a tool can raise to hard ceiling 500000 via _meta['anthropic/maxResultSizeChars'].", "assembleToolPool sorts built-ins and MCP tools alphabetically SEPARATELY then concatenates (built-ins prefix) to keep a stable prompt-cache breakpoint after the last built-in \u2014 flat-sorting all tools would invalidate cache when MCP servers change.", "Tool search/defer_loading (default-on for MCP): sends name+description only; model calls ToolSearch to load schema. Disabled by default on Vertex AI and when ANTHROPIC_BASE_URL is non-first-party. Requires tool_reference support (no Haiku). 
Calling a deferred tool un-triggered \u2192 Zod string-coercion failure + recovery hint.", "bypassPermissions (v2.1.126+) includes protected-path writes but rm -rf / and rm -rf ~ still prompt as a circuit breaker; refuses to start as root/sudo outside recognized sandboxes. auto mode classifier thresholds (consecutive 3 / total 20 blocks) are NOT configurable."], "openQuestions": ["Exact set and order of fields in the Zod input backfill / _simulatedSedEdit injection (only approximate from secondary source)", "Whether contextModifier queuing for concurrent batches is actually exercised by any current built-in (source comment says none are)", "Precise mapping of the auto-mode classifier's decision order vs the in-process 14-step pipeline (two slightly different orderings are described)", "Exact behavior when an orphaned tool_use from an interrupted parallel turn is repaired (placeholder tool_result content text)"], "sources": [{"title": "Handle tool calls \u2014 Claude API Docs", "url": "https://platform.claude.com/docs/en/agents-and-tools/tool-use/handle-tool-calls", "why": "Authoritative API contract: tool_use/tool_result block shapes, is_error, ordering rules (tool_result must immediately follow, must be first in user content, HTTP 400 cases)."}, {"title": "Parallel tool use \u2014 Claude API Docs", "url": "https://platform.claude.com/docs/en/agents-and-tools/tool-use/parallel-tool-use", "why": "disable_parallel_tool_use semantics, unordered execution, dependency recovery via is_error, single-user-message batching rule."}, {"title": "Ch 6. Tools \u2014 From Definition to Execution (Claude Code from Source)", "url": "https://claude-code-from-source.com/ch06-tools/", "why": "Best secondary source: 14-step checkPermissionsAndCallTool pipeline, buildTool fail-closed defaults, Tool interface (5 key members), ToolResult/ToolUseContext, registry assembleToolPool, deferred loading, per-tool maxResultSizeChars table."}, {"title": "Ch 7. 
Concurrent Tool Execution (Claude Code from Source)", "url": "https://claude-code-from-source.com/ch07-concurrency/", "why": "partitionToolCalls algorithm, streaming executor lifecycle (queued/executing/completed/yielded), mutual-exclusion admission, order-preservation, Bash-only sibling cascade, discard() escape hatch, per-tool concurrency table."}, {"title": "Configure permissions \u2014 Claude Code Docs", "url": "https://code.claude.com/docs/en/permissions", "why": "Official rule syntax: deny\u2192ask\u2192allow precedence, Bash wildcards (space-before-*, :* suffix), compound command splitting, process-wrapper stripping, Read/Edit gitignore anchors, WebFetch domain:, MCP mcp__server__tool rules, protected paths, settings precedence."}, {"title": "Choose a permission mode \u2014 Claude Code Docs", "url": "https://code.claude.com/docs/en/permission-modes", "why": "Six modes table (default/acceptEdits/plan/auto/dontAsk/bypassPermissions), what each auto-approves, auto-mode classifier thresholds (3 consecutive / 20 total), v2.1.126 protected-path change, rm -rf / circuit breaker, auto-mode model requirements."}, {"title": "Connect Claude Code to tools via MCP \u2014 Claude Code Docs", "url": "https://code.claude.com/docs/en/mcp", "why": "MCP tool naming mcp__server__tool (64-char cap, char substitution), plugin form mcp__plugin_X_Y__Z, MAX_MCP_OUTPUT_TOKENS=25000 default (warn 10000), _meta anthropic/maxResultSizeChars ceiling 500000, tool search/defer_loading (ENABLE_TOOL_SEARCH), JSON-RPC 2.0 tools/list + tools/call."}, {"title": "[Bug] Parallel tool calls cancel all siblings on single error (#64247)", "url": "https://github.com/anthropics/claude-code/issues/64247", "why": "Confirms exact behavior + version (v2.1.158): 'Cancelled: parallel tool call ... 
errored', isConcurrencySafe\u2192annotations.readOnlyHint, Bash-error sibling cascade."}, {"title": "Environment variables \u2014 Claude Code Docs", "url": "https://code.claude.com/docs/en/env-vars", "why": "Confirms CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY default 10 governs read-only tool + subagent parallelism."}, {"title": "toolOrchestration.ts (openonion/claude-code mirror)", "url": "https://github.com/openonion/claude-code/blob/main/src/services/tools/toolOrchestration.ts", "why": "Source confirmation of getMaxToolUseConcurrency() = parseInt(env.CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY)||10 and runToolsConcurrently signature."}], "summary": "Claude Code's tool-exec engine sits between the model's `tool_use` content blocks and the `tool_result` blocks returned to the API. Every tool call \u2014 built-in (Read/Edit/Bash/Grep/Agent) or MCP \u2014 flows through one uniform 14-step pipeline (`checkPermissionsAndCallTool`): lookup \u2192 abort-check \u2192 Zod input validation \u2192 semantic `validateInput` \u2192 speculative classifier start \u2192 input backfill \u2192 PreToolUse hooks \u2192 permission resolution (deny\u2192ask\u2192allow rules + tool.checkPermissions + mode + interactive prompt) \u2192 deny hooks \u2192 `call()` execution \u2192 result budgeting (persist oversize to `~/.claude/tool-results/{hash}.txt`) \u2192 PostToolUse hooks \u2192 append newMessages \u2192 classifyToolError. Concurrency runs two layers: a greedy `partitionToolCalls()` groups consecutive concurrency-safe calls into parallel batches (isolating unsafe calls into serial singletons), and a `StreamingToolExecutor` starts tools speculatively *while the model is still streaming* its response. Results are buffered and yielded in submission order (not completion order) so conversation history stays coherent. 
Permission gating is layered: PreToolUse hooks can short-circuit, then static allow/ask/deny rules (`Tool` or `Tool(specifier)` format), then tool-specific checks, then one of 7 modes (default/acceptEdits/plan/auto/dontAsk/bypassPermissions/bubble). MCP tools are registered as `mcp__server__tool` and are indistinguishable from built-ins to the agent loop."}, "streaming-protocol": {"asOfDate": "2026-06", "claimsToVerify": ["stream-json output requires BOTH --verbose AND --include-partial-messages flags together to get token-level text_delta deltas; --output-format stream-json alone does NOT stream token deltas (it emits only complete assistant/system/result messages).", "The tool_use accumulation contract: content_block_start carries input:{} (empty object placeholder) as the FIRST event, real args arrive ONLY via content_block_delta with delta.type==\"input_json_delta\" and delta.partial_json (string), concatenated and parsed exactly once at content_block_stop. The object-vs-string type mismatch is by design.", "The headless final event is type==\"result\" with subtype \"result\" (or \"success\"/\"error\" variants) \u2014 NOT \"message_stop\". message_stop is the Messages-API SSE terminal event inside a stream_event, distinct from the ResultMessage that ends stream-json. Known bug #1920: missing result event hangs consumers.", "The exact ResultMessage JSON top-level fields are: type,result,subtype,session_id,is_error,duration_ms,duration_api_ms,num_turns,total_cost_usd,usage,stop_reason,structured_output.", "system/api_retry event fields: attempt (starts at 1), max_retries, retry_delay_ms, error_status (int or null), error category from {authentication_failed, oauth_org_not_allowed, billing_error, rate_limit, overloaded, invalid_request, model_not_found, server_error, max_output_tokens, unknown}."], "components": [{"config": "HTTP request: POST /v1/messages with body {\"stream\": true, ...}. Response Content-Type: text/event-stream. Headers: anthropic-version (e.g.
2023-06-01), x-api-key or Authorization: Bearer.", "dataModel": "Each SSE frame: two lines \u2014 `event: EVENT_NAME` and `data: {\"type\":\"EVENT_NAME\", ...}` (the data.type MATCHES the SSE event name), blank line terminates. message_start.message has full Message skeleton {id, type:\"message\", role:\"assistant\", content:[], model, stop_reason:null, stop_sequence:null, usage:{input_tokens, output_tokens}}. content_block_start has {type:\"content_block_start\", index:int, content_block:{type:\"text\"|\"tool_use\"|\"thinking\"|\"server_tool_use\"|\"web_search_tool_result\", ...}}. For text: content_block={type:\"text\", text:\"\"}. For tool_use: content_block={type:\"tool_use\", id:\"toolu_...\", name:string, input:{}} (input is EMPTY OBJECT placeholder). For thinking: {type:\"thinking\", thinking:\"\", signature:\"\"}. Deltas: text_delta {text}, input_json_delta {partial_json: string}, thinking_delta {thinking}, signature_delta {signature}. message_delta: {delta:{stop_reason, stop_sequence}, usage:{output_tokens (cumulative)}}. message_stop: {type:\"message_stop\"} (empty data). ping: {type:\"ping\"}. error: {type:\"error\", error:{type:\"overloaded_error\", message:...}}.", "mechanism": "Sequence is STRICTLY ordered: (1) ONE message_start carrying the Message skeleton with empty content[]; (2) for each content block: ONE content_block_start (carries index + the content_block stub), zero or more content_block_delta events (each carries index + a typed delta), ONE content_block_stop (carries index only); (3) one or more message_delta events (top-level Message mutations \u2014 primarily stop_reason and cumulative usage); (4) ONE terminal message_stop. ping events may appear anywhere. Each content block's index maps to its final position in Message.content[]. Exception: server-side fallback emits a content_block_start/content_block_stop pair with NO deltas between. SSE wire format is `event: EVENT_NAME\\ndata: JSON_PAYLOAD\\n\\n`.
Unknown event types may be added \u2014 clients must handle gracefully.", "name": "Anthropic Messages API SSE streaming", "purpose": "The lowest transport layer: the raw server-sent events streamed back from POST /v1/messages with stream:true. Everything Claude Code / Agent SDK streams up to the user is derived from accumulating these events."}, {"dataModel": "Per-block accumulator state keyed by content-block index: map[int]string of concatenated partial_json. Final parsed value: tool_use.input is always an OBJECT (map), built by calling json.loads on the accumulated string at content_block_stop.", "mechanism": "The accumulation contract (verbatim from docs): (1) On content_block_start with type==\"tool_use\", initialize `input_json = \"\"`; (2) for each content_block_delta with delta.type==\"input_json_delta\", append `input_json += event.delta.partial_json`; (3) on content_block_stop, parse `json.loads(input_json)`. The deliberate type mismatch \u2014 content_block_start.input is an empty OBJECT {}, but the deltas carry STRING partial_json \u2014 is by design: the object marks the slot, the deltas build the real value. A block can emit MANY deltas (sometimes dozens). Without eager_input_streaming the server buffers+validates whole values; current models emit at most one complete key+value per delta chunk, so there are visible pauses. With eager streaming, chunks arrive sooner, are longer, may straddle tokens, and the final string is NOT guaranteed valid JSON (max_tokens can truncate mid-value \u2014 must handle that and e.g. wrap in {\"INVALID_JSON\": \"...the raw unparsed string...\"} when feeding back as a tool error).", "name": "Fine-grained tool_use input streaming (partial JSON)", "purpose": "How the `input` field of a tool_use block is delivered incrementally so a client can render/act on partial args before the block closes."}, {"config": "ClaudeAgentOptions(include_partial_messages=True) (Python) / includePartialMessages:true (TypeScript). Required to receive any token-level data.
Default False.", "dataModel": "@dataclass StreamEvent: { uuid: str; session_id: str; event: dict[str,Any] (the RAW Anthropic SSE event); parent_tool_use_id: str|None }. AssistantMessage: { content: list[ContentBlock]; model: str; parent_tool_use_id; error: AssistantMessageError|None }. SystemMessage: { subtype: str; data: dict }. ResultMessage: { subtype, duration_ms, duration_api_ms, is_error, num_turns, session_id, stop_reason, total_cost_usd, usage:dict, result:str, structured_output }. ContentBlock variants: TextBlock{text}, ToolUseBlock{id,name,input}, ThinkingBlock{thinking,signature}.", "mechanism": "The SDK wraps the bundled `claude` CLI as a subprocess and communicates via NDJSON over stdin/stdout (NOT a direct HTTP API call). With partial messages ENABLED, the SDK additionally yields a StreamEvent for every raw API SSE event, interleaved with the semantic messages. The flow: StreamEvent(message_start) -> StreamEvent(content_block_start/delta/stop) for each block -> StreamEvent(message_delta) -> StreamEvent(message_stop) -> AssistantMessage (the ACCUMULATED complete message) -> [tool executes] -> next turn's StreamEvents -> ... -> ResultMessage. To extract streaming text: check isinstance StreamEvent -> event.type==\"content_block_delta\" -> delta.type==\"text_delta\" -> delta.text. To track tool calls: content_block_start with content_block.type==\"tool_use\" gives .name; accumulate input_json_delta.partial_json; content_block_stop finalizes. To consume from the CLI directly: `claude -p ... --output-format stream-json --verbose --include-partial-messages` then each stdout line is a JSON object; the streaming lines have type==\"stream_event\" and an `event` field mirroring the raw SSE event.", "name": "Agent SDK message model + StreamEvent", "purpose": "The Python/TypeScript Agent SDK's typed message classes that wrap the raw SSE events and the conversation lifecycle."}, {"dataModel": "Every line: JSON object with `type` field. 
assistant: {type:\"assistant\", message:{content:[ContentBlock], model, ...}, uuid, session_id, parent_tool_use_id}. user: {type:\"user\", message:{role:\"user\", content:...}, uuid, session_id, parent_tool_use_id, tool_use_result}. stream_event: {type:\"stream_event\", event:{...raw SSE...}, uuid, session_id, parent_tool_use_id}. system/init: {type:\"system\", subtype:\"init\", session_id, model, tools, mcpServers, plugins, plugin_errors}. system/api_retry: {type:\"system\", subtype:\"api_retry\", attempt:int(>=1), max_retries:int, retry_delay_ms:int, error_status:int|null, error:, uuid, session_id}. system/compact_boundary (Python: SystemMessage subtype \"compact_boundary\"; TS: SDKCompactBoundaryMessage). result: {type:\"result\", subtype:\"result\"|\"success\"|\"error\", result:str, session_id, is_error:bool, duration_ms, duration_api_ms, num_turns, total_cost_usd, usage:{...}, stop_reason, structured_output}.", "mechanism": "`--output-format stream-json` makes `claude -p` emit NDJSON (one JSON object per line) on stdout as events occur, instead of a single batch payload. The FIRST event in the stream is system/init (unless CLAUDE_CODE_SYNC_PLUGIN_INSTALL is set, in which case system/plugin_install events precede it). Token-level deltas only appear if BOTH --verbose AND --include-partial-messages are passed; otherwise only complete assistant/user/result/system messages are emitted. When an API request fails with a retryable error, a system/api_retry event is emitted BEFORE the retry (use to surface retry progress / custom backoff). The LAST event is always a result message (type:\"result\") with the full cost/usage/turns metadata. Consumers MUST buffer bytes and split on newline because events can straddle chunk boundaries. 
The result event is the terminal sentinel \u2014 a known bug (issue #1920) is that the CLI sometimes fails to emit it, causing consumers to hang.", "name": "Headless CLI --output-format stream-json", "purpose": "The CLI surface for headless / CI / scripted streaming consumption of an agent run."}, {"config": "CLI flags for SDK subprocess: `--output-format stream-json --input-format stream-json --verbose` (required trio). Plus optionally: --permission-prompt-tool stdio (route perms via control protocol, NOT interactive), --setting-sources user,project,local, --system-prompt / --append-system-prompt, --permission-mode acceptEdits|dontAsk|..., --model, --no-session-persistence. Env: ANTHROPIC_API_KEY, CLAUDE_CODE_OAUTH_TOKEN, CLAUDE_CONFIG_DIR (default ~/.claude), CLAUDE_CODE_ENTRYPOINT (e.g. sdk-go), CLAUDE_AGENT_SDK_VERSION.", "dataModel": "control_request: {type:\"control_request\" (or \"sdk_control_request\"), request:{subtype, request_id, ...}}. initialize: {request:{subtype:\"initialize\", request_id, hooks:{:[{matcher, hook_callback_ids:[...]}]}, sdk_mcp_servers:[\"name\",...]}}. permission: {request:{subtype:\"permission\", request_id, tool_name, tool_input:dict}}. mcp_message: {request:{subtype:\"mcp_message\", request_id, server_name, message:{jsonrpc:\"2.0\", id, method, params}}}. control_response success: {type:\"control_response\", response:{subtype:\"success\", request_id, response:{...}}}. perm allow: response:{behavior:\"allow\"}. perm deny: response:{behavior:\"deny\", message}. mcp result: response:{mcp_response:{jsonrpc, id, result:{content:[{type:\"text\",text}], isError:bool}}}. control_response error: {response:{subtype:\"error\", request_id, error}}. 
SDK MCP handshake: initialize method -> {protocolVersion:\"2025-11-25\", capabilities:{tools:{listChanged:false}}, serverInfo:{name,version}}, then notifications/initialized, then tools/list.", "mechanism": "The SDK spawns the CLI with BOTH --input-format stream-json AND --output-format stream-json, so stdin AND stdout are NDJSON. stdin carries: (a) user turns \u2014 `{\"type\":\"user\",\"message\":{\"role\":\"user\",\"content\":...}}` one per line, generator-yielded for multi-turn; (b) control_response messages replying to CLI requests; (c) on connect (client mode) an initialize control_request registering hooks (PreToolUse/PostToolUse/UserPromptSubmit/Stop/SubagentStop/PreCompact with matcher globs) and sdk_mcp_servers. stdout carries assistant/user/result/stream_event/system messages PLUS control_request messages from the CLI: can_use_tool (permission), hook_callback, and mcp_message (invoke an in-process @tool / SDK MCP server tool). The CLI issues a JSON-RPC handshake against each SDK MCP server (initialize -> capabilities -> tools/list) before calling tools. SDK responses to mcp_message MUST wrap the JSON-RPC result in an `mcp_response` field (undocumented but required \u2014 missing it causes a 60s timeout). request_id multiplexes concurrent control requests. Writes must be newline-terminated + flushed; each JSON object on exactly one line. 
Close stdin for graceful shutdown; SIGTERM if it doesn't exit.", "name": "stdin/stdout NDJSON control protocol (SDK <-> CLI)", "purpose": "The bidirectional wire protocol between an SDK host process and the Claude Code CLI subprocess \u2014 used for permission callbacks, hooks, in-process SDK MCP tools, and streaming multi-turn input."}], "confidence": "high", "dimension": "streaming-protocol", "externalInterfaces": ["CLI flag: --output-format stream-json|json|text", "CLI flag: --input-format stream-json (enables stdin NDJSON control protocol)", "CLI flag: --include-partial-messages (enables token-level stream_event deltas)", "CLI flag: --verbose (REQUIRED with stream-json)", "CLI flag: --permission-prompt-tool stdio (route permissions over control protocol)", "CLI flag: --bare (skip hooks/skills/plugins/MCP/CLAUDE.md auto-load; recommended for SDK/CI; future default for -p)", "CLI flag: --json-schema + --output-format json (structured output -> result.structured_output)", "CLI flag: --setting-sources user,project,local", "CLI flag: --system-prompt / --append-system-prompt / --append-system-prompt-file", "CLI flag: --permission-mode acceptEdits|dontAsk|default|plan|bypassPermissions", "HTTP: POST https://api.anthropic.com/v1/messages body {\"stream\": true} -> Content-Type: text/event-stream", "Env: ANTHROPIC_API_KEY, CLAUDE_CODE_OAUTH_TOKEN, CLAUDE_CONFIG_DIR (default ~/.claude), CLAUDE_CODE_ENTRYPOINT, CLAUDE_AGENT_SDK_VERSION, CLAUDE_CODE_SYNC_PLUGIN_INSTALL", "Python SDK: query(prompt, options) async generator; ClaudeAgentOptions(include_partial_messages=True); ClaudeSDKClient.connect()", "Python types: from claude_agent_sdk.types import StreamEvent, UserMessage, AssistantMessage, SystemMessage, ResultMessage", "TypeScript SDK: @anthropic-ai/claude-agent-sdk; SDKPartialAssistantMessage {type:'stream_event'}; SDKMessage union; SDKUserMessage generator"], "keyBehaviors": ["stream-json output requires THREE flags together for token streaming: --output-format 
stream-json --verbose --include-partial-messages. Omit --include-partial-messages and you get only complete assistant/user/result/system lines (no per-token deltas). Omit --verbose and stream-json does not work.", "DELIBERATE type mismatch in tool_use streaming: content_block_start.input is an empty OBJECT {}, but each delta carries a STRING (partial_json). Do not assign deltas to .input; concatenate strings and parse once at content_block_stop. The empty object is just a slot marker.", "The `index` field on content_block_* events is the authoritative key into the final Message.content[] array. Multiple blocks (text, then tool_use, then text again) are distinguished by index, and the order of start/stop events preserves final array order.", "Usage in message_delta is CUMULATIVE (output_tokens grows), not incremental. message_start.usage has input_tokens + output_tokens:1 (placeholder). Final usage is read from the LAST message_delta before message_stop.", "The CLI emits a `result` (type:\"result\") message as the terminal event of a stream-json run \u2014 that is the sentinel a consumer waits on. Known bug (issue #1920): it is sometimes missing, hanging naive consumers.", "system/init is the first event (model, tools, mcpServers, plugins, plugin_errors). With CLAUDE_CODE_SYNC_PLUGIN_INSTALL set, system/plugin_install events (status: started/installed/failed/completed) precede system/init. 
Use plugins/plugin_errors fields to fail CI on a plugin that failed to load.", "system/api_retry carries: attempt (starts at 1), max_retries, retry_delay_ms, error_status (int OR null for connection errors with no HTTP response), and an error category enum: authentication_failed, oauth_org_not_allowed, billing_error, rate_limit, overloaded, invalid_request, model_not_found, server_error, max_output_tokens, unknown.", "Extended thinking: thinking_delta events build the .thinking text; a single signature_delta arrives JUST BEFORE content_block_stop carrying the signature used to verify block integrity. With thinking.display:\"omitted\", NO thinking_delta is sent \u2014 the block opens, gets one signature_delta, and closes. display:\"summarized\" streams a condensed summary.", "Fine-grained streaming (eager_input_streaming:true on a tool) can yield INVALID or partial JSON (especially if stop_reason is max_tokens, truncating mid-parameter). A robust consumer must tolerate parse failure and, when echoing the bad input back as a tool_result error, wrap it as {\"INVALID_JSON\":\"\"}.", "Error recovery differs by model family: Claude 4.5 and earlier \u2014 re-feed the partial response as an assistant message and resume. Claude 4.6 and later \u2014 instead send a USER message instructing the model to continue from where it left off (e.g. `Your previous response was interrupted and ended with X. Continue.`). Tool-use and thinking blocks CANNOT be partially recovered; resume from the most recent text block.", "server_tool_use / web_search_tool_result blocks are emitted inline in the SAME stream (index increments across them) for built-in tools like web_search_20250305. The web_search_tool_result block arrives as a content_block_start already containing the full content array (no deltas), then a content_block_stop.", "Piped stdin to `claude -p` is capped at 10MB (since v2.1.128) \u2014 over the cap the process exits non-zero. 
Background Bash tasks spawned during a -p run are terminated ~5s after the final result and stdin close (behavior since v2.1.163; before that a non-exiting bg process held the run open forever).", "Agent SDK message ordering with partials ON: StreamEvents for one assistant turn -> AssistantMessage (complete) -> [tool runs] -> next turn's StreamEvents -> ... -> ResultMessage. Without partials, the StreamEvents are suppressed but AssistantMessage/UserMessage/SystemMessage/ResultMessage still arrive.", "SDK subprocess control protocol: every control_response must echo the request_id; SDK MCP tool responses must wrap JSON-RPC result in `mcp_response` (undocumented, omission = 60s timeout). Each JSON message on stdin must be one line, newline-terminated, flushed. Close stdin to shut down gracefully.", "Compact boundary: when history is auto-compacted, Python emits a SystemMessage with subtype \"compact_boundary\"; TypeScript emits SDKCompactBoundaryMessage. A Go reimplementation must produce this boundary to keep SDK consumers in sync."], "openQuestions": ["Exact TS field names for the result envelope emitted by `--output-format json` (result, session_id, is_error, total_cost_usd, usage, num_turns, duration_ms, duration_api_ms, stop_reason, structured_output) \u2014 confirm against current TS SDKMessage definitions in @anthropic-ai/claude-agent-sdk rather than the Python dataclass shapes.", "Whether `claude -p --output-format stream-json` still REQUIRES --verbose in the latest 2.x (docs and the Go community doc both say yes, but exact current version gate unverified).", "Exact set and ordering of system/init fields emitted in stream-json (model, cwd, tools, mcpServers, plugins, plugin_errors, permissionMode, version) for a faithful Go replica \u2014 the docs only enumerate plugins/plugin_errors explicitly.", "The precise CLI exit codes for the 10MB stdin cap error and for the missing-result-event hang (not documented; only behavior described)."], "sources": [{"title": 
"Stream responses in real-time \u2014 Claude Code Docs (Agent SDK streaming-output)", "url": "https://code.claude.com/docs/en/agent-sdk/streaming-output", "why": "Authoritative: defines StreamEvent dataclass, include_partial_messages flag, message flow ordering, text_delta + input_json_delta accumulation examples."}, {"title": "Streaming messages \u2014 Claude API Docs (platform.claude.com)", "url": "https://platform.claude.com/docs/en/build-with-claude/streaming", "why": "Authoritative source for the raw SSE event flow: message_start, content_block_start/delta/stop, message_delta (cumulative usage), message_stop, ping, error; full text/tool/thinking/web_search wire examples; Claude 4.5 vs 4.6 error recovery."}, {"title": "Run Claude Code programmatically \u2014 Claude Code Docs (headless)", "url": "https://code.claude.com/docs/en/headless", "why": "Authoritative: --output-format text|json|stream-json, the --verbose + --include-partial-messages requirement, system/init, system/api_retry field table, system/plugin_install, the jq text-delta one-liner, --bare mode, 10MB stdin cap (v2.1.128), background-task exit (v2.1.163)."}, {"title": "Fine-grained tool streaming \u2014 Claude API Docs", "url": "https://platform.claude.com/docs/en/agents-and-tools/tool-use/fine-grained-tool-streaming", "why": "Authoritative: eager_input_streaming:true per-tool flag, the input:{} placeholder vs partial_json string contract, invalid-JSON handling and INVALID_JSON wrapper, max_tokens truncation behavior."}, {"title": "Message Types \u2014 Claude Agent SDK for Python", "url": "https://anthropics-claude-agent-sdk-python-82.mintlify.app/api/types/messages", "why": "Authoritative dataclass shapes for UserMessage, AssistantMessage (error enum), SystemMessage (subtype), ResultMessage (full field list: subtype, duration_ms, duration_api_ms, is_error, num_turns, session_id, stop_reason, total_cost_usd, usage, result, structured_output), StreamEvent (uuid/session_id/event/parent_tool_use_id), 
Task* messages."}, {"title": "Streaming Input \u2014 Claude Code Docs (streaming-vs-single-mode)", "url": "https://code.claude.com/docs/en/agent-sdk/streaming-vs-single-mode", "why": "Authoritative: SDKUserMessage generator shape for stdin stream-json, image content blocks, continue/resume, single-vs-streaming input mode limits."}, {"title": "Inside the Claude Agent SDK: From stdin/stdout Communication to Production", "url": "https://buildwithaws.substack.com/p/inside-the-claude-agent-sdk-from", "why": "Detailed (SDK v0.1.19) reverse-engineering of the subprocess NDJSON control protocol: can_use_tool / hook_callback control_request/response shapes, request_id multiplexing, the CLI invocation flags, and the initialize handshake."}, {"title": "claude-agent-sdk-go/docs/cli-protocol.md (GitHub)", "url": "https://github.com/Roasbeef/claude-agent-sdk-go/blob/main/docs/cli-protocol.md", "why": "Most precise wire-format reference for a Go reimplementation: exact control_request/control_response JSON for initialize, permission, mcp_message, the required mcp_response wrapper (undocumented), MCP handshake, error envelope, env vars, and shutdown semantics."}, {"title": "Claude Code stream-json: the output format that changes everything \u2014 Background Claude", "url": "https://backgroundclaude.com/blog/stream-json", "why": "Concrete confirmation of the three-flag rule, the system/api_retry shape, and a correct NDJSON line-buffering Node consumer (events straddle chunk boundaries)."}, {"title": "Missing Final Result Event in Streaming JSON Output \u2014 anthropics/claude-code #1920", "url": "https://github.com/anthropics/claude-code/issues/1920", "why": "Documents the known gotcha that the terminal {\"type\":\"result\",...} event is sometimes missing in stream-json, which any consumer must tolerate."}, {"title": "[BUG] stdout under --output-format stream-json stops \u2014 anthropics/claude-code #17248", "url": "https://github.com/anthropics/claude-code/issues/17248", "why": 
"Evidence of stream-json stdout stalls affecting automated consumers; relevant for a replica's reliability guarantees."}, {"title": "Handling invalid JSON in Anthropic's fine-grained tool streaming", "url": "https://andyjakubowski.com/engineering/handling-invalid-json-in-anthropic-fine-grained-tool-streaming", "why": "Reinforces that Anthropic (unlike OpenAI Structured Outputs) does NOT guarantee valid partial/final JSON under eager streaming, with concrete recovery patterns."}], "summary": "Claude Code's streaming protocol is layered across five distinct surfaces that a Go reimplementation must reproduce. (1) The Anthropic Messages API emits server-sent events (SSE) over an HTTP stream: a strict sequence of message_start -> [per content block: content_block_start -> content_block_delta(s) -> content_block_stop] -> message_delta (cumulative usage + stop_reason) -> message_stop, with interspersed ping/error events. (2) tool_use inputs stream as partial-JSON fragments via input_json_delta deltas whose partial_json strings must be concatenated and parsed once at content_block_stop; the content_block_start.input placeholder is an empty object {} by deliberate design, and the deltas are strings (a type mismatch re-implementors must handle). Fine-grained eager_input_streaming can deliver invalid/truncated JSON. (3) The Claude Agent SDK (Python/TypeScript) wraps the bundled CLI as a subprocess and communicates via newline-delimited JSON (NDJSON) over stdin/stdout; raw API SSE events are wrapped into a StreamEvent message (type \"stream_event\" / SDKPartialAssistantMessage) only when include_partial_messages/includePartialMessages is enabled, interleaved with semantic AssistantMessage/UserMessage/SystemMessage/ResultMessage objects. 
(4) Headless `claude -p --output-format stream-json --verbose --include-partial-messages` emits NDJSON on stdout where each line is one event; event types include system (with subtypes init/api_retry/compact_boundary/plugin_install), stream_event, assistant, user, result (terminal). (5) The SDK<->CLI control protocol is a bidirectional NDJSON stream over stdin/stdout with control_request/control_response messages for permission (can_use_tool), hooks, and in-process SDK MCP tool calls, multiplexed by request_id. The terminal sentinel of a stream-json run is a ResultMessage (type \"result\"), which is the single load-bearing contract for consumers."}, "session-transcript": {"asOfDate": "2026-06", "claimsToVerify": ["Default retention is exactly 30 days via cleanupPeriodDays, minimum 1, and 0 is rejected with a validation error (Simon Willison's 99999 trick delays it ~274 years; you cannot disable deletion, only delay it).", "The on-disk project directory is the absolute cwd with EVERY non-alphanumeric character replaced by a single '-' (e.g. /Users/me/proj -> -Users-me-proj); this applies to underscores and non-ASCII too, which causes collisions/fragmentation for non-ASCII paths.", "Every transcript line carries a parentUuid (not just uuid), forming a DAG/linked-list; compact_boundary records set parentUuid:null and carry logicalParentUuid referencing the now-erased pre-compaction last message, immediately followed by a user message with isCompactSummary:true whose content starts with 'This session is being continued from a previous conversation that ran out of context.'", "forkSession is NOT a byte copy: the SDK rewrites every sessionId field and remaps message UUIDs before appending under a new key; sessionStore cannot be combined with persistSession:false (throws) nor with enableFileCheckpointing (throws)."], "components": [{"config": "CLAUDE_CONFIG_DIR relocates the entire ~/.claude root. 
cleanupPeriodDays (settings.json, default 30, min 1, 0 rejected) sweeps stale files at startup and also sweeps orphaned subagent worktrees. CLAUDE_CODE_SKIP_PROMPT_HISTORY=1 / --no-session-persistence / persistSession:false suppress writes. There is no disable for cleanup, only delay (set 99999 for ~274 years).", "dataModel": "Path layout: $CLAUDE_CONFIG_DIR/projects/<encoded-cwd>/<session-id>.jsonl + subagent sidecars under subagents/agent-<agent-id>.jsonl and file-history snapshots. Encoded-cwd = absolute cwd with every non-alphanumeric char replaced by '-' (e.g. /Users/me/proj -> -Users-me-proj); confirmed by docs and GitHub issues: non-ASCII chars collapse to '-' too (issue #19972), and even underscores get replaced (issue #39424), so two distinct paths can collide. session-id is a random UUID; the filename stem MUST equal the sessionId field on every line.", "mechanism": "On session start Claude Code derives an encoded directory name from the absolute working directory by replacing every non-alphanumeric character with '-' and creates (or opens) ~/.claude/projects/<encoded-cwd>/<session-id>.jsonl. Each line is appended as a self-contained JSON object; the file is append-only and never truncated/rewritten. Resume resolves the encoded dir from cwd, then scans for the target session-id (or the most-recently-modified one for --continue). Moving a session with /cd (v2.1.169+) relocates the file into the new directory's project storage. Session-ID lookup is scoped to the current project dir + its git worktrees; a session created elsewhere yields 'No conversation found with session ID: <session-id>'.", "name": "On-disk layout & project key encoding", "purpose": "Determines the physical path each session transcript is written to and how the directory name is derived from the working directory."}, {"config": "ISO-8601 UTC timestamps. version field carries the Claude Code release that wrote the line. 
gitBranch captured per-line for the Ctrl+B branch filter.", "dataModel": "{ type, uuid, parentUuid, sessionId, timestamp, cwd, version, gitBranch, plus type-specific fields }", "mechanism": "Every line carries type, uuid, parentUuid, sessionId, timestamp, plus optional cwd/version/gitBranch. uuid is a per-record identifier; parentUuid points to the PRECEDING record's uuid, building a linked list / directed-acyclic-graph (in practice a tree) \u2014 this is what makes resume, rewind, and fork possible. The first record's parentUuid is null. Because it's a DAG not a flat log, the same file can represent branching (forks written into a new file but sharing prefix uuids). On the SDK SessionStore path, entries are emitted as SessionStoreEntry objects = opaque JSON-safe values one-per-line.", "name": "Transcript entry schema (common fields)", "purpose": "Defines the shape of each JSONL line so the chain can be reconstructed for resume/rewind/fork."}, {"config": "userType distinguishes human vs system-injected. todos field persists the structured Task list state alongside the message. permissionMode records the session's permission level.", "dataModel": "{ type:'user'|'assistant', message:{ role, content, [usage, model, stop_reason, id] }, subtype, user/assistant-only fields }", "mechanism": "Type 'user': message.role='user', content is EITHER a plain string OR an array of content blocks; tool results come back as a block { type:'tool_result', tool_use_id, content:string|text/image-block-array, is_error }. Extra user fields: userType ('external' for human input), todos (current task-list snapshot), permissionMode. Type 'assistant': message is the full API response with model, role, content (array of {type:'text',text} / {type:'tool_use',id,name,input} / {type:'thinking'} blocks), stop_reason, usage, id; extra field requestId. 
Compaction summary is a user-typed line with isCompactSummary:true, isVisibleInTranscriptOnly:true and content beginning 'This session is being continued from a previous conversation that ran out of context.'", "name": "Message types: user & assistant", "purpose": "The two conversational record kinds; everything else is metadata around them."}, {"config": "Hook events keyed by hookEvent (PreToolUse/PostToolUse) and hookName (e.g. PostToolUse:Bash). queue-operation records input-buffered text.", "dataModel": "system subtype set includes: compact_boundary, stop_hook_summary, mirror_error (SDK sessionStore failure). progress.data: { type:'hook_progress', hookEvent, hookName, command }.", "mechanism": "Type 'system': carries subtype. Notable subtypes: 'compact_boundary' (the compaction marker \u2014 see Compaction component), 'stop_hook_summary' (end-of-turn hook results: hookCount, hookInfos[command+duration], hookErrors, preventedContinuation, stopReason), and (SDK mirror) 'mirror_error'. Type 'progress': hook execution events; data.type e.g. 'hook_progress', data.hookEvent (e.g. 'PostToolUse'), data.hookName (e.g. 'PostToolUse:Bash'), data.command. Type 'queue-operation': operation:'enqueue', content = queued user text while the assistant was mid-turn. Type 'file-history-snapshot': snapshot.trackedFileBackups = map of file path -> backup state, used by /rewind to restore file trees.", "name": "Metadata record types: system, progress, queue-operation, file-history-snapshot", "purpose": "Non-conversational events written into the same JSONL so the transcript is a complete execution log."}, {"config": "CLAUDE_CODE_AUTO_COMPACT_WINDOW + CLAUDE_AUTOCOMPACT_PCT_OVERRIDE tune the trigger. 
preTokens lets external tools know how close to the limit the session was.", "dataModel": "Boundary: { type:'system', subtype:'compact_boundary', logicalParentUuid, parentUuid:null, content:'Conversation compacted', compactMetadata:{ trigger:'auto'|'manual', preTokens:number } }", "mechanism": "When context approaches the model's limit (~167K observed), Claude Code writes a system record { type:'system', subtype:'compact_boundary', logicalParentUuid:<uuid of the last pre-compaction message>, parentUuid:null, content:'Conversation compacted', compactMetadata:{ trigger:'auto'|'manual', preTokens:<number> } }. The referenced pre-compaction uuids are dropped from the active context. Immediately after, it appends a synthetic user message with isCompactSummary:true, parentUuid pointing at the boundary uuid, content = an LLM-generated summary of everything so far. A single file can contain MANY boundaries (observed 5 in a 21-hour session, compacting ~every 2h). getSessionMessages returns the post-compaction chain only (e.g. 18 msgs from 503 raw entries); raw history must be read via store.load().", "name": "Compaction segments (within a single file)", "purpose": "Keeps long sessions running past the context window by periodically summarizing and resetting the active chain, while preserving the original transcript."}, {"config": "slug is the cross-file conversation identifier. Continuation prefix lines are byte-duplicates of parent's tail \u2014 dedup by sessionId.", "dataModel": "File d621b0b1.jsonl contains: lines[0..N] with sessionId=d8af951f (parent, skip as duplicates) then lines[N+1..] with sessionId=d621b0b1 (this file's own). shared slug across both files.", "mechanism": "Sometimes a fresh session-id file is created that logically continues an earlier session. 
The new file's first lines carry the PARENT session's sessionId (a byte-for-byte duplicate of the parent's trailing compact_boundary + messages), then at some line the sessionId switches to the new file's own id; that switch point's record has parentUuid bridging into the parent's last record. Detection is STRUCTURAL \u2014 there is no parentSessionId/resumedFrom field: extract session-id from the filename; if the first record's sessionId differs, the first id is the parent and only records whose sessionId == filename id belong to THIS file (prefix ones are duplicates to skip). A shared slug field (human-readable name, e.g. 'zesty-singing-newell') persists across continuations.", "name": "Cross-file session continuation (continuation files)", "purpose": "Allows a single logical conversation to span multiple JSONL files when a session is resumed into a new file."}, {"config": "Python SDK always persists; TypeScript-only persistSession:false for ephemeral. mirror_error system msg emitted (not retried) on append failure. SessionStore key includes subpath for sidecars.", "dataModel": "SessionKey={ projectKey:string, sessionId:string, subpath?:string }; subpath e.g. 'subagents/agent-' is opaque key suffix following on-disk layout.", "mechanism": "SDK options.sessionStore replaces/augments local storage. projectKey = the same stable filesystem-safe cwd encoding; sessionId = session uuid; subpath set for subagent/sidecar transcripts ('subagents/agent-'). append(key,entries[]) called after each local batch; load(key) called once before subprocess spawn on resume. Dual-write: Claude Code subprocess ALWAYS writes local disk first, then forwards the batch to append(). If append rejects/times out, error is logged and a {type:'system',subtype:'mirror_error'} is emitted into the iterator; query continues (local copy is durable); failed batches are NOT retried. load must return entries deep-equal to appended (byte-equal not required). 
forkSession rewrites all sessionId fields + remaps uuids, then appends under a new key (NOT a byte/copy-object shortcut). Cannot combine sessionStore with persistSession:false (throws) nor with enableFileCheckpointing (throws \u2014 file-history blobs are local-disk-only).", "name": "SessionStore mirror (SDK external storage)", "purpose": "Mirrors transcript lines to an external backend (S3/Redis/Postgres) so sessions resume across hosts; defines the formal append/load contract the Go impl should mirror."}, {"config": "Main file = main conversation. subagents/agent-.jsonl for each subagent. Permission decisions, summaries, and snapshots all sidecar'd under the same session dir.", "dataModel": "Sibling/sidecar files alongside .jsonl in the project dir; listSubkeys enumerates them for resume.", "mechanism": "Each subagent (Task tool) gets its own transcript at subpath 'subagents/agent-' (relative to the session directory). listSubagents requires the store's listSubkeys; getSubagentMessages uses listSubkeys when available else falls back to direct subpath. On resume, listSubkeys is called to restore subagent files; without it only the main transcript is materialized. Other sidecars include file-history snapshots for /rewind and the session summary. 
Subagent transcripts are excluded from --resume/--continue pickers and claude agents list when spawned under CLAUDE_CODE_CHILD_SESSION (v2.1.172+).", "name": "Subagent transcripts & sidecar files", "purpose": "Stores per-subagent conversation logs and supporting artifacts under the same project dir."}], "confidence": "high", "dimension": "session-transcript", "externalInterfaces": ["CLI flags: --continue (alias -c), --resume (alias -r) [<session-id|name>], --fork-session, --from-pr <pr>, --no-session-persistence, -n <name>", "In-session commands: /resume [<session-id|name>], /rename <name>, /branch [<name>], /rewind, /clear, /compact [instructions], /export [filename]", "Env vars: CLAUDE_CONFIG_DIR, CLAUDE_CODE_SKIP_PROMPT_HISTORY, CLAUDE_CODE_CHILD_SESSION (v2.1.172+), CLAUDE_CODE_FORCE_SESSION_PERSISTENCE, CLAUDE_CODE_AUTO_COMPACT_WINDOW, CLAUDE_AUTOCOMPACT_PCT_OVERRIDE", "settings.json keys: cleanupPeriodDays (default 30, min 1, 0 rejected)", "SDK options: resume:<session-id>, continue:true, fork_session:true, persistSession:false, sessionStore, enableFileCheckpointing", "SDK result message fields: session_id, subtype; SystemMessage carries session id early (TS direct field, Python nested in data)", "SDK functions: listSessions(), getSessionInfo(), getSessionMessages(), renameSession(), tagSession(), deleteSession(), forkSession(), listSubagents(), getSubagentMessages()", "File path scheme: $CLAUDE_CONFIG_DIR/projects/<encoded-cwd>/<session-id>.jsonl (+ subagents/agent-<agent-id>.jsonl)"], "keyBehaviors": ["project dir name = absolute cwd with EVERY non-alphanumeric char replaced by '-' (collapses underscores and non-ASCII, so non-ASCII paths fragment/collide \u2014 known issue #39424, #19972).", "--continue resumes most-recently-modified session for the current dir; --resume opens picker, or resumes by exact name (ambiguous name => picker with name prefilled) or by raw session-id. 
/resume on ambiguity ERRORS instead of opening picker.", "session-id lookup is scoped to current project dir + its git worktrees; --resume from a different cwd reports 'No conversation found with session ID: '. Session picker Ctrl+W widens to all worktrees, Ctrl+A to all projects.", "--fork-session + (--continue|--resume) OR /branch create a copy: prints BOTH new and original session ids, original stays in picker. 'Allow for this session' permissions do NOT carry into the fork. Resuming the same session in two terminals without forking INTERLEAVES into one transcript.", "Transcript file is append-only and never truncated/rewritten, even through /clear and compaction; /clear starts a fresh context but the old transcript remains resumable.", "Default cleanup: 30 days at startup; minimum 1; setting 0 is REJECTED with a validation error; you cannot disable deletion, only delay it (99999 ~= 274 years). cleanup also sweeps orphaned subagent worktrees.", "claude -p / Agent SDK sessions DO NOT appear in the session picker but are resumable by explicit id. Python SDK ALWAYS persists to disk; only TypeScript supports persistSession:false (in-memory only) and that cannot coexist with sessionStore.", "Compaction is detectable structurally: compact_boundary sets parentUuid:null + logicalParentUuid; the following user msg has isCompactSummary:true and content starting 'This session is being continued from a previous conversation that ran out of context.' Re-feeding isCompactSummary lines as real dialogue is a classic bug \u2014 skip them.", "Checkpoints (/rewind, double-Esc) revert CODE+conversation/conversation-only/code-only or summarize from/up to a point. Only edits via Claude's Write/Edit/NotebookEdit are tracked \u2014 Bash-driven file changes (rm/mv/cp) and external edits are NOT tracked. 
Original messages are always preserved in transcript even after summarize.", "CLAUDE_CODE_CHILD_SESSION (v2.1.172+) marks nested sessions and auto-excludes them from --resume/--continue/up-arrow history/agents list; CLAUDE_CODE_FORCE_SESSION_PERSISTENCE=1 overrides; honored on v2.1.169 and earlier, removed in v2.1.170-2.1.171."], "openQuestions": ["Exact set of all current system subtypes beyond compact_boundary / stop_hook_summary / mirror_error (e.g. tool approval, timing, init) \u2014 would require reading the latest claude-code-sdk source.", "Precise algorithm for slug generation (the human-readable name shared across continuation files) and where it is stored on each line.", "Exact JSON schema of file-history-snapshot.trackedFileBackups entries and how /rewind maps a snapshot to a restore point in the DAG.", "Whether sessionId lines that differ from the filename in a continuation file are byte-for-byte identical to the parent's tail or lightly transformed (the writeup claims byte-identical; confirm against source)."], "sources": [{"title": "Manage sessions - Claude Code Docs (code.claude.com)", "url": "https://code.claude.com/docs/en/sessions", "why": "Official source for --continue/--resume/--fork-session/--from-pr, /branch, /rewind, /rename, picker shortcuts (Ctrl+W/A/B), /export, and the exact transcript path ~/.claude/projects//.jsonl + cleanupPeriodDays default + CLAUDE_CONFIG_DIR."}, {"title": "How Claude Code Session Continuation Works - Massively Parallel Procrastination", "url": "https://blog.fsck.com/agent-blog/2026/02/22/claude-code-session-continuation/", "why": "Deepest technical source for the JSONL record schema (user/assistant/system/progress), parentUuid DAG, compact_boundary fields (logicalParentUuid, parentUuid:null, compactMetadata.trigger/preTokens), isCompactSummary, and cross-file continuation detection algorithm + slug field."}, {"title": "docs/claude-code-transcript-format.md - kent/consciousness forge", "url": 
"https://evilpiepirate.org/forge/kent/consciousness/src/commit/6a7ec9732b8f6964f07e112b27eda8b4fa6920f7/docs/claude-code-transcript-format.md", "why": "Concise field reference: common fields (uuid/parentUuid/sessionId/timestamp/cwd/version/gitBranch), tool_result content blocks, assistant usage/stop_reason/requestId, system subtypes (stop_hook_summary), progress/queue-operation/file-history-snapshot types, compaction segment model."}, {"title": "Persist sessions to external storage (SessionStore) - Claude Code Docs", "url": "https://code.claude.com/docs/en/agent-sdk/session-storage", "why": "Authoritative SessionKey/SessionStore/SessionStoreEntry contract, subpath 'subagents/agent-', dual-write-first-to-disk semantics, mirror_error, forkSession uuid-rewrite (not byte copy), persistSession:false incompatibility, getSessionMessages returns post-compaction chain."}, {"title": "Work with sessions (Agent SDK) - Claude Code Docs", "url": "https://code.claude.com/docs/en/agent-sdk/sessions", "why": "Official encoded-cwd rule (every non-alphanumeric char -> '-', /Users/me/proj -> -Users-me-proj), continue vs resume vs fork semantics, session_id on result/SystemMessage, resume-across-hosts mechanics."}, {"title": "Checkpointing - Claude Code Docs", "url": "https://code.claude.com/docs/en/checkpointing", "why": "Official /rewind behavior, checkpoint = per user prompt, persists across sessions, 30-day cleanup, only Write/Edit/NotebookEdit tracked (Bash/external not tracked), summarize from/up-to here."}, {"title": "Claude Code settings - Claude Code Docs", "url": "https://code.claude.com/docs/en/settings", "why": "Exact cleanupPeriodDays semantics: default 30, minimum 1, 0 rejected with validation error, also governs orphaned subagent worktree removal; worktree.baseRef/symlinkDirectories settings."}, {"title": "Environment variables - Claude Code Docs", "url": "https://code.claude.com/docs/en/env-vars", "why": "Definitive env-var surface: CLAUDE_CODE_SKIP_PROMPT_HISTORY, 
CLAUDE_CODE_CHILD_SESSION (v2.1.172+), CLAUDE_CODE_FORCE_SESSION_PERSISTENCE, CLAUDE_AUTOCOMPACT_PCT_OVERRIDE, CLAUDE_CODE_DEBUG_LOGS_DIR default ~/.claude/debug/.txt."}, {"title": "Don't let Claude Code delete your session logs - Simon Willison", "url": "https://simonwillison.net/2025/Oct/22/claude-code-logs/", "why": "Independently confirms ~/.claude/projects/encoded-directory/*.jsonl location, the 30-day deletion default (github issue 4172), and the cleanupPeriodDays:99999 workaround (cannot disable, only delay)."}, {"title": "[FEATURE/BUG] project path encoding - anthropics/claude-code#19972", "url": "https://github.com/anthropics/claude-code/issues/19972", "why": "Confirms the encoding replaces non-alphanumeric (and non-ASCII) chars with '-', causing collisions and readability loss for non-ASCII paths."}], "summary": "Claude Code persists every conversation as an append-only JSONL transcript, one file per session, at $CLAUDE_CONFIG_DIR/projects//.jsonl (default ~/.claude). Each line is one JSON object \u2014 a user message, assistant response, system event, hook progress, queued input, or file-history snapshot \u2014 and every record carries a uuid plus parentUuid, forming a DAG/linked-list rather than a flat log. Long sessions are split by \"compact_boundary\" segments that inject a synthetic summary user message and reset the parent chain; cross-file continuation is detected by a sessionId that changes mid-file while parentUuid bridges the gap. Resume (--continue/--resume ), fork (--fork-session or /branch), and rewind (/rewind, double-Esc) all operate by walking this parentUuid chain and (for code rewind) the file-history-snapshot entries. 
The SDK's SessionStore interface is a dual-write mirror of the same JSONL entries (local disk first, then append()) and cannot be combined with persistSession:false or enableFileCheckpointing."}, "context-compaction": {"asOfDate": "2026-06", "claimsToVerify": ["Auto-compact threshold = getEffectiveContextWindowSize(model) - 13,000, where effective window = contextWindow - min(maxOutputTokens, 20,000); CLAUDE_AUTOCOMPACT_PCT_OVERRIDE (1-100) overrides to min(floor(effective*pct/100), default threshold).", "Manual /compact summary request: same model + same system prompt + full history + summarization instruction as final user msg, with thinkingConfig={type:disabled} and maxOutputTokensOverride=20,000; the post-compaction continuation message is a USER message containing the + as plain text plus the transcriptPath pointer.", "API microcompact uses clear_tool_uses_20250919 with DEFAULT_MAX_INPUT_TOKENS=180,000 trigger and DEFAULT_TARGET_INPUT_TOKENS=40,000 (clear_at_least = 140,000); clear_thinking_20251015 with keep:'all' is emitted whenever hasThinking && !isRedactThinkingActive.", "Client-side microcompact constants: g3Y=40,000 protected token window, F3Y=3 always-protected recent tool results, B3Y=20,000 minimum savings threshold; clearable tools = Bash, Read, Glob, Grep, WebFetch, WebSearch.", "Prompt cache TTL: Claude Code requests 1-hour TTL automatically on Claude subscriptions (drops to 5-min on usage credits); API key/Bedrock/Vertex/Foundry default 5-min; FORCE_PROMPT_CACHING_5M=1 forces 5-min; ENABLE_PROMPT_CACHING_1H=1 opts into 1-hour on API key."], "components": [{"config": "Env: CLAUDE_CODE_AUTO_COMPACT_WINDOW (int>0, clamps effective window down), CLAUDE_AUTOCOMPACT_PCT_OVERRIDE (float 1-100, returns min(percentageThreshold, base)), DISABLE_COMPACT (disables ALL incl /compact), DISABLE_AUTO_COMPACT (auto only, /compact works), CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE (int>0, overrides blocking limit), CLAUDE_CODE_MAX_OUTPUT_TOKENS. 
Settings.json: autoCompactEnabled (bool). Feature flags (ant-only, wrapped in feature()): REACTIVE_COMPACT (gate tengu_cobalt_raccoon -> reactive only, suppress proactive), CONTEXT_COLLAPSE (separate headroom system owns 90%/95% gates).", "dataModel": "AutoCompactTrackingState = {compacted: bool, turnCounter: number, turnId: string, consecutiveFailures?: number}. RecompactionInfo = {isRecompactionInChain: bool, turnsSincePreviousCompact: number, previousCompactTurnId, autoCompactThreshold, querySource}. calculateTokenWarningState returns {percentLeft, isAboveWarningThreshold, isAboveErrorThreshold, isAboveAutoCompactThreshold, isAtBlockingLimit}.", "mechanism": "After each turn completes, shouldAutoCompact() is invoked in the query loop. It short-circuits false for forked-agent query sources ('session_memory', 'compact', and 'marble_origami' under CONTEXT_COLLAPSE). If disabled via env/config, returns false. Under feature('REACTIVE_COMPACT') or CONTEXT_COLLAPSE, proactive auto-compact is suppressed and reactiveCompact handles the API 413. Otherwise: tokenCount = tokenCountWithEstimation(messages) - snipTokensFreed; compares against getAutoCompactThreshold(model). If above threshold: autoCompactIfNeeded() first tries trySessionMemoryCompaction (no-LLM, reuses stored memory); if that fails, calls compactConversation(messages, ctx, cacheSafeParams, suppressUserQuestions=true, customInstructions=undefined, isAutoCompact=true, recompactionInfo). MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES=3 circuit breaker stops retrying doomed compactions (added 2026-03-10 to stop ~250K wasted API calls/day). 
On success, runPostCompactCleanup + setLastSummarizedMessageId(undefined) + notifyCompaction (reset cache-read baseline).", "name": "Auto-compact trigger & threshold (getAutoCompactThreshold / shouldAutoCompact / autoCompactIfNeeded)", "purpose": "Decides when to fire full conversation compaction, based on actual token usage from the API response vs a computed threshold."}, {"config": "Env: CLAUDE_CODE_MAX_OUTPUT_TOKENS (overrides model max output). Constants hardcoded in autoCompact.ts: AUTOCOMPACT_BUFFER_TOKENS=13_000, WARNING_THRESHOLD_BUFFER_TOKENS=20_000, ERROR_THRESHOLD_BUFFER_TOKENS=20_000, MANUAL_COMPACT_BUFFER_TOKENS=3_000, MAX_OUTPUT_TOKENS_FOR_SUMMARY=20_000.", "dataModel": "Constants (v2.1.68 / current autoCompact.ts): MAX_OUTPUT_TOKENS_FOR_SUMMARY=20_000; AUTOCOMPACT_BUFFER_TOKENS=13_000; WARNING_THRESHOLD_BUFFER_TOKENS=20_000; ERROR_THRESHOLD_BUFFER_TOKENS=20_000; MANUAL_COMPACT_BUFFER_TOKENS=3_000; MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES=3.", "mechanism": "getEffectiveContextWindowSize(model): contextWindow = getContextWindowForModel(model, getSdkBetas()) (200k standard, or 1M for [1m]/extended models: Opus 4.6+, Sonnet 4.6, Fable 5); if CLAUDE_CODE_AUTO_COMPACT_WINDOW set & valid, contextWindow = min(contextWindow, parsed); return contextWindow - reservedTokensForSummary where reservedTokensForSummary = min(getMaxOutputTokensForModel(model), 20_000). getAutoCompactThreshold(model): base = effectiveWindow - 13_000; if CLAUDE_AUTOCOMPACT_PCT_OVERRIDE (float 1-100) set, return min(floor(effectiveWindow*pct/100), base). Blocking limit (hard stop) = effectiveWindow - 3_000 (or CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE). Warning shown at threshold - 20_000.", "name": "Effective context window & buffers", "purpose": "Computes the usable context size by subtracting reserved output space and safety buffers from the raw model context window."}, {"config": "Env: DISABLE_MICROCOMPACT. 
NOTE: in shipped CC, tool-result clearing via clear_tool_uses_20250919 is ant-only (gated on process.env.USER_TYPE==='ant' AND USE_API_CLEAR_TOOL_RESULTS / USE_API_CLEAR_TOOL_USES); the thinking-block strategy is always emitted when thinking is active.", "dataModel": "ContextEditStrategy union: {type:'clear_tool_uses_20250919', trigger:{type:'input_tokens',value}, keep:{type:'tool_uses',value}, clear_tool_inputs?, exclude_tools?, clear_at_least?} | {type:'clear_thinking_20251015', keep:{type:'thinking_turns',value}|'all'}. TOOLS_CLEARABLE_RESULTS = SHELL_TOOL_NAMES + Glob + Grep + Read + WebFetch + WebSearch. TOOLS_CLEARABLE_USES = FileEdit + FileWrite + NotebookEdit. Response: context_management.applied_edits[] with cleared_tool_uses/cleared_input_tokens.", "mechanism": "getAPIContextManagement({hasThinking, isRedactThinkingActive, clearAllThinking}): if hasThinking && !isRedactThinkingActive, push {type:'clear_thinking_20251015', keep: clearAllThinking ? {thinking_turns:1} : 'all'}. Tool clearing is ant-only: if USER_TYPE==='ant' && (USE_API_CLEAR_TOOL_RESULTS || USE_API_CLEAR_TOOL_USES): push {type:'clear_tool_uses_20250919', trigger:{input_tokens: API_MAX_INPUT_TOKENS ?? 180_000}, clear_at_least:{input_tokens: trigger - keepTarget}, clear_tool_inputs: TOOLS_CLEARABLE_RESULTS} and/or the uses variant (exclude_tools: TOOLS_CLEARABLE_USES). API_MAX_INPUT_TOKENS default 180_000, API_TARGET_INPUT_TOKENS default 40_000. clear_thinking_20251015 must be listed first in edits[]. Beta header: context-management-2025-06-27.", "name": "API-based microcompact (apiMicrocompact.ts -> clear_tool_uses_20250919 / clear_thinking_20251015)", "purpose": "Server-side context-editing strategies attached to every request via context_management.edits[] \u2014 the native path that mirrors client microcompact behavior."}, {"config": "Env: DISABLE_MICROCOMPACT. 
Constants (v2.1.68 deobf): g3Y=40_000, F3Y=3, B3Y=20_000, eV8=2_000.", "dataModel": "U96 = Set cleared IDs (persists across turns). Cleared tool result replaced with string '[Tool result cleared]' (or written to temp file with re-read instruction). Images/documents -> '[image]' / '[document]'.", "mechanism": "Function Rg() runs during message serialization before each API call. Triggered when isAboveWarningThreshold AND clearable tool-result tokens > 20k. Algorithm: (1) find tool_use/tool_result pairs for eligible tools (bash, read_file, grep, glob, web_fetch, web_search); (2) always keep last F3Y=3 tool results; (3) scan backwards accumulating tool-result sizes until > g3Y=40k counted; (4) everything beyond that 40k window is eligible; (5) if eligible tokens > B3Y=20k, strip them (result -> '[Tool result cleared]', images/docs -> '[image]'/'[document]'); (6) cleared tool IDs tracked in U96 set across turns. NO LLM call.", "name": "Client-side microcompact (legacy in-memory, Rg())", "purpose": "In-process tool-result pruning that runs inline during message serialization (no LLM, no API context_management), the fallback when API strategies unavailable."}, {"dataModel": "9 sections: Primary Request/Intent; Key Technical Concepts; Files & Code Sections (with snippets); Errors & fixes; Problem Solving; All user messages (non-tool); Pending Tasks; Current Work; Optional Next Step (verbatim quotes). CompactionResult = {boundaryMarker, summaryMessages, attachments, hookResults}. 
Usage.iterations[] = {type:'compaction'|'message', input_tokens, output_tokens}.", "mechanism": "compactConversation(): (1) Run PreCompact hooks (can inject custom instructions); (2) check session memory (QP1) \u2014 if a stored summary exists and fits, skip the LLM; (3) build API request = full history + system prompt (same as conversation) + summary prompt as a final USER message, using mainLoopModel, thinkingConfig:{type:'disabled'}, maxOutputTokensOverride=20_000, tools = read_file only; (4) stream response, extract the <summary>...</summary> block (the model first emits an <analysis> block for its own reasoning, then the <summary> block); (5) clear readFileState; (6) re-inject recently-read files (bM4), plan file (IP1), skills (uM4), plan-mode (mM4); (7) run session-start hooks; (8) return {boundaryMarker:'Conversation compacted', summaryMessages, attachments, hookResults}. The summary request SHARES the prefix with the live conversation, so it reads the existing cache rather than reprocessing history. Server-side variant: beta compact-2026-01-12, context_management.edits=[{type:'compact_20260112'}], returns a 'compaction' content block; API drops all blocks before it on subsequent requests.", "name": "Manual /compact & full compaction (compactConversation / bG6)", "purpose": "LLM-based summarization that replaces the entire message history with a structured summary. Same code path for auto and manual; manual can take custom focus instructions and scope (partial)."}, {"dataModel": "Continuation message = USER role with: intro line, plain-text analysis block, plain-text summary block, optional transcriptPath pointer, optional 'Recent messages preserved verbatim', optional auto-compact tail instruction.", "mechanism": "After compaction, history is rebuilt as: [boundaryMarker message 'Conversation compacted'][summaryMessage JQ6 containing analysis+summary as plain text][messagesToKeep (partial /compact only)][attachments: re-injected files/skills/plan][hookResults: session-start outputs].
JQ6 text: 'This session is being continued from a previous conversation that ran out of context. The summary below covers the earlier portion...' followed by the analysis and summary blocks, then 'If you need specific details from before compaction... read the full transcript at: {transcriptPath}', and for auto-compact: 'Please continue the conversation from where we left off without asking the user any further questions. Continue with the last task.'", "name": "Continuation message & post-compaction reconstruction (JQ6)", "purpose": "The user-role message injected as the first item of the new history after a compaction, framing the summary and pointing to the full transcript."}, {"config": "For sharing cache across machines (Agent SDK), suppress per-machine system-prompt sections (working dir, platform, etc.).", "dataModel": "Layers: System prompt (core instructions, tool defs, output style) | Project context (CLAUDE.md, auto memory, unscoped rules) | Conversation (messages, results). cache key includes model + effort level + fast-mode header. current_usage fields: cache_creation_input_tokens, cache_read_input_tokens.", "mechanism": "cache_control breakpoint at end of system prompt keeps the system prompt cached separately so a compaction summary write doesn't invalidate it. Up to 4 breakpoints allowed. TTL selection: on Claude subscription, CC auto-requests 1h TTL (drops to 5m when over plan limit, drawing usage credits); on API key/Bedrock/Vertex/Foundry/Claude Platform on AWS, default 5m, opt into 1h via ENABLE_PROMPT_CACHING_1H=1; FORCE_PROMPT_CACHING_5M=1 forces 5m regardless. Cache scope is per machine+directory (system prompt embeds cwd, platform, shell, OS version, auto-memory paths, branch, recent commits). 
Subagents use 5m TTL even on subscription; forks inherit parent prefix and read parent cache.", "name": "Prompt cache layering & breakpoints (cache_control)", "purpose": "How Claude Code orders the request and places cache_control breakpoints to maximize prefix reuse and minimize invalidation."}, {"dataModel": "Invocation counter per skill; total bytes counter; oldest-first eviction. Re-injection keys: skills (capped), CLAUDE.md (re-read from disk), auto memory (re-read from disk).", "mechanism": "At session start: system prompt + tool definitions + project-root CLAUDE.md + user-level CLAUDE.md + auto memory load once (held in memory, ~2-5k tokens typical; recommendation: keep CLAUDE.md <200 lines / ~2-2.5k tokens). After compaction: system prompt & output style unchanged (not message history); project-root CLAUDE.md + unscoped rules re-injected from disk; auto memory re-injected from disk; path-scoped rules (paths: frontmatter) LOST until a matching file is read again; nested CLAUDE.md LOST until a file in that subdir is read; invoked skill bodies re-injected, capped at 5,000 tokens/skill and 25,000 total, oldest dropped first (truncation keeps the start of SKILL.md). Manual /compact with focus instructions lets the user steer what survives.", "name": "System-prompt & project-context token budgeting", "purpose": "Controls what fills the fixed prefix vs the compaction-volatile conversation layer, and what survives compaction."}, {"config": "compaction_control deprecated in Python/TS/Ruby SDKs in favor of server-side compact_20260112.", "dataModel": "Server stop_reason='compaction'. context_management.original_input_tokens vs input_tokens (after edits). 
token-count endpoint applies existing compaction blocks but triggers no new compaction.", "mechanism": "Server-side (beta compact-2026-01-12, context_management.edits with type:'compact_20260112'): trigger default 150k (min 50k), pause_after_compaction to inject extra blocks, custom instructions fully replace default prompt, supports streaming (single compaction_delta event), returns usage.iterations[] (compaction + message iterations; top-level usage excludes compaction iteration). SDK client-side (tool_runner, compaction_control \u2014 DEPRECATED in favor of server-side): threshold default 100k, optional separate summary model, injects summary prompt as user turn, replaces history with <summary>...</summary>, can use a cheaper summary model (server-side cannot). Token-count note: cache_read_input_tokens from server tools (web search) can inflate perceived usage and trigger premature compaction.", "name": "Server-side compaction vs SDK compaction (compact_20260112)", "purpose": "Two API-level compaction modes: server-side (recommended, beta) vs SDK client-side (deprecated compaction_control)."}], "confidence": "high", "dimension": "context-compaction", "externalInterfaces": ["Anthropic API beta header: compact-2026-01-12 (server-side compaction, compact_20260112 edit in context_management.edits)", "Anthropic API beta header: context-management-2025-06-27 (clear_tool_uses_20250919, clear_thinking_20251015)", "API request field: context_management.edits = [ContextEditStrategy...]
(compaction, clear_tool_uses, clear_thinking)", "API response field: context_management.applied_edits[] (cleared_tool_uses, cleared_thinking_turns, cleared_input_tokens)", "API response: content block type 'compaction' (stop_reason 'compaction'); streaming content_block_delta type 'compaction_delta'", "API response: usage.iterations[] = [{type:'compaction'|'message', input_tokens, output_tokens}]", "API: cache_control = {type:'ephemeral', ttl:'5m'|'1h'} on system prompt / messages / compaction blocks (max 4 breakpoints)", "Slash command: /compact [instructions] (full or partial from message index)", "Slash command: /context (live breakdown by category)", "Slash command: /clear (full reset, reloads startup)", "Slash command: /memory (show loaded CLAUDE.md + auto memory)", "Settings.json key: autoCompactEnabled (bool)", "Env vars: DISABLE_COMPACT, DISABLE_AUTO_COMPACT, DISABLE_MICROCOMPACT, DISABLE_PROMPT_CACHING[_HAIKU|_SONNET|_OPUS|_FABLE], ENABLE_PROMPT_CACHING_1H, FORCE_PROMPT_CACHING_5M, CLAUDE_AUTOCOMPACT_PCT_OVERRIDE, CLAUDE_CODE_AUTO_COMPACT_WINDOW, CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE, CLAUDE_CODE_MAX_OUTPUT_TOKENS, CLAUDE_AFTER_LAST_COMPACT", "PreCompact hook (injects custom instructions into summary prompt)", "sessionMemory / transcript files (transcriptPath pointer in JQ6 continuation message)"], "keyBehaviors": ["DEFAULT AUTO-COMPACT THRESHOLD (the headline number a re-implementor must get right): effectiveWindow - 13,000, where effectiveWindow = contextWindow - min(maxOutputTokens, 20,000). For a 200k model with 8192 max output: 200,000 - 8,192 - 13,000 = 178,808 (~89.4%). For a 1M model: ~987k. The buffer of 13k was DROPPED from an earlier 20k/33k/45k in early-2026 changes; current constant is 13,000.", "TOKEN SOURCE FOR THE TRIGGER: must use ACTUAL token count from the API response (input_tokens + cache_creation_input_tokens + cache_read_input_tokens + output_tokens), NOT a client-side estimate. 
shouldAutoCompact does use tokenCountWithEstimation for the proactive check, but the authoritative numbers come from the API usage object. Using estimates will mis-fire.", "BLOCKING LIMIT (hard stop) = effectiveWindow - 3,000. This is where the session truly cannot proceed. Below autocompact threshold but above warning threshold, microcompact fires. There are 5 distinct token states: normal / above warning (threshold-20k) / above error / above autocompact (threshold) / at blocking limit (effectiveWindow-3k).", "MICROCOMPACT IS NON-LLM: client-side microcompact (Rg) does pure in-memory string replacement ('[Tool result cleared]') and never calls the model. It runs INLINE during message serialization before every API call, can fire in the same turn as full compaction, and tracks cleared tool IDs in a persistent set U96. Constants: protect last 40k tokens of tool results, always keep last 3 tool results, only act if >20k tokens clearable.", "API-BASED MICROCOMPACT IS ANT-ONLY for tool clearing: clear_tool_uses_20250919 strategy is gated behind process.env.USER_TYPE==='ant' AND USE_API_CLEAR_TOOL_RESULTS/USES. The clear_thinking_20251015 strategy (keep:'all') IS shipped to everyone when extended thinking is active. The beta header is context-management-2025-06-27. A 1h-idle condition sets clearAllThinking -> keep only last thinking turn (value:1, since schema requires >=1).", "COMPACT INVOKES THE MODEL WITH thinking DISABLED and maxOutputTokens capped at 20,000, tools = read_file only. Extended thinking is turned off during the summarization sub-call. The summary request reuses the SAME system prompt + history prefix so it gets a cache hit (the slow part is generation, not cache miss).", "CIRCUIT BREAKER: MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES=3. After 3 consecutive failed auto-compacts (e.g. irrecoverable prompt_too_long), CC stops trying for the rest of the session. 
Added 2026-03-10 because 1,279 sessions had 50+ consecutive failures (up to 3,272), wasting ~250K API calls/day.", "RECOMPACTION METADATA is threaded through: isRecompactionInChain (was the previous turn already a compaction?), turnsSincePreviousCompact, previousCompactTurnId. This lets the summarization prompt know it is summarizing an already-summarized history.", "COMPACT CAN FAIL if the model calls a tool during summarization instead of writing a summary -> returns compaction block with content:null (server-side) or throws 'Failed to generate conversation summary' (client). Workaround: custom instructions explicitly telling the model not to call tools.", "CACHE INVALIDATION LIST (a re-impl must replicate exactly): switching models, changing effort level (/effort), enabling fast mode (header is cache key, fixed to persist across toggles in v2.1.86+), connecting/disconnecting an MCP server whose tools load into prefix (deferred tools are safe), enabling/disabling a plugin with MCP servers, denying an entire tool via bare-name deny rule, compacting, upgrading Claude Code. Cache-SAFE: file edits, editing CLAUDE.md mid-session (doesn't apply until restart), changing output style, changing permission mode, invoking skills/commands (append-only), /recap, /rewind, spawning subagents.", "TTL LOGIC: subscription auth -> 1h auto (drops to 5m when over limit using credits); API key/Bedrock/Vertex/Foundry -> 5m default, ENABLE_PROMPT_CACHING_1H=1 for 1h; FORCE_PROMPT_CACHING_5M=1 forces 5m everywhere. Subagents ALWAYS 5m even on subscription. Forks inherit parent cache.
Cache scope = per machine+directory (system prompt embeds cwd/platform/shell/OS/branch/recent-commits).", "WHAT SURVIVES COMPACTION (exact table): system prompt + output style = unchanged; project-root CLAUDE.md + unscoped rules + auto memory = re-injected from disk; path-scoped rules (paths: frontmatter) = LOST until matching file read; nested subdir CLAUDE.md = LOST until file in subdir read; invoked skills = re-injected capped 5,000 tokens/skill, 25,000 total, oldest dropped first, truncation keeps TOP of SKILL.md; hooks = N/A (run as code).", "SESSION MEMORY COMPACTION is tried FIRST (no LLM) before the full compactConversation path \u2014 if a stored session-memory summary exists and fits, it's reused. Cache-sharing feature flag tengu_compact_cache_prefix tries to reuse a compaction result cached from another session with the same conversation prefix. Streaming retry flag tengu_compact_streaming_retry retries compaction on stream failure.", "REACTIVE COMPACT (feature('REACTIVE_COMPACT'), gate tengu_cobalt_raccoon, ant-only): suppresses proactive auto-compact and instead lets the API return prompt_too_long (413), then reactiveCompact handles it as a fallback (it consults isAutoCompactEnabled directly, bypassing the suppression).", "CONTEXT COLLAPSE (feature('CONTEXT_COLLAPSE')): a separate headroom system with 90% commit-start / 95% blocking-spawn gates. When enabled, autocompact is suppressed (would race collapse at ~93% effective). 
marble_origami (ctx-agent) query source is also excluded from autocompact because runPostCompactCleanup would destroy the main thread's committed log."], "openQuestions": ["Exact current value of the autocompact buffer in the very latest shipped version (sources show 13,000 as of v2.1.68 / early 2026; community write-ups reference an older 20k/33k/45k progression \u2014 a re-impl should treat 13,000 as the constant but verify against the installed package).", "Whether server-side compact_20260112 is actually wired into shipped Claude Code yet, or whether CC still uses the client-side LLM-summarization path (compactConversation) as of mid-2026 \u2014 the API feature is beta and the SDK compaction_control is deprecated, but CC's own usage is not publicly confirmed.", "The exact set of tools eligible for client-side microcompact clearing in the current build (deobf v2.1.68 lists bash, read_file, grep, glob, web_fetch, web_search + edit/write/notebook for the uses path; whether TodoWrite, Task, etc. 
are now included).", "Exact behavior of 'snip' (snipTokensFreed parameter) \u2014 a separate pruning mechanism whose rough-delta is subtracted from the token estimate; its trigger and algorithm are not fully documented.", "Whether the 1M context window now requires a beta header or [1m] model variant on Opus 4.6+/Sonnet 4.6 (sources say GA/no-beta as of the 1M GA announcement, but Bedrock/Vertex still gate it behind model selection)."], "sources": [{"title": "Compaction - Claude API Docs (server-side compact_20260112)", "url": "https://platform.claude.com/docs/en/build-with-claude/compaction", "why": "Official server-side compaction spec: beta header compact-2026-01-12, trigger default 150k, pause_after_compaction, custom instructions, compaction block handling, usage.iterations, cache_control on compaction blocks, streaming events, model-list (Opus 4.8/Sonnet 4.6), limitations (tool-call-during-summary)."}, {"title": "autoCompact.ts source (deobfuscated) - alex000kim/claude-code", "url": "https://github.com/alex000kim/claude-code/blob/main/src/services/compact/autoCompact.ts", "why": "Authoritative source for exact thresholds/buffers/env vars: MAX_OUTPUT_TOKENS_FOR_SUMMARY=20000, AUTOCOMPACT_BUFFER_TOKENS=13000, WARNING/ERROR=20000, MANUAL_COMPACT=3000, MAX_CONSECUTIVE_FAILURES=3, getEffectiveContextWindowSize, getAutoCompactThreshold, calculateTokenWarningState, isAutoCompactEnabled, shouldAutoCompact, circuit breaker, CLAUDE_CODE_AUTO_COMPACT_WINDOW, CLAUDE_AUTOCOMPACT_PCT_OVERRIDE, CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE, DISABLE_COMPACT/DISABLE_AUTO_COMPACT, REACTIVE_COMPACT and CONTEXT_COLLAPSE feature gating."}, {"title": "Claude Code compaction deep dive v2.1.68 (deobfuscated gist)", "url": "https://gist.github.com/sam-saffron-jarvis/9d8e291c4e696ac7948702d6c4884448", "why": "Deobfuscated v2.1.68 details: the 5 mechanisms table, exact full-compact/partial-compact/sub-agent prompts, JQ6 continuation message, client-side microcompact constants (g3Y=40000, F3Y=3, 
B3Y=20000, eV8=2000), bG6() flow, post-compaction re-injection, edge cases, full env-var table."}, {"title": "Context editing - Claude API Docs (clear_tool_uses_20250919 / clear_thinking_20251015)", "url": "https://platform.claude.com/docs/en/build-with-claude/context-editing", "why": "Official server-side context-editing spec: beta header context-management-2025-06-27, strategy params (trigger default 100k, keep default 3 tool uses, clear_at_least, exclude_tools, clear_tool_inputs), thinking clearing keep model-class defaults (Opus 4.5+/Sonnet 4.6+ keep all), cache invalidation rules, applied_edits response, token-count endpoint, SDK compaction_control deprecation + defaults (100k, custom model, summary prompt)."}, {"title": "How Claude Code uses prompt caching - Claude Code Docs", "url": "https://code.claude.com/docs/en/prompt-caching", "why": "Official cache layering: prefix-match rule, 3-layer order (system prompt / project context / conversation), exhaustive invalidation list, cache-safe list, TTL selection (subscription=1h auto, API key=5m, ENABLE_PROMPT_CACHING_1H, FORCE_PROMPT_CACHING_5M), cache scope per machine+directory, subagent/fork cache behavior, cache token fields."}, {"title": "Explore the context window - Claude Code Docs", "url": "https://code.claude.com/docs/en/context-window", "why": "Official what-survives-compaction table (system prompt unchanged, CLAUDE.md/auto-memory re-injected from disk, path-scoped rules & nested CLAUDE.md lost, skills re-injected capped 5,000/skill + 25,000 total oldest-first), /context and /memory commands, 1M context on Fable 5/Opus 4.6+/Sonnet 4.6."}, {"title": "apiMicrocompact.ts source (API context-management strategies)", "url": "https://claude-code-os.vercel.app/docs/claude-src/file/services/compact/apiMicrocompact.ts", "why": "Source for getAPIContextManagement: DEFAULT_MAX_INPUT_TOKENS=180_000, DEFAULT_TARGET_INPUT_TOKENS=40_000, clear_thinking_20251015 keep:'all' vs clearAllThinking keep:{thinking_turns:1}, 
TOOLS_CLEARABLE_RESULTS (shell/glob/grep/read/webfetch/websearch) and TOOLS_CLEARABLE_USES (edit/write/notebook), ant-only gating (USER_TYPE==='ant' + USE_API_CLEAR_TOOL_RESULTS/USES), env API_MAX_INPUT_TOKENS/API_TARGET_INPUT_TOKENS."}], "summary": "Claude Code (latest v2.1.68+ as of mid-2026) manages a finite context window through a layered pipeline: (1) a client-side microcompact that runs inline before every API call to strip old tool results without an LLM, (2) an optional API-native \"cached microcompact\" using the new clear_tool_uses_20250919 / clear_thinking_20251015 context-editing strategies (beta, ant-only for tool clearing, GA for thinking), (3) a full auto-compact that fires when actual token usage crosses getAutoCompactThreshold() = effectiveWindow - 13,000 tokens (effectiveWindow = contextWindow - min(maxOutputTokens, 20,000)), and (4) a manual /compact command that reuses the same compactConversation() path with optional custom focus instructions and optional partial scope. Compaction sends the full history + a structured 9-section summarization prompt (which first wraps analysis in <analysis> tags then emits a <summary> block) to the SAME mainLoopModel with thinkingConfig disabled and maxOutputTokens capped at 20,000, then replaces history with [boundaryMarker][continuation message][kept messages][re-injected files/skills/plan]. The system prompt layer is cached separately (cache_control breakpoint at end of system prompt) so it survives compaction; the conversation layer is rebuilt from the summary. Prompt cache TTL is 5-minute by default on API keys and 1-hour on Claude subscriptions (auto-selected), with up to 4 cache_control breakpoints.
Server-side compaction (beta compact-2026-01-12) is a newer API-native alternative that returns a \"compaction\" content block; Claude Code's client-side path is the legacy but still-primary mechanism."}, "system-prompt-assembly": {"asOfDate": "2026-06", "claimsToVerify": ["EXACT BOUNDARY MARKER: the cache-boundary marker is the literal string __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__ inserted into the system prompt array and stripped before the API call (not sent to the model). Verify against leaked src/constants/prompts.ts and src/utils/api.ts.", "EXACT SECTION COUNT/ORDER: getSystemPrompt() concatenates ~18 ordered sections; sections 1-12 (CLI prefix, Intro, System Rules, Doing Tasks, Actions, Using Tools, Tone/Style, Output Efficiency, etc.) are static+cacheable, and everything from Environment Info onward (Env Info, Scratchpad, Function Result Clearing, MCP Instructions, Memory, CLAUDE.md, Append) is dynamic per-session. Verify exact order and which are gated by feature flags (TOKEN_BUDGET, CACHED_MICROCOMPACT, etc.).", "CLAUDE.md IS NOT IN THE SYSTEM PROMPT: official docs state CLAUDE.md/CLAUDE.local.md content is injected into the conversation as a USER message (project context), not into the system prompt; it therefore does NOT affect system-prompt cache entries. The exception is excludeDynamicSections (TS) / exclude_dynamic_sections (Python), added claude-agent-sdk v0.2.98 / v0.1.58, which moves the env-info block from the system prompt into the first user message."], "components": [{"config": "systemPrompt: { type:'preset', preset:'claude_code', append?:string, excludeDynamicSections?:boolean } (TS); system_prompt={'type':'preset','preset':'claude_code','append':...} (Python). Custom: systemPrompt: string. None => minimal default. excludeDynamicSections added v0.2.98 (TS) / v0.1.58 (Python). CLI flags: --append-system-prompt, --exclude-dynamic-system-prompt-sections, --system-prompt. 
Env: CLAUDE_CODE_SIMPLE truthy => single-line minimal prompt.", "dataModel": "Priority tiers: 0 Override, 1 Coordinator (feature active => toolset stripped to Agent+TaskStop+SendMessage), 2 mainThreadAgentDefinition (proactive: append; else replace), 3 --system-prompt CLI (replace), 4 Default = getSystemPrompt(). The branded SystemPrompt type prevents passing raw string[] to the API.", "mechanism": "buildEffectiveSystemPrompt() resolves which prompt base is used via a strict priority ladder: (0) overrideSystemPrompt non-empty replaces everything; (1) COORDINATOR_MODE feature => dedicated coordinator prompt (strips toolset to Agent + TaskStop + SendMessage); (2) mainThreadAgentDefinition exists => proactive mode appends to default, else replaces; (3) --system-prompt CLI arg replaces default; (4) default = full getSystemPrompt() output. The SDK exposes three starting points: minimal default (omitted systemPrompt), claude_code preset (object {type:'preset',preset:'claude_code', append?:string, excludeDynamicSections?:boolean}), or a custom string.", "name": "Effective Prompt Resolution (priority system)", "purpose": "Decides the final prompt base before per-turn assembly."}, {"config": "Gates: ask_user_enabled, non_interactive (omits shell-shortcut section in SDK/headless), agent_tool_enabled (+ fork_subagent + explore_plan_agents), skills_enabled (+ experimental_skill_search), verification_agent, memory_configured, user_type_ant, language_set, output_style, mcp_connected (+ mcp_delta_mode), scratchpad_enabled, microcompact_enabled, token_budget, kairos_brief, is_git_repo & !remote & git_instructions_enabled, append_system_prompt.", "dataModel": "Sections registered via systemPromptSection(name, compute) [cached, invalidated only on /clear or /compact] or DANGEROUS_uncachedSystemPromptSection(name, compute, reason) [recomputed every turn \u2014 used for getMcpInstructionsSection, Env Info]. 
clearSystemPromptSections() invalidates the memo AND clears beta-header latches.", "mechanism": "Static zone (cacheable, scope 'global'): 1 CLI System Prefix ('You are Claude Code, Anthropic's official CLI for Claude.'), 2 Intro (interactive vs headless swaps 'assist' for 'complete'), 3 Cyber Risk Instruction, 4 URL Safety ('NEVER generate or guess URLs'), 5 System Rules (output format, prompt-injection defense, system-reminder handling, compaction), 6 Doing Tasks (anti-YAGNI; conditional on output_style keepCodingInstructions), 7 Executing Actions (LOW/MEDIUM/HIGH blast-radius taxonomy; always-confirm set: rm -rf/DROP TABLE, git push/publish, migrations/force-push), 8 Using Your Tools (prefer dedicated tools Read/Edit/Glob/Grep over Bash; varies by repl_mode/embedded_search/task_tool_enabled), 9 Tone & Style (no emojis; varies user_type_external), 10 Output Efficiency (internal 'between-tool calls \u226425 words' vs external 'go straight to the point'), 11 Token Budget (GATED on feature('TOKEN_BUDGET')), 12 Proactive/KAIROS (GATED on feature('PROACTIVE')). Then the cache boundary marker, then the Dynamic zone (scope 'org' or uncached): 13 Env Info (cwd, isGit, platform, shell, osVersion, model name, knowledge cutoff; varies undercover/worktree), 14 Scratchpad, 15 Function Result Clearing (microcompact_enabled; '5 most recent results always kept'), 16 Summarize Tool Results, 17 MCP Server Instructions (DANGEROUS_uncached \u2014 recomputed every turn), 18 Memory, plus Language, Output Style, Git Status Snapshot (current branch / recent commits / working tree \u2014 snapshot in time), Numeric Length Anchors (user_type_ant), Brief (kairos_brief), and Append System Prompt at the very end.", "name": "getSystemPrompt() \u2014 section factory", "purpose": "The core factory that concatenates ~18 ordered sections split by a cache boundary."}, {"config": "Env var sources: osType, osVersion, osRelease (platform runtime), getCwd(), getIsGit(). 
CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1 loads CLAUDE.md/rules from --add-dir paths.", "dataModel": "Env fields read: osType, osVersion, osRelease, getCwd(), getIsGit(). The gitStatus block carries currentBranch, mainBranch (default branch for PRs), gitUser, and a working-tree status string + recent commits list.", "mechanism": "Env Info is a DANGEROUS_uncachedSystemPromptSection recomputed per turn. It reads osType/osVersion/osRelease, getCwd(), getIsGit(). A separate 'Git Status Snapshot' block (gated is_git_repo && not remote && git_instructions_enabled) injects current branch, default (main) branch, git user, and a working-tree status with recent commits. The whole env block is what breaks the prefix cache for the static zone \u2014 excludeDynamicSections moves it into the first user message instead.", "name": "Environment / System Context section", "purpose": "Inject cwd, platform, shell, model, OS version, git status so the model knows its execution environment."}, {"config": "settingSources / setting_sources controls whether 'project' and 'user' files load (default both enabled). CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1 loads memory from --add-dir paths. claudeMdExcludes (glob, arrays merge across layers) skips files. --setting-sources may exclude 'local'.", "dataModel": "Discovery order: managed policy (cannot be excluded) -> ~/.claude/CLAUDE.md -> ancestor dirs root-down (CLAUDE.md then CLAUDE.local.md at each level) -> ./CLAUDE.md or ./.claude/CLAUDE.md -> ./CLAUDE.local.md. .claude/rules/*.md (no paths frontmatter) join at CLAUDE.md priority; path-scoped rules (paths: glob YAML) load on file read. HTML block comments stripped (code-block comments preserved). Imports expanded recursively up to 4 hops. 
Auto-memory MEMORY.md first 200 lines or 25KB loaded; topic files on demand only.", "mechanism": "IMPORTANT asymmetry: in the Agent SDK CLAUDE.md is NOT injected into the system prompt \u2014 the SDK reads it and injects it as a USER message (project context) alongside the conversation. Per the memory docs: 'CLAUDE.md content is delivered as a user message after the system prompt, not as part of the system prompt itself.' Resolution walks up the directory tree from cwd collecting CLAUDE.md and CLAUDE.local.md, concatenating root-down with .local appended after .md at each level. Managed policy CLAUDE.md (/Library/Application Support/ClaudeCode/CLAUDE.md on macOS, /etc/claude-code/ on Linux, C:\\Program Files\\ClaudeCode\\ on Windows) loads first and cannot be excluded. @path imports resolve relative to the importing file with max depth 4 hops. Subdirectory CLAUDE.md files load lazily when Claude reads files there. Project-root CLAUDE.md is re-injected after /compact.", "name": "CLAUDE.md cascade (memory)", "purpose": "Persistent project/user/org instructions, loaded per session and lazily."}, {"config": "mcp_connected gate; mcp_delta_mode toggles per-turn attachment vs inline. Instructions are re-fetched because tools/list can change (MCP list_changed).", "dataModel": "instructions: string from InitializeResult. Per-server section header '## '. Composite prompt text assembled under '# MCP Server Instructions'.", "mechanism": "When MCP servers are connected, each server's instructions field (returned in InitializeResult during the initialize handshake) is injected as a '# MCP Server Instructions' section, one subsection per server, in the dynamic/uncached zone (DANGEROUS_uncachedSystemPromptSection => recomputed every turn). If mcp_delta_mode is enabled, instructions are delivered as a per-turn attachment instead of inline in the system prompt. 
Empty/missing instructions are omitted.", "name": "MCP Server Instructions injection", "purpose": "Inject per-server 'how to use this server' guidance into the dynamic prompt zone."}, {"config": "Boundaries: UserPromptSubmit default timeout lowered to 30s; MessageDisplay 10s. Tokens/effort injected as $CLAUDE_EFFORT env and effort:{level} in hook JSON. Managed hooks survive disableAllHooks from lower layers.", "dataModel": "Output schema: { continue?:bool, stopReason?:string, suppressOutput?:bool, systemMessage?:string, terminalSequence?:string(allowlist OSC 0/1/2/9/99/777 + BEL), decision?:'block', reason?:string, hookSpecificOutput:{ hookEventName, permissionDecision?:'allow'|'deny'|'ask', permissionDecisionReason?, additionalContext?, retry?:bool } }. additionalContext/systemMessage/plain stdout capped 10,000 chars; overflow => file + preview. Exit codes: 0 success (JSON parsed), 2 blocking error (stderr fed to Claude), other = non-blocking. HTTP: 2xx+body=JSON, non-2xx=non-blocking.", "mechanism": "Five handler types: command (stdin JSON / stdout+exit), http (POST body / 2xx response JSON), mcp_tool (calls a tool on a connected server; text output treated as command stdout), prompt (single-turn Claude yes/no), agent (spawns a tool-using subagent). The additionalContext field in hookSpecificOutput is wrapped by Claude Code in a <system-reminder> tag and inserted at a position determined by the firing event: SessionStart/Setup/SubagentStart => start of conversation before first prompt; UserPromptSubmit/UserPromptExpansion => alongside submitted prompt; PreToolUse/PostToolUse/PostToolUseFailure/PostToolBatch => next to the tool result; Stop/SubagentStop => end of turn. Matches: 'Claude Code wraps the string in a system reminder and inserts it into the conversation at the point where the hook fired.' Exit 0 with stdout on UserPromptSubmit/UserPromptExpansion/SessionStart also adds the text as Claude-visible context (these three events only). 
Exit 2 blocks per the per-event blocking table.", "name": "Hook injection (system-reminder wrapping)", "purpose": "Run user-defined shell/HTTP/MCP/prompt/agent interceptors at lifecycle events and inject their output as model-visible reminders."}, {"config": "Matched by tool name. Settings keys: hooks.<Event>[].matcher, hooks[].if (permission-rule syntax), disableAllHooks, allowManagedHooksOnly, once (skill-frontmatter only). Hook sources: ~/.claude/settings.json, .claude/settings.json, .claude/settings.local.json, managed policy, plugin hooks/hooks.json, skill/agent frontmatter.", "dataModel": "Input: { session_id, transcript_path, cwd, permission_mode:'default'|'plan'|'acceptEdits'|'auto'|'dontAsk'|'bypassPermissions', effort:{level}, hook_event_name, plus event-specific (tool_name, tool_input). agent_id/agent_type added in subagents. Output: permissionDecision allow/deny/ask + reason (PreToolUse), retry:bool (PermissionDenied), additionalContext (model-facing), systemMessage (user-facing warning), suppressOutput, terminalSequence, continue:false + stopReason.", "mechanism": "Tool-event hooks (PreToolUse, PostToolUse, PostToolUseFailure, PermissionRequest, PermissionDenied) match by tool_name. matcher rules: '*' / '' / omitted => all; only [A-Za-z0-9_|] => exact or |-separated exact list; any other char => JS regex. MCP tools are named mcp__<server>__<tool>; match-all-from-server needs mcp__<server>__.* (the .* makes it a regex; bare mcp__memory is treated as exact string and matches nothing). Optional per-handler 'if' uses permission-rule syntax (e.g. Bash(rm *), Edit(*.ts)) and only evaluates on tool events. SessionStart matches startup|resume|clear|compact; InstructionsLoaded matches session_start|nested_traversal|path_glob_match|include|compact.", "name": "Hook event matchers & tool-name namespacing", "purpose": "Filter which hooks fire for which tool/event."}, {"config": "Todo tracking built into Agent SDK (TaskCreate/TaskUpdate/TaskList). Plan mode is permission_mode:'plan'. 
Reminders are non-system-prompt context \u2014 they appear as <system-reminder> tags in the message stream.", "dataModel": "Reminders are <system-reminder> blocks attached as attachments to user messages (not stored in the system prompt array).", "mechanism": "These are NOT part of the system prompt. They are injected as attachments appended to user messages each turn: (a) todo/task state ('The task tools haven't been used recently... consider using TaskCreate'), (b) active plan-mode ('plan only, do not code yet'), (c) auto-surfaced relevant skills ('Skills relevant to your task:'), (d) hook-produced additionalContext, (e) git/file-change diff reminders after tool edits. They are wrapped in <system-reminder> tags and the model is instructed (via System Rules section) to read and apply them.", "name": "Dynamic reminders: todo / plan mode / skill surfacing", "purpose": "Steer the model mid-conversation without rebuilding the system prompt."}], "confidence": "high", "dimension": "system-prompt-assembly", "externalInterfaces": ["SDK (TS): systemPrompt: {type:'preset',preset:'claude_code',append?,excludeDynamicSections?}", "SDK (Python): system_prompt={'type':'preset','preset':'claude_code','append':...,'exclude_dynamic_sections':bool}", "SDK: settingSources=['user','project'] / setting_sources=['user','project'] (empty array disables CLAUDE.md)", "SDK: settings.outputStyle (string) selects ~/.claude/output-styles/<name>.md", "CLI flags: --append-system-prompt, --system-prompt, --exclude-dynamic-system-prompt-sections, --add-dir, --setting-sources", "Env: CLAUDE_CODE_SIMPLE, CLAUDE_CODE_USE_BEDROCK/VERTEX/OPENAI, CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD, CLAUDE_CODE_DISABLE_AUTO_MEMORY", "Managed CLAUDE.md paths: /Library/Application Support/ClaudeCode/CLAUDE.md (macOS), /etc/claude-code/CLAUDE.md (Linux/WSL), C:\\Program Files\\ClaudeCode\\CLAUDE.md (Windows)", "settings.json keys: claudeMd, claudeMdExcludes (glob array), autoMemoryEnabled, autoMemoryDirectory, outputStyle, hooks.{Event}[]", "Output styles: 
~/.claude/output-styles/*.md and .claude/output-styles/*.md with frontmatter name/description/keep-coding-instructions", "Hook config JSON: hooks.[].matcher + [].hooks[].{type,command/args|url|server+tool|prompt,if,timeout,async,asyncRewake,statusMessage,once}", "Internal TS functions: getSystemPrompt(), buildEffectiveSystemPrompt(), systemPromptSection(), DANGEROUS_uncachedSystemPromptSection(), clearSystemPromptSections(), splitSysPromptPrefix(), normalizeMessagesForAPI()", "Type: branded SystemPrompt = string[] & {__brand:'SystemPrompt'}", "Cache-control scopes: 'global' (cross-org) and 'org' (per-org)"], "keyBehaviors": ["CLAUDE.md lives in the CONVERSATION (user message), not the system prompt, in the Agent SDK \u2014 it does not affect the system-prompt cache entry. The env-info block (cwd/platform/git/shell/model) DOES live in the system prompt and is what normally prevents cache reuse across directories.", "excludeDynamicSections moves the env-info block into the FIRST USER MESSAGE so the system prompt (preset + append) becomes byte-identical across users/machines and shares a cache entry. Tradeoff: text in a user message carries marginally less weight than in the system prompt. Requires claude-agent-sdk TS v0.2.98 / Python v0.1.58.", "Three caching modes in splitSysPromptPrefix(): Mode 1 (MCP present) => no global cache, whole prompt scope 'org' because MCP tool defs change; Mode 2 (1P default, no MCP) => split at boundary, static=scope 'global' (cross-org cacheable), dynamic=uncached; Mode 3 (3P providers Bedrock/Vertex/OpenAI) => whole prefix scope 'org'.", "The boundary marker __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__ is inserted into the prompt array but REMOVED before sending to the API \u2014 the model never sees it. It exists only so splitSysPromptPrefix can find the split point.", "systemPromptSection() memoizes compute results and is only cleared by /clear or /compact (clearSystemPromptSections also clears beta-header latches). 
DANGEROUS_uncachedSystemPromptSection forces per-turn recompute and is deliberately named to discourage use \u2014 reserved for genuinely per-turn content (MCP instructions, env info).", "Output styles: a custom output style by DEFAULT REPLACES the preset's software-engineering instructions; set keep-coding-instructions: true in frontmatter to layer on top instead. Stored in ~/.claude/output-styles/ (user) or .claude/output-styles/ (project). Loaded via settingSources user/project. Python SDK has no programmatic outputStyle selector.", "CLAUDE.md loading is gated by settingSources \u2014 an empty array disables CLAUDE.md entirely even though the claude_code preset is active. 'project' loads ./CLAUDE.md or ./.claude/CLAUDE.md; 'user' loads ~/.claude/CLAUDE.md.", "CLAUDE.md import depth is capped at 4 hops; relative @paths resolve against the importing file, not cwd. Block HTML comments are stripped before injection (code-block comments preserved). Subdirectory CLAUDE.md files load lazily on file reads, not at launch.", "Auto-memory MEMORY.md: only first 200 lines OR 25KB (whichever first) loaded at session start; topic files loaded on demand. Storage at ~/.claude/projects//memory/, shared across worktrees of one git repo. Requires Claude Code v2.1.59+. Toggle: autoMemoryEnabled setting, CLAUDE_CODE_DISABLE_AUTO_MEMORY=1, or /memory UI.", "managed-policy CLAUDE.md cannot be excluded by claudeMdExcludes and cannot be disabled \u2014 it always applies. The claudeMd key in managed-settings.json is an alternative to deploying a managed CLAUDE.md file (only honored in managed/policy settings).", "Git Status Snapshot injected only when is_git_repo && not remote && git_instructions_enabled. It is explicitly a 'snapshot in time' and the prompt warns it will not update during the conversation.", "MCP server instructions come from the instructions field of the MCP InitializeResult; Claude Code injects them as a per-server subsection. 
If mcp_delta_mode is on, they are attached per-turn instead. Because MCP tool lists can change (list_changed), the MCP instructions section is DANGEROUS_uncached.", "Hook additionalContext/systemMessage/plain stdout are CAPPED at 10,000 chars; overflow is written to a file and replaced with a preview + path. additionalContext is wrapped in a tag and inserted at the event-appropriate position (start of convo / alongside prompt / next to tool result / end of turn) \u2014 it is model-visible but not shown as a chat message.", "Exit code 2 is the ONLY blocking signal for most hook events (exit 1 = non-blocking error, action proceeds). UserPromptSubmit exit 2 erases the prompt; PreToolUse exit 2 blocks the tool; Stop exit 2 keeps Claude going. JSON output is only parsed on exit 0.", "As of v2.1.139 command hooks run without a controlling terminal on macOS/Linux (/dev/tty unavailable); use terminalSequence JSON field (allowlisted OSC 0/1/2/9/99/777 + BEL, v2.1.141+) for notifications instead.", "For OpenAI-compatible providers, normalizeMessagesForAPI() flattens the SystemPrompt[] by joining with \\n\\n into a single 'system' role message and strips cache_control / Anthropic beta headers.", "Plan mode injects an attachment to user messages ('plan only, do not code yet') and is reflected as permission_mode:'plan' in hook input. 
Plan mode actually writes plan markdown files then wipes the planning context before execution."], "openQuestions": ["Exact byte content / wording of the 12 static sections in the CURRENT (2026) public build \u2014 Piebald-AI repo tracks this per version; should be sampled directly from the target version for a 1:1 replica.", "Full current set of feature-flag gates (TOKEN_BUDGET, CACHED_MICROCOMPACT, PROACTIVE/KAIROS, COORDINATOR_MODE, experimental_skill_search, verification_agent, fork_subagent, explore_plan_agents, undercover) and their default on/off state per build.", "Precise wording of the env-info template line (Working directory / Is a git repository / Platform / Shell / OS Version / model name / knowledge cutoff) and whether 'date' is still injected in 2026 builds.", "Whether managed-policy and ~/.claude/CLAUDE.md are injected into the SYSTEM PROMPT (as the CLI does) or only the user message (as the SDK does) \u2014 the two surfaces diverge; the Go replica must pick per surface.", "Exact implementation of mcp_delta_mode (per-turn attachment format) and scratchpad path scheme."], "sources": [{"title": "Modifying system prompts \u2014 Claude Code Docs (official)", "url": "https://code.claude.com/docs/en/agent-sdk/modifying-system-prompts", "why": "Authoritative: preset/append/custom/excludeDynamicSections, CLAUDE.md goes to conversation not system prompt, excludeDynamicSections min versions (TS v0.2.98 / Python v0.1.58), what env fields embed in the prompt and break cache."}, {"title": "How Claude remembers your project \u2014 Claude Code Docs (official)", "url": "https://code.claude.com/docs/en/memory", "why": "Authoritative CLAUDE.md cascade: 4 scopes + load order, ancestor walk, CLAUDE.local.md appended per level, @import max depth 4, HTML comment stripping, /compact re-injection of project root, claudeMdExcludes, managed CLAUDE.md paths, auto-memory first-200-lines/25KB cap."}, {"title": "Hooks reference \u2014 Claude Code Docs (official)", "url": 
"https://code.claude.com/docs/en/hooks", "why": "Authoritative hook lifecycle, all 30 events, matcher semantics (exact vs regex), mcp____ namespacing, 5 handler types, JSON output schema (additionalContext/systemMessage/permissionDecision/decision block/terminalSequence), exit-2 blocking, 10k char cap, wrapping and insertion-point rules."}, {"title": "System Prompt Assembly \u2014 DeepWiki (claude-code-best, indexed 2026-06-12)", "url": "https://deepwiki.com/claude-code-best/claude-code/2.3-system-prompt-assembly", "why": "Reverse-engineered from leaked source: getSystemPrompt() in src/constants/prompts.ts, branded SystemPrompt type, SYSTEM_PROMPT_DYNAMIC_BOUNDARY marker removed pre-send, systemPromptSection vs DANGEROUS_uncachedSystemPromptSection, buildEffectiveSystemPrompt priority ladder, splitSysPromptPrefix 3 cache modes, CLAUDE_CODE_SIMPLE fast path."}, {"title": "How Claude Code Builds Its System Prompt \u2014 18 Layers (Cadences)", "url": "https://codex.cadences.app/en/blog/claude-code-system-prompt/", "why": "Independent corroboration of the 18 ordered sections, static/dynamic boundary placement at section 12-13, anti-YAGNI section content, risk taxonomy LOW/MED/HIGH, conditional feature-flag gates (TOKEN_BUDGET, PROACTIVE/KAIROS, CACHED_MICROCOMPACT, COORDINATOR_MODE)."}, {"title": "How Claude Code Builds a System Prompt \u2014 dbreunig (2026-04-04)", "url": "https://www.dbreunig.com/2026/04/04/how-claude-code-builds-a-system-prompt.html", "why": "Most granular per-section inventory with conditional gates and variation triggers (output_style, user_type_ant, repl_mode, embedded_search, task_tool_enabled, agent_tool_enabled+fork_subagent, skills_enabled, experimental_skill_search, verification_agent, memory_configured, undercover, is_worktree, language_set, microcompact_enabled, token_budget, kairos_brief, is_git_repo&&!remote&&git_instructions_enabled, append_system_prompt), plus env-info template text and git snapshot block."}, {"title": "Server 
Instructions: Giving LLMs a user manual \u2014 MCP Blog", "url": "https://blog.modelcontextprotocol.io/posts/2025-11-03-using-server-instructions/", "why": "Confirms MCP servers return instructions in InitializeResult and hosts (including Claude Code) inject them into the system prompt; basis for the DANGEROUS_uncached MCP instructions section."}, {"title": "Piebald-AI/claude-code-system-prompts (GitHub)", "url": "https://github.com/Piebald-AI/claude-code-system-prompts", "why": "Version-tracked dump of the actual assembled system prompt text, 27 builtin tool descriptions, and sub-agent prompts (Explore/Plan/Task) \u2014 ground truth for exact wording per version."}, {"title": "Server instructions issue \u2014 anthropics/claude-code #43749", "url": "https://github.com/anthropics/claude-code/issues/43749", "why": "Documents the instructions field consumption from InitializeResult into session context."}, {"title": "Inside Claude Code's System Prompt \u2014 claudecodecamp", "url": "https://www.claudecodecamp.com/p/inside-claude-code-s-system-prompt", "why": "Community corroboration of 110+ conditionally assembled instructions and section ordering."}], "summary": "Claude Code's system prompt is not a static string but a per-turn assembled array of blocks (branded `SystemPrompt` type) built by `getSystemPrompt()` in `src/constants/prompts.ts` and resolved by `buildEffectiveSystemPrompt()`. It is split into a STATIC, globally-cacheable zone (~12 sections: identity, intro, system rules, doing-tasks, actions, using-tools, tone/style, output-efficiency, token-budget, proactive) and a DYNAMIC, per-session zone (env info, scratchpad, function-result-clearing, MCP instructions, memory, CLAUDE.md, output-style, git-status, append-prompt) divided by a `__SYSTEM_PROMPT_DYNAMIC_BOUNDARY__` marker that is stripped before the API call. 
Each section is either memoized via `systemPromptSection()` (cached until `/clear` or `/compact`) or recomputed every turn via `DANGEROUS_uncachedSystemPromptSection()` (used for MCP instructions and env info). CLAUDE.md content is injected as a USER message (project context), NOT into the system prompt in the SDK; in the interactive CLI it appears in the prompt assembly. Hooks inject `<system-reminder>` tags via `additionalContext`/`systemMessage` at event-appropriate positions. The Agent SDK exposes preset/custom/append options and `excludeDynamicSections` (v0.2.98+) to move per-session context into the first user message for cross-session cache reuse."}, "memory-claudemd": {"asOfDate": "2026-06", "claimsToVerify": ["Auto memory requires Claude Code v2.1.59+ and stores MEMORY.md at ~/.claude/projects/<project>/memory/, where the first 200 lines OR 25KB (whichever comes first) are loaded at session start; topic files are NOT loaded at startup but auto-surfaced (up to 5) by a Sonnet side-query as attachments, not via FileReadTool.", "@import recursion is capped at a MAXIMUM DEPTH OF 4 HOPS per the current official docs (code.claude.com/docs/en/memory) \u2014 note many third-party write-ups and some mirror sites say 5; the canonical Anthropic doc says 4. Re-verifier should confirm against the live docs page.", "Managed-policy CLAUDE.md precedence: managed (highest) \u2192 CLI args \u2192 local \u2192 project \u2192 user (lowest); the managed CLAUDE.md (file or the managed-only `claudeMd` settings key) cannot be excluded by claudeMdExcludes, and the Windows legacy path C:\\ProgramData\\ClaudeCode\\managed-settings.json was removed in v2.1.75 (now C:\\Program Files\\ClaudeCode\\).", "Block-level HTML comments in CLAUDE.md are stripped before context injection (comments inside code fences are preserved; visible via Read tool)."], "components": [{"config": "Path: ./CLAUDE.md (lower precedence) then ./CLAUDE.local.md appended after at same level. 
Excludable via claudeMdExcludes.", "dataModel": "Files: CLAUDE.md, CLAUDE.local.md. Target size <200 lines (guideline).", "mechanism": "Claude Code walks up from cwd to (but not including) filesystem root, checking each dir for CLAUDE.md + CLAUDE.local.md. All discovered files are concatenated (not overridden), ordered root-down so cwd-level is read LAST. At each level CLAUDE.local.md is appended after CLAUDE.md. Subdirectory files load lazily on demand when Claude reads files there. Managed-policy + user + project-root files survive /compact (re-read from disk); nested subdir files do NOT auto-reinject.", "name": "CLAUDE.md directory-walk + concatenation order", "purpose": "Resolve and assemble all CLAUDE.md/CLAUDE.local.md into one context blob, root-to-cwd, no overriding."}, {"config": "OS-specific managed paths: macOS /Library/Application Support/ClaudeCode/CLAUDE.md; Linux/WSL /etc/claude-code/CLAUDE.md; Windows C:\\Program Files\\ClaudeCode\\CLAUDE.md. Or in managed-settings.json via the `claudeMd` key (managed/policy scope only; ignored in user/project/local).", "dataModel": "managed-settings.json: {\"claudeMd\": \"Always run make lint\\nNever push to main\"}. managed-settings.d/*.json merged systemd-style (alphabetical, arrays concat+dedup, objects deep-merged, dotfiles ignored).", "mechanism": "Managed-policy CLAUDE.md is highest precedence (above CLI args), loaded BEFORE user and project CLAUDE.md, and CANNOT be excluded by claudeMdExcludes. Three delivery mechanisms: server-managed (Claude.ai admin console), MDM/OS plist (macOS com.anthropic.claudecode domain / Windows HKLM\\SOFTWARE\\Policies\\ClaudeCode registry 'Settings' JSON value), file-based managed-settings.json + drop-in managed-settings.d/. Settings precedence overall: Managed > CLI args > Local > Project > User. 
Permissions MERGE across scopes; most other settings OVERRIDE.", "name": "Settings-scope precedence (managed \u2192 user \u2192 project \u2192 local)", "purpose": "Determines which scope wins and how CLAUDE.md content is sourced from settings vs files."}, {"config": "Set CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1.", "dataModel": "Loaded files: CLAUDE.md, .claude/CLAUDE.md, .claude/rules/*.md, CLAUDE.local.md (skipped if local excluded via --setting-sources).", "mechanism": "Regex/token expansion of @-prefixed paths inside CLAUDE.md. First-encounter of EXTERNAL imports in a project triggers an approval dialog listing files; if declined, imports stay disabled and dialog does not reappear. AGENTS.md is NOT read natively \u2014 bridge via `@AGENTS.md` import or symlink.", "name": "@import expansion + --add-dir", "purpose": "Compose memory from multiple files; load memory from additional directories."}, {"config": "Settings: autoMemoryEnabled (bool, default true), autoMemoryDirectory (absolute or ~/). Env: CLAUDE_CODE_DISABLE_AUTO_MEMORY=1.", "dataModel": "Files: MEMORY.md (index, <200 lines / 25KB), topic files with frontmatter name/description/type(one of: user, feedback, project, reference). Line format: '- [Title](file.md) \u2014 hook' (~150 chars).", "mechanism": "At session start, first 200 lines OR first 25KB of MEMORY.md (whichever first) is loaded into system prompt. Topic files are NOT loaded at startup. Per-turn, a Sonnet side-query scans up to 200 .md files (excluding MEMORY.md), extracts filename/mtime/description/type, returns JSON {selected_memories:[]} (max 256 tokens, up to 5 files), which are injected as `relevant_memories` attachments (NOT FileReadTool calls). Topic files use 2-step save: (1) write file with YAML frontmatter name/description/type, (2) add one-line pointer to MEMORY.md. 
Background autoDream consolidation fires after >=24h since last consolidation AND >=5 sessions, runs as forked agent, protected by .consolidate-lock PID file with 60-min stale guard.", "name": "Auto memory (MEMORY.md index + topic files)", "purpose": "Claude-written scratchpad: index always loaded, topic files surfaced on-demand."}, {"config": "Subclass betaMemoryTool (TS) / BetaAbstractMemoryTool (Python/C#) / BetaMemoryToolHandler (Java). Tool name='memory'. Must restrict to /memories dir, validate canonical paths, reject ../ sequences and URL-encoded traversal.", "dataModel": "Tool type 'memory_20250818', name 'memory'. Commands: view{path,view_range?}, create{path,file_text}, str_replace{path,old_str,new_str}, insert{path,insert_line,insert_text}, delete{path}, rename{old_path,new_path}. Paths confined to /memories/.", "mechanism": "Client-side tool; the app implements handlers. Claude auto-views /memories before tasks. Tool returns: directories listed 2-deep with human sizes (tab-separated, excluding dotfiles + node_modules); files returned with line numbers (6-char right-aligned, tab sep, 1-indexed, max 999,999 lines). Auto system-prompt injection: 'IMPORTANT: ALWAYS VIEW YOUR MEMORY DIRECTORY BEFORE DOING ANYTHING ELSE. MEMORY PROTOCOL...'. NOTE: this is the API/SDK memory tool, distinct from Claude Code's built-in auto-memory subsystem \u2014 Claude Code's auto-memory does not expose this tool by default; the CLI uses its own filesystem-based memory instead.", "name": "memory tool (API tool_type memory_20250818)", "purpose": "Generic file-based memory CRUD primitive (API/SDK clients), distinct from Claude Code's built-in auto-memory."}, {"config": "Hooks key: InstructionsLoaded with matcher values session_start|nested_traversal|path_glob_match|include|compact. Exit code ignored (non-blocking). Output capped 10,000 chars.", "dataModel": "Hook stdin JSON includes load_reason field. JSON output via exit 0 stdout. 
hookSpecificOutput.hookEventName='InstructionsLoaded'.", "mechanism": "Fires at session start AND when files lazily load mid-session (e.g. subdir CLAUDE.md read, path-glob rule triggered, @import include resolved, /compact re-inject). Matcher field = load reason. Non-blocking (exit code ignored), cannot decision-control; useful for logging which files load and why.", "name": "InstructionsLoaded hook", "purpose": "Observability for memory/rules loading."}, {"config": "Rule files in .claude/rules/ (recursive) or ~/.claude/rules/. frontmatter: paths: [globs].", "dataModel": "YAML frontmatter `paths: [\"src/api/**/*.ts\"]`. Rules WITHOUT paths frontmatter load unconditionally at launch at .claude/CLAUDE.md priority.", "mechanism": "Rules in .claude/rules/*.md are discovered recursively. Those with a `paths:` frontmatter field only inject when Claude reads a file matching the glob. User-level rules load before project rules (lower precedence). Trigger on file read, not every tool use. Symlinks supported, circular handled. Loaded on demand when matching files opened. 
Also loadable from --add-dir dirs when CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1.", "name": ".claude/rules/ path-scoped rules", "purpose": "Modular, conditional memory injection scoped to file globs."}], "confidence": "high", "dimension": "memory-claudemd", "externalInterfaces": ["File paths: ./CLAUDE.md, ./.claude/CLAUDE.md, ./CLAUDE.local.md, ~/.claude/CLAUDE.md, ~/.claude/rules/*.md, .claude/rules/*.md, ~/.claude/projects//memory/MEMORY.md + topic .md files", "Managed CLAUDE.md paths: macOS /Library/Application Support/ClaudeCode/CLAUDE.md | Linux/WSL /etc/claude-code/CLAUDE.md | Windows C:\\Program Files\\ClaudeCode\\CLAUDE.md", "managed-settings.json + managed-settings.d/*.json drop-in dir in same system dir (drop-in requires v2.1.x+)", "Settings keys: claudeMd (managed-only), claudeMdExcludes (glob array, mergeable), autoMemoryEnabled (bool), autoMemoryDirectory (abs or ~/), --setting-sources, --add-dir flag", "Env vars: CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1, CLAUDE_CODE_DISABLE_AUTO_MEMORY=1, CLAUDE_CODE_NEW_INIT=1", "API memory tool: tools=[{\"type\":\"memory_20250818\",\"name\":\"memory\"}], path root /memories/, commands view/create/str_replace/insert/delete/rename", "CLI commands: /init, /memory", "Hook event: InstructionsLoaded (matcher values: session_start, nested_traversal, path_glob_match, include, compact)", "UI keybinding: '#' prefix in prompt = quick-add memory to CLAUDE.md"], "keyBehaviors": ["CLAUDE.md is CONTEXT, NOT config \u2014 injected as a user message AFTER the system prompt, never guaranteed to be followed. To hard-enforce behavior use PreToolUse hooks or managed settings permissions.deny.", "Concatenation is root-to-cwd, cwd-level read LAST; per level CLAUDE.local.md appended after CLAUDE.md. Files never override each other across the tree.", "Block-level HTML comments are STRIPPED before context injection (saves tokens). Comments INSIDE code fences are preserved. 
Read tool shows comments unstripped.", "@import relative paths resolve relative to the file CONTAINING the import, NOT cwd. Both relative and absolute paths allowed. Home-dir imports (@~/.claude/x.md) for cross-worktree sharing.", "@import recursion MAX DEPTH = 4 hops (per current official docs code.claude.com/docs/en/memory). NOTE: several third-party write-ups and some mirror sites say 5; the canonical Anthropic doc states 4 \u2014 verify against live docs before hardcoding.", "Auto memory needs Claude Code v2.1.59+. MEMORY.md load cap: first 200 lines OR first 25KB, whichever first; content beyond NOT loaded at start. CLAUDE.md is loaded in FULL regardless of length (no 200-line hard cap, but adherence degrades).", "Project path in ~/.claude/projects//memory/ is derived from the GIT REPO root, so all worktrees + subdirs in one repo share ONE auto-memory dir. Outside a git repo, project root is used.", "autoMemoryDirectory must be absolute or start with ~/. When set in .claude/settings.json or settings.local.json, honored only AFTER workspace trust dialog accepted (same gate as hooks).", "claudeMdExcludes matches ABSOLUTE file paths via glob, configurable at any settings layer, arrays MERGE across layers. Managed-policy CLAUDE.md is NEVER excludable.", "Subagents can maintain their own auto memory (per-subagent memory dirs).", "Topic files surfaced by a Sonnet side-query (NOT FileReadTool): up to 5 files/turn, returned as JSON {selected_memories:string[]} max 256 tokens, injected as relevant_memories attachments, already-surfaced filtered out.", "autoDream background consolidation: triggers after >=24h since last consolidation AND >=5 sessions, forked subagent, 4 phases (orient/gather/consolidate/prune), PID lock file .consolidate-lock with 60-min stale guard, rollback rewinds mtime on failure.", "Topic file 4 types: user, feedback, project, reference. YAML frontmatter name/description/type. 
description is what Sonnet selector reads for relevance \u2014 vague = never surfaced.", "What NOT to save: code patterns/architecture/paths (derivable), git history (git log authoritative), debugging fixes (in commit msg), anything already in CLAUDE.md, ephemeral task details.", "Managed settings parse tolerantly since v2.1.169: invalid entries stripped with warning, rest enforced. Security fields (allowedMcpServers, enforceAvailableModels, forceLoginOrgUUID, etc.) have per-field fail-closed behavior.", "Legacy Windows managed path C:\\ProgramData\\ClaudeCode\\managed-settings.json removed in v2.1.75; must migrate to C:\\Program Files\\ClaudeCode\\.", "Settings files are watched and hot-reloaded mid-session (permissions, hooks, apiKeyHelper) firing ConfigChange hook; but `model` and outputStyle are read-once at start (use /model or restart).", "# quick-add memory: typing '#' prefix in prompt triggers Claude Code to write the memory into the relevant CLAUDE.md file (had a regression bug on Windows, issue #14868, Dec 2025)."], "openQuestions": ["EXACT import recursion depth: official docs say max 4 hops, but several mirrors/third-party deep-dives say 5 \u2014 needs live re-verification against code.claude.com/docs/en/memory and the actual MAX_IMPORT_DEPTH constant in source.", "Exact JSON schema of the InstructionsLoaded hook stdin payload (full field list, not just load_reason) \u2014 not fully captured; would need the hooks reference #hook-events section.", "Whether Claude Code's built-in auto-memory uses the SAME memory_20250818 tool under the hood or a separate proprietary filesystem layer (manavgup deep-dive implies a separate subsystem: memdir/autoDream/extractMemories services, NOT the API memory tool).", "Exact '<project>' directory-name hashing/encoding scheme used under ~/.claude/projects/<project>/memory/ (how repo path -> folder name).", "Whether the Sonnet-side-query memory surfacing (up to 5 files, 256-token JSON) is documented officially or only reverse-engineered 
\u2014 official docs only state 'first 200 lines/25KB loaded'."], "sources": [{"title": "How Claude remembers your project \u2014 Claude Code Docs (code.claude.com/docs/en/memory)", "url": "https://code.claude.com/docs/en/memory", "why": "Canonical source for the full memory subsystem: CLAUDE.md hierarchy table, @import 4-hop limit, walk-up resolution order, CLAUDE.local.md appending, auto memory (MEMORY.md 200-line/25KB cap, ~/.claude/projects/<project>/memory/, autoMemoryEnabled/Directory/CLAUDE_CODE_DISABLE_AUTO_MEMORY, v2.1.59+ requirement, compaction survival, claudeMd managed key, claudeMdExcludes, --add-dir env, InstructionsLoaded hook reference, .claude/rules/ path-scoping."}, {"title": "Claude Code settings \u2014 Claude Code Docs (code.claude.com/docs/en/settings)", "url": "https://code.claude.com/docs/en/settings", "why": "Authoritative settings-scope precedence (Managed > CLI > Local > Project > User), managed-settings.json locations per OS, managed-settings.d/ drop-in systemd-style merge, managed CLAUDE.md path equivalence, v2.1.75 Windows legacy-path removal, v2.1.169 tolerant parsing, hot-reload + ConfigChange hook, model/outputStyle read-once."}, {"title": "Memory tool \u2014 Claude API Docs (platform.claude.com/docs/en/agents-and-tools/tool-use/memory-tool)", "url": "https://platform.claude.com/docs/en/agents-and-tools/tool-use/memory-tool", "why": "Defines the API memory tool (type memory_20250818, name memory, commands view/create/str_replace/insert/delete/rename, /memories dir, path-traversal security, return formats, auto MEMORY PROTOCOL prompt). 
Distinct from Claude Code's built-in auto-memory."}, {"title": "Hooks reference \u2014 Claude Code Docs (code.claude.com/docs/en/hooks)", "url": "https://code.claude.com/docs/en/hooks", "why": "Confirms InstructionsLoaded event exists, fires at session start + lazy load, matcher = load reason (session_start, nested_traversal, path_glob_match, include, compact), exit code ignored (non-blocking), plus full hook lifecycle including PreCompact/PostCompact relevant to memory re-injection."}, {"title": "09 \u2014 Memory System \u00b7 Inside Claude Code (manavgup.github.io/shipai)", "url": "https://manavgup.github.io/shipai/deep-dives/claude-code/09-memory.html", "why": "Reverse-engineered internals: src/memdir/autoDream/extractMemories services, MEMORY.md pointer-index format, 4 memory types (user/feedback/project/reference), Sonnet side-query surfacing (up to 5 files, 256-token JSON), autoDream 24h+5-session trigger with .consolidate-lock 60-min stale guard, 200-line/25KB truncation detail. Useful for a faithful reimplementation even though it's community-sourced."}, {"title": "[BUG] # memory shortcut no longer saves to CLAUDE.md \u2014 anthropics/claude-code#14868", "url": "https://github.com/anthropics/claude-code/issues/14868", "why": "Confirms the '#' prefix quick-add-memory-to-CLAUDE.md behavior is a real, official feature (and documents a Dec 2025 Windows regression)."}, {"title": "Boris Cherny Threads post \u2014 '#' quick-add memory announcement", "url": "https://www.threads.com/@boris_cherny/post/DHq60G7vkNz", "why": "Anthropic staff announcement confirming '#' prefix writes memories to CLAUDE.md files."}], "summary": "Claude Code's memory subsystem has two parallel, complementary mechanisms. 
(1) CLAUDE.md files are human-authored instruction files loaded into every session as context (NOT enforced config) via a strict precedence hierarchy: managed-policy \u2192 user (~/.claude/CLAUDE.md) \u2192 project (./CLAUDE.md or ./.claude/CLAUDE.md) \u2192 local (./CLAUDE.local.md), all concatenated root-to-cwd and never overriding each other. CLAUDE.md supports `@path` import syntax (relative resolves against the importing file, not cwd; recursion capped at max depth 4 hops; HTML comments stripped before injection). (2) Auto memory (Claude-written, requires v2.1.59+) lives in ~/.claude/projects/<project>/memory/ keyed by git repo root (shared across worktrees), with MEMORY.md as a pointer-index (first 200 lines OR 25KB loaded into context) and topic .md files surfaced on-demand by a Sonnet side-query. A separate generic API \"memory\" tool (tool_type memory_20250818, name \"memory\") exists for SDK clients operating a /memories directory. The `#` prefix in the REPL quick-adds a memory to the relevant CLAUDE.md. CLAUDE.md content is injected as a USER message after the system prompt, and the InstructionsLoaded hook fires whenever any CLAUDE.md or .claude/rules/*.md enters context."}, "permissions": {"asOfDate": "2026-06", "claimsToVerify": ["The SDK 6-step permission evaluation order is exactly: Hooks -> Deny rules -> Ask rules -> Permission mode -> Allow rules -> canUseTool callback; and ask rules force a prompt even in bypassPermissions mode, while in dontAsk mode ask rules are denied instead of prompting.", "auto mode is ignored from project/local settings (.claude/settings.json, .claude/settings.local.json) as of v2.1.142, and must be set in ~/.claude/settings.json; as of v2.1.126 bypassPermissions no longer prompts even for writes to protected paths (.git, .claude, etc.) 
which earlier versions still prompted for.", "Rule syntax gotcha: Bash(ls *) requires the space and enforces a word-boundary (matches 'ls -la' not 'lsof'); Bash(ls*) without space matches both; trailing :* (Bash(ls:*)) is equivalent to trailing ' *' but is ONLY recognized at end of pattern; Read/Edit pattern anchors differ \u2014 //path=filesystem root, ~/path=home, /path=project root (NOT absolute!), path or ./path=relative to cwd.", "Permission rules evaluate deny->ask->allow in order with FIRST match winning regardless of specificity: a matching ask rule prompts even if a more specific allow rule also matches; and Read/Edit allow rules require BOTH symlink path and target to match while deny rules fire if EITHER matches.", "The NDJSON control_response for 'allow' REQUIRES an updatedInput field (original or modified input); deny REQUIRES a message field; request_id must match; CLI blocks ~60s waiting for a response, and without --permission-prompt-tool stdio tools auto-deny in non-interactive mode."], "components": [{"config": "settings.json under `permissions.defaultMode`. CLI flag `--permission-mode <mode>` overrides for one session. Valid values: default, acceptEdits, plan, auto, dontAsk, bypassPermissions.", "dataModel": "PermissionMode = \"default\" | \"acceptEdits\" | \"plan\" | \"auto\" | \"dontAsk\" | \"bypassPermissions\". (Python SDK Literal only declares 4: default/acceptEdits/plan/bypassPermissions; CLI also supports auto and dontAsk.)", "mechanism": "Shift+Tab cycles default->acceptEdits->plan. Enabled optional modes slot in after plan in order: bypassPermissions first, auto last. auto appears only via opt-in; dontAsk never appears in cycle (set via flag). bypassPermissions requires startup with --permission-mode bypassPermissions / --dangerously-skip-permissions / --allow-dangerously-skip-permissions (the --allow- variant adds to cycle without activating). 
On Linux/macOS bypassPermissions refuses to run as root/sudo (check auto-skipped inside recognized sandbox). Modes set the baseline; deny+explicit-ask rules apply in EVERY mode including bypassPermissions.", "name": "Permission Modes", "purpose": "Global session-level policy controlling how often tools pause for approval."}, {"config": "Keys live under top-level `permissions` object. Precedence (high->low): Managed > CLI args > local project (.claude/settings.local.json) > shared project (.claude/settings.json) > user (~/.claude/settings.json). Deny at ANY level cannot be overridden. Settings files are hot-reloaded (permissions/hooks/ConfigChange hook fire).", "dataModel": "Rule = `Tool` | `Tool(specifier)`. `Bash`/`Bash(*)` = all uses (as deny, removes tool from model context entirely). Scoped deny like `Bash(rm *)` leaves tool available, blocks matching calls.", "mechanism": "Evaluation order: DENY -> ASK -> ALLOW; first match wins regardless of specificity. A matching ASK prompts even when a more specific ALLOW also matches. Bare-name deny (e.g. `Bash`) removes the tool from Claude's context before evaluation; only scoped deny (e.g. `Bash(rm *)`) is matched at the per-call step. Enforced by Claude Code, NOT by the model (CLAUDE.md only shapes behavior, doesn't grant access).", "name": "Permission Rules (allow/ask/deny)", "purpose": "Per-tool, pattern-based pre-approval / forced-prompt / block lists in settings.json."}, {"config": "Read-only set is built-in and NOT configurable (override via ask/deny rule).", "dataModel": "Separators: && || ; | |& & . Stripped wrappers: timeout, time, nice, nohup, stdbuf, bare xargs (no flags). NOT stripped: direnv exec, devbox run, mise exec, npx, docker exec (so `Bash(devbox run *)` matches anything after run). Exec wrappers (watch, setsid, ionice, flock) and find -exec/-delete always prompt.", "mechanism": "Glob `*` matches any chars including spaces (one wildcard spans multiple args). 
Space before `*` enforces word boundary: `Bash(ls *)` matches `ls -la` not `lsof`; `Bash(ls*)` matches both. Trailing `:*` is equivalent to trailing ` *` but ONLY at end of pattern. Claude Code is shell-operator-aware: command separators (&& || ; | |& & newline) split compound commands and EACH subcommand must match independently. Approving compound `git status && npm test` saves up to 5 separate rules (e.g. just `npm test`). Built-in read-only commands run without prompt in every mode: ls, cat, echo, pwd, head, tail, grep, find, wc, which, diff, stat, du, cd, and read-only git forms. Read-only forms allow unquoted globs; write/exec-capable flags (find -delete, sort, sed, git) still prompt.", "name": "Bash Pattern Matching", "purpose": "Match shell commands against allow/deny rules with prefix/suffix/wildcard globs."}, {"config": "cd into working/additional dir is read-only; cd + git in one compound always prompts.", "dataModel": "Symlink rule: Allow requires BOTH symlink path AND target to match; Deny fires if EITHER matches. `*` = within one segment, `**` = across directories. Bare filename = gitignore semantics (any depth): `Read(.env)` == `Read(**/.env)`.", "mechanism": "Read rules apply to Read + Grep + Glob + @file mentions + IDE-open-file context. Edit rules apply to all built-in editing tools AND file commands recognized in Bash (cat, head, tail, sed) \u2014 but NOT arbitrary subprocesses. Four anchor types: `//abs/path` (filesystem root), `~/path` (home), `/path` (PROJECT ROOT, not absolute!), `path`/`./path` (cwd). A pattern like `/Users/alice/file` is relative to project root, NOT absolute. 
Windows paths normalized to POSIX (C:\\Users\\alice -> /c/Users/alice).", "name": "Read/Edit Path Rules", "purpose": "File-path-scoped allow/deny using gitignore-style patterns with 4 anchor types."}, {"config": "autoAllowBashIfSandboxed: true (default) lets sandboxed Bash skip bare-Bash ask rule.", "dataModel": "Network deny: WebFetch rules + sandbox deniedDomains both apply (deny-first).", "mechanism": "WebFetch rules use `domain:` prefix matching hostname (case-insensitive, trailing `.` stripped). `*` matches across `.` ONLY as leading `*.` or whole pattern; elsewhere within one label. Exact rule beats wildcard when both match. Sandbox (Bash-only, OS-level) merges with permissions: filesystem boundary = sandbox.filesystem + Read/Edit deny; network boundary = WebFetch rules + allowedDomains/deniedDomains.", "name": "WebFetch + Sandbox Interaction", "purpose": "Network/domain gating, complementary to OS sandbox."}, {"config": "disableAutoMode / disableBypassPermissionsMode set to \"disable\" (any scope, typically managed). allowManagedPermissionRulesOnly prevents user/project allow/ask/deny rules.", "dataModel": "Source enum: userSettings | projectSettings | localSettings | session. Behavior enum: allow | deny | ask. Update.type: addRules | replaceRules | removeRules | setMode | addDirectories | removeDirectories.", "mechanism": "High-precedence settings that cannot be overridden. Managed-only keys include allowManagedPermissionRulesOnly (only managed allow/ask/deny apply), disableBypassPermissionsMode, disableAutoMode. Precedence: Managed > CLI args > Local project > Shared project > User. If denied at any level, nothing can allow it. 
Embedder can tighten (not loosen) via managedSettings when parentSettingsBehavior=merge.", "name": "Settings Precedence + Managed-Only", "purpose": "Merge rules across scopes with deny-wins semantics; org-level enforcement."}, {"config": "Output format determined by --output-format (text|stream-json|json).", "dataModel": "types.py: PermissionResultAllow{behavior:\"allow\", updated_input, updated_permissions?}; PermissionResultDeny{behavior:\"deny\", message, interrupt?}. ToolPermissionContext{signal, suggestions: [PermissionUpdate]}. CanUseTool = Callable[[str, dict, ToolPermissionContext], Awaitable[PermissionResult]].", "mechanism": "SDK exposes `canUseTool(tool_name, input, context)` callback returning PermissionResultAllow (with updated_input + optional updated_permissions for 'always allow') or PermissionResultDeny (with message). In Python this callback requires streaming mode AND a PreToolUse hook returning {continue_:true} to keep the stream open. The callback can be pending indefinitely (defer decision to resume later). Also fires for AskUserQuestion clarifying questions. Hooks run BEFORE canUseTool and can allow/deny/modify.", "name": "canUseTool Callback (SDK)", "purpose": "Runtime interactive approval surfaced to embedding application."}, {"config": "Flags required: --output-format stream-json --input-format stream-json --verbose --permission-prompt-tool stdio. DEBUG_CLAUDE_AGENT_SDK=1 or --debug for logs.", "dataModel": "control_request{type, request_id, request:{subtype:\"can_use_tool\"|\"set_permission_mode\", tool_name, input, decision_reason?, tool_use_id?, permission_suggestions?, mode?}}. control_response{type, response:{subtype:\"success\", request_id, response:{behavior:\"allow\"|\"deny\", updatedInput|message}}}.", "mechanism": "Headless CLI driven by host over stdin/stdout NDJSON. 
With `--permission-prompt-tool stdio`, when a tool needs approval CLI emits a `control_request` (subtype `can_use_tool`) and BLOCKS (~60s default) until host replies with matching `control_response`. Allow MUST include `updatedInput` (original or modified); deny MUST include `message`; request_id must match. Without this flag tools auto-deny in non-interactive mode. Dynamic mid-session mode switch via control_request subtype `set_permission_mode`.", "name": "NDJSON Control Protocol (CLI stdio)", "purpose": "Wire protocol for embedding hosts to receive/approve permission prompts."}, {"config": "On enter auto mode, dropped: Bash(*)/PowerShell(*), Bash(python*) wildcards, package-manager run commands, Agent allow rules. Narrow rules (Bash(npm test)) carry over. Restored on exit.", "dataModel": "Non-configurable thresholds. Classifier sees user msgs + tool calls + CLAUDE.md; tool results STRIPPED (separate server-side probe flags suspicious tool-result content).", "mechanism": "Auto mode (v2.1.83+, research preview) routes non-trivial actions to a server-side classifier model (independent of /model). Trusts working dir + configured remotes; everything else external. Reads + working-dir edits skip classifier; shell/network go through it. Blocked by default: curl|bash, sensitive data exfil, prod deploys, mass deletion, IAM grants, force push/push to main. On 3 consecutive OR 20 total blocks, auto mode pauses and resumes prompting; non-interactive `-p` mode aborts. Boundaries stated in conversation act as block signals (re-read from transcript each check, lost on compaction).", "name": "Auto Mode Classifier", "purpose": "Background model classifier that approves/blocks actions to eliminate routine prompts."}, {"config": "permissions.allow rules do NOT pre-approve protected-path writes \u2014 safety check runs before allow rules. 
`.claude/worktrees` is exempt (Claude's own worktrees).", "dataModel": "Dirs: .git, .config/git, .vscode, .idea, .husky, .cargo, .devcontainer, .yarn, .mvn, .claude (except .claude/worktrees). Files: .gitconfig, .gitmodules, .bashrc, .zshrc, .profile, .envrc, .npmrc, .yarnrc.yml, .pnp.cjs, .bazelrc, .pre-commit-config.yaml, lefthook.yml, gradle-wrapper.properties, .devcontainer.json, .mcp.json, .claude.json, etc.", "mechanism": "A fixed set of dirs/files (repo state + Claude config + shell/package config) whose writes are never auto-approved except in bypassPermissions (as of v2.1.126). default/acceptEdits/plan -> prompt; auto -> classifier; dontAsk -> deny; bypassPermissions -> allow. Prompt for .claude/ write offers 'Yes, and allow Claude to edit its own settings for this session'.", "name": "Protected Paths", "purpose": "Circuit breaker preventing corruption of repo state and Claude's own config."}], "confidence": "high", "dimension": "permissions", "keyBehaviors": ["Six modes total: default, acceptEdits, plan, auto, dontAsk, bypassPermissions. The Python SDK PermissionMode Literal only declares 4 (default/acceptEdits/plan/bypassPermissions) \u2014 auto and dontAsk are CLI-level and TypeScript-only for `auto`.", "auto mode requires v2.1.83+ AND plan + model (Opus 4.6+/Sonnet 4.6 on Anthropic API; Opus 4.7/4.8 only on Bedrock/Vertex/Foundry) AND on Bedrock/Vertex/Foundry the env var CLAUDE_CODE_ENABLE_AUTO_MODE=1 (v2.1.158+). Admins set permissions.disableAutoMode=\"disable\" to lock off. auto is IGNORED in project/local settings as of v2.1.142 (must be in ~/.claude/settings.json or managed).", "bypassPermissions as of v2.1.126 NO LONGER prompts for protected-path writes (earlier versions did). It still prompts for explicit ask rules and for rm targeting / or ~. Refuses to run as root/sudo on Linux/macOS (auto-skipped in recognized sandbox). 
disableBypassPermissionsMode=\"disable\" blocks it.", "dontAsk mode auto-DENIES every prompt; only permissions.allow rules and read-only Bash commands execute; explicit ask rules are DENIED (not prompted). Cloud (web) sessions ignore defaultMode dontAsk and bypassPermissions from settings files.", "acceptEdits auto-approves: Edit/Write + filesystem Bash cmds (mkdir, touch, rm, rmdir, mv, cp, sed) + their safe prefixes (LANG=C, NO_COLOR=1) + wrappers (timeout/nice/nohup). Only for paths inside cwd or additionalDirectories. PowerShell: Set-Content, Add-Content, Clear-Content, Remove-Item + aliases.", "Rule specificity does NOT change evaluation order: deny -> ask -> allow, first match wins. A matching ask prompts even if a more-specific allow also matches the same call.", "Bash pattern word-boundary subtlety: `Bash(ls *)` (space before *) matches `ls -la` NOT `lsof`; `Bash(ls*)` matches both. `:*` suffix == trailing ` *` but only at END of pattern (`Bash(git:* push)` treats colon literally).", "Bash compound commands: separators && || ; | |& & newline each split into subcommands; EVERY subcommand must independently match. Approving `git status && npm test` saves up to 5 separate rules (one per subcommand needing approval). Wrappers timeout/time/nice/nohup/stdbuf and bare xargs are stripped BEFORE matching; direnv/devbox/mise/npx/docker exec are NOT.", "Read/Edit deny applies to built-in file tools + cat/head/tail/sed in Bash, but NOT to arbitrary subprocesses (python/node scripts). For OS-level enforcement use the sandbox.", "Symlink asymmetry: allow requires BOTH symlink path AND target to match; deny fires if EITHER matches. So symlink inside allowed dir pointing to denied file is blocked.", "WebFetch domain: `*` crosses `.` only as leading `*.` or whole pattern; `domain:github.*` matches github.io but NOT github.evil.com (anti-homograph). 
Exact rule beats wildcard in same list.", "MCP rule glob constraint: allow rules accept tool-name globs ONLY after literal `mcp__<server>__` prefix (server segment glob-free). Unanchored allow globs like `*` or `mcp__*` are SKIPPED with a startup warning. Deny/ask globs are unrestricted (`mcp__*`, `*`).", "auto mode on-enter drops broad allow rules: Bash(*)/PowerShell(*), Bash(python*) wildcard interpreters, package-manager run commands, Agent allow rules. Narrow rules like Bash(npm test) carry over. Restored on exit.", "auto mode fallback thresholds are NON-configurable: 3 consecutive blocks OR 20 total blocks -> pause and resume prompting. Any allowed action resets consecutive counter; total counter persists for session. Non-interactive -p mode aborts on repeated blocks.", "Settings precedence (high->low): Managed > CLI args > Local project (.claude/settings.local.json) > Shared project (.claude/settings.json) > User (~/.claude/settings.json). Deny at ANY level is final. Settings files are hot-reloaded.", "additionalDirectories in settings grants FILE ACCESS only; --add-dir flag additionally loads some config (skills, partial plugin settings, CLAUDE.md only if CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1).", "Allow rules don't constrain bypassPermissions: allowed_tools only pre-approves listed tools; unlisted tools fall through to mode where bypassPermissions approves everything. Use disallowed_tools to block specific tools in bypass.", "Subagent inheritance: parent bypassPermissions/acceptEdits/auto is inherited by ALL subagents and cannot be overridden per-subagent; any permissionMode in subagent frontmatter is IGNORED in auto mode. Classifier checks subagents at 3 points (spawn task desc, each action, return history).", "Hook decisions do NOT bypass deny/ask rules: a hook returning allow still gets deny/ask rules evaluated; a hook exit code 2 (block) takes precedence over allow rules. 
PreToolUse runs before the prompt; PermissionRequest hook is for notifications.", "Tool names containing _ or * are exempt from the 'unknown tool' startup warning; otherwise deny/ask rules matching no known tool emit a warning."], "openQuestions": ["Exact default ~60s control_request blocking timeout value and whether it is configurable (docs say '~60s default', gist says not configurable).", "Whether SDKControlPermissionRequest (control can_use_tool) carries permission_suggestions populated by default in the CLI build, or only in SDK-wrapped modes.", "Exact behavior of the auto-mode classifier's server-side tool-result suspicious-content probe (separate from classifier) \u2014 implementation detail not fully documented.", "Full enumeration of which `git` subcommands are classified read-only by the built-in read-only command set (only 'read-only forms of git' is documented generically)."], "sources": [{"title": "Configure permissions - Claude Code Docs", "url": "https://code.claude.com/docs/en/permissions", "why": "Primary source: full rule syntax (Tool/Tool(specifier)), deny->ask->allow evaluation, Bash/PowerShell/Read/Edit/WebFetch/MCP/Agent/Cd per-tool semantics, symlink handling, protected paths list, hooks interaction, settings precedence, managed-only keys."}, {"title": "Choose a permission mode - Claude Code Docs", "url": "https://code.claude.com/docs/en/permission-modes", "why": "Primary source for all 6 modes (default/acceptEdits/plan/auto/dontAsk/bypassPermissions), auto-mode classifier details (v2.1.83+, model/provider gating, 3-consecutive/20-total fallback, subagent 3-point checks), v2.1.126/v2.1.142 version-specific behavior, protected-path per-mode matrix, disable flags."}, {"title": "Configure permissions (Agent SDK) - Claude Code Docs", "url": "https://code.claude.com/docs/en/agent-sdk/permissions", "why": "Authoritative 6-step SDK evaluation order (Hooks->Deny->Ask->Mode->Allow->canUseTool), allowed_tools/disallowed_tools semantics, subagent mode 
inheritance, dontAsk/bypassPermissions edge cases, plan-mode forces edits through canUseTool."}, {"title": "Handle approvals and user input (Agent SDK) - Claude Code Docs", "url": "https://code.claude.com/docs/en/agent-sdk/user-input", "why": "canUseTool callback signature/args, PermissionResultAllow/Deny shapes, updated_input/updated_permissions for 'approve and remember', ToolPermissionContext.suggestions, AskUserQuestion routing, dummy PreToolUse hook requirement in Python."}, {"title": "claude_code_sdk/types.py (PermissionMode/PermissionUpdate/PermissionResult dataclasses)", "url": "https://github.com/anthropics/claude-code-sdk-python/blob/cfdd28a2/src/claude_code_sdk/types.py", "why": "Exact Python dataclass shapes for PermissionMode, PermissionUpdateDestination(userSettings/projectSettings/localSettings/session), PermissionRuleValue, PermissionUpdate(addRules/replaceRules/removeRules/setMode/addDirectories/removeDirectories), PermissionResultAllow/Deny, ToolPermissionContext."}, {"title": "ToolPermissionRequest struct - claude_codes Rust crate (docs.rs)", "url": "https://docs.rs/claude-codes/latest/claude_codes/io/struct.ToolPermissionRequest.html", "why": "Authoritative CLI wire struct: {tool_name, input, permission_suggestions, blocked_path, decision_reason, tool_use_id} + builder methods allow/allow_with/allow_and_remember confirming updatedInput + permissions shape."}, {"title": "claude-cli-agent-protocol skill (NDJSON control_request/control_response)", "url": "https://playbooks.com/skills/bohdan-shulha/skills/claude-cli-agent-protocol", "why": "Concrete NDJSON examples for control_request (subtype can_use_tool/set_permission_mode) and control_response (behavior allow needs updatedInput, deny needs message, request_id match, ~60s block, --permission-prompt-tool stdio requirement)."}, {"title": "Claude Code settings - Claude Code Docs", "url": "https://code.claude.com/docs/en/settings", "why": "Exact permissions.* settings keys 
(allow/ask/deny/additionalDirectories/defaultMode/disableBypassPermissionsMode/disableAutoMode/skipDangerousModePermissionPrompt), defaultMode valid values incl v2.1.142 auto-restriction, config scopes, hot-reload behavior, managed-only allowManagedPermissionRulesOnly."}], "summary": "Claude Code's permission system layers three independent mechanisms: (1) six session-level permission MODES (default, acceptEdits, plan, auto, dontAsk, bypassPermissions) that set the auto-approval baseline; (2) pattern-based RULE LISTS (allow/ask/deny) in settings.json (and via --allowedTools/--disallowedTools) that are evaluated in fixed order deny->ask->allow with first-match-wins regardless of specificity; and (3) a runtime INTERACTIVE callback (`canUseTool` in SDK; `control_request`/`control_response` NDJSON over stdin/stdout in headless CLI). Rules are enforced by the harness, never the model \u2014 CLAUDE.md/prompt text only shapes what Claude attempts, not what is allowed. Deny rules at ANY settings scope cannot be overridden (managed > CLI args > local project > shared project > user). The system is heavily version-evolved (2025-2026): `auto` mode (v2.1.83+, research preview, server-side classifier, fallback at 3-consecutive/20-total blocks), `dontAsk` (locked-down CI), `acceptEdits`/`auto`/`plan` aliases, protected-path write guards (bypass no longer prompts as of v2.1.126), and `additionalDirectories` for multi-root file access. The Go replica must implement the exact 6-step SDK evaluation order, the exact rule syntax (gitignore-style path anchors for Read/Edit, glob for Bash with process-wrapper stripping and compound-command splitting, domain: prefix for WebFetch), and the exact NDJSON control protocol for tool approvals."}, "hooks": {"asOfDate": "2026-06", "claimsToVerify": ["Exit code 2 is the ONLY blocking exit code; exit 1 is treated as a NON-blocking error and the action proceeds (the exception is WorktreeCreate where any non-zero exit aborts). 
PreToolUse multiple-hook precedence is deny > defer > ask > allow.", "Default timeouts: command/http/mcp_tool = 600s (10 min) but lowered to 30s on UserPromptSubmit and 10s on MessageDisplay; prompt = 30s; agent = 60s; SessionEnd has a special 1.5s default budget (raisable to 60s, or overridden by CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS). Stop hook override cap is 8 consecutive blocks (CLAUDE_CODE_STOP_HOOK_BLOCK_CAP). additionalContext/systemMessage/stdout capped at 10000 chars.", "PreToolUse uses hookSpecificOutput.permissionDecision (allow/deny/ask/defer) + permissionDecisionReason + updatedInput (NOT top-level decision/reason which is DEPRECATED for this event; legacy approve/block map to allow/deny). Other events (PostToolUse, Stop, UserPromptSubmit, PreCompact, ConfigChange) use TOP-LEVEL decision:'block' + reason. PermissionRequest uses hookSpecificOutput.decision.behavior (allow/deny). PreToolUse hooks fire BEFORE permission-mode checks and can deny even in bypassPermissions mode."], "components": [{"config": "Hook timeout defaults: command/http/mcp_tool = 600s (10 min); UserPromptSubmit lowers these to 30s; MessageDisplay lowers to 10s; prompt = 30s; agent = 60s; SessionEnd = 1.5s default (raised to highest per-hook timeout up to 60s; CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS overrides). disableAllHooks:true disables all (managed hooks need managed-level disable). allowManagedHooksOnly blocks user/project/plugin hooks.", "dataModel": "settings.json: {\"hooks\": {\"<EventName>\": [ {\"matcher\": \"<pattern>\", \"hooks\": [ <handler>, ... ] } ] }}. Matcher group = {matcher, hooks[]}. Handler (command) = {type:\"command\", command, args?, timeout?, async?, asyncRewake?, shell?, if?, statusMessage?, once?}. HTTP = {type:\"http\", url, headers?, allowedEnvVars?, timeout?}. mcp_tool = {type:\"mcp_tool\", server, tool, input?, timeout?}. prompt = {type:\"prompt\", prompt, model?, timeout?, continueOnBlock?}. 
agent = {type:\"agent\", prompt, model?, timeout?}.", "mechanism": "JSON config at 3 nesting levels: hook event name -> array of matcher groups (each {matcher, hooks:[]}) -> array of hook handler objects. On event fire: matcher evaluated against the input field (tool_name for tool events, source/reason/type for others); matched groups' handlers run in PARALLEL; identical handlers auto-deduped (command dedup by command+args, HTTP by URL). For tool events, an optional per-handler `if` field (permission-rule syntax like \"Bash(git *)\") filters further before spawning the process. Hooks run with user's full permissions and cwd = session cwd; env inherits parent plus CLAUDE_PROJECT_DIR, CLAUDE_PLUGIN_ROOT, CLAUDE_PLUGIN_DATA, CLAUDE_ENV_FILE, CLAUDE_CODE_REMOTE, CLAUDE_EFFORT. As of v2.1.139 macOS/Linux hooks run in their own session WITHOUT a controlling terminal (no /dev/tty).", "name": "Configuration schema & resolution", "purpose": "Defines where/how hooks are declared and merged across scopes"}, {"config": "Event-specific matchers: PreToolUse/PostToolUse/PostToolUseFailure/PermissionRequest/PermissionDenied on tool_name; SessionStart on source(startup|resume|clear|compact); Setup on init|maintenance; SessionEnd on reason(clear|resume|logout|prompt_input_exit|bypass_permissions_disabled|other); Notification on permission_prompt|idle_prompt|auth_success|elicitation_dialog|elicitation_complete|elicitation_response; SubagentStart/SubagentStop on agent_type; PreCompact/PostCompact on manual|auto; ConfigChange on user_settings|project_settings|local_settings|policy_settings|skills; StopFailure on error type; InstructionsLoaded on load reason; UserPromptExpansion on command name; Elicitation/ElicitationResult on MCP server name; FileChanged = literal filenames split on |.", "dataModel": "30+ events total. Tool-loop: PreToolUse, PermissionRequest, PermissionDenied, PostToolUse, PostToolUseFailure, PostToolBatch. 
Per-turn: UserPromptSubmit, UserPromptExpansion, Stop, StopFailure. Per-session: SessionStart, Setup, SessionEnd. Subagent/team: SubagentStart, SubagentStop, TeammateIdle, TaskCreated, TaskCompleted. Display: MessageDisplay. Async/side-effect: Notification, InstructionsLoaded, ConfigChange, CwdChanged, FileChanged, WorktreeCreate, WorktreeRemove. Compaction: PreCompact, PostCompact. MCP elicitation: Elicitation, ElicitationResult.", "mechanism": "Events: SessionStart, Setup, UserPromptSubmit, UserPromptExpansion, PreToolUse, PermissionRequest, PermissionDenied, PostToolUse, PostToolUseFailure, PostToolBatch, Notification, MessageDisplay, SubagentStart, SubagentStop, TaskCreated, TaskCompleted, Stop, StopFailure, TeammateIdle, InstructionsLoaded, ConfigChange, CwdChanged, FileChanged, WorktreeCreate, WorktreeRemove, PreCompact, PostCompact, Elicitation, ElicitationResult, SessionEnd. Cadences: once/session (SessionStart/SessionEnd), once/turn (UserPromptSubmit/Stop/StopFailure), every tool call (PreToolUse/PostToolUse/etc.). Events without matcher support (always fire): UserPromptSubmit, PostToolBatch, Stop, TeammateIdle, TaskCreated, TaskCompleted, WorktreeCreate, WorktreeRemove, CwdChanged, MessageDisplay.", "name": "Hook event catalog", "purpose": "Enumerates every lifecycle point that can fire a hook"}, {"config": "agent_id/agent_type only added when running under --agent or inside subagent. model field ONLY on SessionStart and not guaranteed. effort/CLAUDE_EFFORT only when model supports effort param.", "dataModel": "Common stdin JSON: {session_id, transcript_path, cwd, permission_mode (default|plan|acceptEdits|auto|dontAsk|bypassPermissions), hook_event_name, effort:{level:low|medium|high|xhigh|max}}. Under --agent/subagent also: agent_id, agent_type. PreToolUse adds: tool_name, tool_input (tool-specific), tool_use_id. PostToolUse adds: tool_input, tool_response, tool_use_id, duration_ms. 
PermissionRequest adds: tool_name, tool_input, permission_suggestions[] (NO tool_use_id). Notification adds: message, title?, notification_type. Stop adds: stop_hook_active, last_assistant_message, background_tasks[], session_crons[]. SubagentStop adds: agent_id, agent_type, agent_transcript_path, last_assistant_message, stop_hook_active, background_tasks, session_crons. SessionStart adds: source, model?, agent_type?, session_title?. SessionEnd adds: reason. PreCompact/PostCompact add: trigger, custom_instructions/compact_summary.", "mechanism": "Every event's stdin JSON carries common fields plus event-specific fields. The matcher is evaluated against a specific field from this JSON (e.g. tool_name for PreToolUse).", "name": "Stdin JSON input contract", "purpose": "The exact JSON payload passed to every hook"}, {"config": "exclusive: exit codes OR exit-0 JSON, never both (exit 2 ignores JSON). stdout must be ONLY the JSON object (shell profile echoes break parsing). terminalSequence allowlist: OSC 0/1/2/9/99/777 + BEL only; anything else (CSI, OSC 8/52/1337) ignored. terminalSequence requires v2.1.141+.", "dataModel": "Exit 0 + JSON: {continue:true, stopReason?, suppressOutput:false, systemMessage?, terminalSequence?, [decision/reason for block-events], [hookSpecificOutput:{hookEventName, ...}]}. Exit 2 + stderr -> blocking. Exit other -> non-blocking error notice ' hook error' + first stderr line in transcript.", "mechanism": "Exit 0 = success; stdout parsed for JSON (only on exit 0). For UserPromptSubmit/UserPromptExpansion/SessionStart, stdout (even non-JSON) is added to Claude context. Exit 2 = BLOCKING error: stdout/JSON IGNORED, stderr fed back to Claude as error. Effect per event (PreToolUse blocks tool, UserPromptSubmit rejects prompt, Stop prevents stopping, PostToolUse just shows stderr since tool already ran, etc.). Any other exit code (incl 1) = NON-blocking error; transcript shows notice + first stderr line, execution continues. 
WorktreeCreate is the exception: ANY non-zero exit aborts creation.", "name": "Exit code / stdout contract", "purpose": "How a hook signals block/allow/error"}, {"config": "PreToolUse precedence deny>defer>ask>allow. defer only in -p non-interactive (v2.1.89+), only single tool call in turn. additionalContext/updatedInput ignored on defer. PreToolUse deny fires BEFORE permission-mode checks (blocks even in bypassPermissions). Hooks can tighten but never loosen past deny rules.", "dataModel": "Top-level decision: {decision:\"block\", reason}. PreToolUse: {hookSpecificOutput:{hookEventName:\"PreToolUse\", permissionDecision:\"allow|deny|ask|defer\", permissionDecisionReason?, updatedInput?, additionalContext?}}. PermissionRequest: {hookSpecificOutput:{hookEventName:\"PermissionRequest\", decision:{behavior:\"allow|deny\", updatedInput?, updatedPermissions?, message?, interrupt?}}}. PermissionDenied: {hookSpecificOutput:{hookEventName:\"PermissionDenied\", retry:true}}. PostToolUse: {hookSpecificOutput:{hookEventName:\"PostToolUse\", decision?, reason?, additionalContext?, updatedToolOutput?, updatedMCPToolOutput?}}. Stop/SubagentStop: top-level {decision:\"block\", reason} OR {hookSpecificOutput:{hookEventName:\"Stop\", additionalContext}}. SessionStart: {hookSpecificOutput:{hookEventName:\"SessionStart\", additionalContext?, initialUserMessage?, sessionTitle?, watchPaths?, reloadSkills?}}.", "mechanism": "Different events use different JSON shapes. (1) Top-level decision: UserPromptSubmit, UserPromptExpansion, PostToolUse, PostToolUseFailure, PostToolBatch, Stop, SubagentStop, ConfigChange, PreCompact -> {decision:\"block\", reason}. (2) hookSpecificOutput.permissionDecision: PreToolUse (allow/deny/ask/defer + reason + updatedInput + additionalContext). (3) hookSpecificOutput.decision.behavior: PermissionRequest (allow/deny + updatedInput + updatedPermissions + message + interrupt). (4) hookSpecificOutput.retry: PermissionDenied. 
(5) Exit code or continue:false: TeammateIdle, TaskCreated, TaskCompleted. (6) Path return: WorktreeCreate. (7) hookSpecificOutput.action: Elicitation/ElicitationResult. (8) hookSpecificOutput.displayContent: MessageDisplay. (9) Context only: SessionStart, Setup, SubagentStart. (10) None: Notification, SessionEnd, PostCompact, InstructionsLoaded, StopFailure, CwdChanged, FileChanged, WorktreeRemove.", "name": "Decision control / output fields", "purpose": "Per-event structured control beyond exit codes"}, {"config": "SessionStart/Setup only support command+mcp_tool (not http/prompt/agent). prompt default timeout 30s, agent 60s (up to 50 turns). continueOnBlock default false.", "dataModel": "prompt hook: {type:\"prompt\", prompt:\"...$ARGUMENTS...\", model?, timeout:30, continueOnBlock?:false}. agent hook: {type:\"agent\", prompt, model?, timeout:60}.", "mechanism": "prompt hook: sends prompt+input to a Claude model (Haiku default, overridable via model field) single-turn; model returns {ok:true|false, reason}. ok:false -> decision:block with per-event behavior (Stop/SubagentStop feeds reason to Claude; PreToolUse denies; PostToolUse ends turn/warning). continueOnBlock:true feeds reason back instead of ending. agent hook: spawns subagent w/ Read/Grep/Glob, up to 50 turns, returns same {ok,reason}. Both support only the 13 events that allow prompt/agent type.", "name": "Prompt & agent hooks", "purpose": "LLM-based judgment hooks vs deterministic command hooks"}, {"config": "async only on type:command. async hooks cannot block. asyncRewake implies async.", "dataModel": "async command hook: {type:\"command\", command, async:true, timeout?:600}. asyncRewake: {type:\"command\", command, asyncRewake:true}.", "mechanism": "async:true (command hooks only): runs in background, Claude continues immediately. On exit, additionalContext delivered on NEXT turn (waits if idle). Cannot block/return decisions. 
asyncRewake:true implies async AND wakes Claude on exit code 2 (stderr or stdout shown as system reminder). No dedup across async firings.", "name": "Async hooks", "purpose": "Non-blocking background execution"}], "confidence": "high", "dimension": "hooks \u2014 the Claude Code hooks system (events, config schema, stdin/stdout/exit-code contracts, blocking/decision semantics)", "keyBehaviors": ["PreToolUse fires BEFORE permission-mode checks: a hook returning permissionDecision:deny blocks the tool even in bypassPermissions mode or with --dangerously-skip-permissions. The reverse is NOT true \u2014 a hook allow does not override deny rules from any settings scope (incl managed). Hooks tighten but never loosen.", "Exit code 1 is NON-blocking (conventional Unix failure but treated as non-blocking error; action proceeds). ONLY exit code 2 blocks (exception: WorktreeCreate, where any non-zero aborts). Use exit 2 to enforce policy.", "Exit 2 and JSON output are mutually exclusive: exit 2 ignores stdout/JSON entirely. JSON is only parsed on exit 0. stdout must contain ONLY the JSON object (shell profile echoes break parsing \u2014 wrap in `if [[ $- == *i* ]]`).", "All matching hooks run to completion in parallel before results merge (one hook's deny does NOT stop sibling hooks). For PreToolUse the most restrictive wins: deny > defer > ask > allow. additionalContext from ALL hooks is kept and combined.", "PreToolUse previously used top-level decision/reason (now DEPRECATED for this event); legacy values 'approve'/'block' map to 'allow'/'deny'. Use hookSpecificOutput.permissionDecision instead. Other events (PostToolUse, Stop, etc.) STILL use top-level decision/reason as current format.", "Stop hooks have an 8-consecutive-block cap (CLAUDE_CODE_STOP_HOOK_BLOCK_CAP env raises it). Hooks receive stop_hook_active=true to detect re-entry and exit early. 
Stop hooks do NOT fire on user interrupts; API errors fire StopFailure instead (whose output/exit code are ignored).", "defer (PreToolUse) only works in -p non-interactive mode (v2.1.89+), only when Claude makes a SINGLE tool call in the turn, and exits with stop_reason:tool_deferred preserving deferred_tool_use{id,name,input}. Resume with claude -p --resume <session-id>. If deferred tool gone on resume -> stop_reason:tool_deferred_unavailable + is_error.", "Output cap: additionalContext, systemMessage, and plain stdout capped at 10000 chars. Over-cap saved to a file in session dir and replaced with preview+path. description fields in background_tasks/session_crons capped at 1000 chars.", "PostToolUse updatedToolOutput must match the tool's output schema (e.g. Bash returns {stdout,stderr,interrupted,isImage}); mismatched shape is IGNORED and original used. MCP tool output passes through without schema validation. Telemetry captures ORIGINAL output before hook.", "When multiple PreToolUse hooks return updatedInput, the LAST to finish wins (non-deterministic since parallel). Avoid >1 hook modifying same tool's input.", "Matchers are CASE-SENSITIVE. A matcher with ONLY letters/digits/_/| is exact-match or |-separated exact list. Any other char => treated as JavaScript regex. mcp__memory (only letters/_) matches NO tool \u2014 must use mcp__memory__.* (the .* makes it a regex).", "MessageDisplay is display-only (transcript + Claude see original; only on-screen rendered text changes), runs per-batch-of-lines interactively (once per full message in -p/SDK). default timeout 10s. No matcher. Only fires for assistant text messages, not tool results or typed text.", "PermissionRequest does NOT fire in -p non-interactive mode \u2014 use PreToolUse for automated decisions. updatedPermissions entries: addRules/replaceRules/removeRules/setMode/addDirectories/removeDirectories, each with destination session|localSettings|projectSettings|userSettings. 
setMode bypassPermissions only if session launched with bypass available; never persisted as defaultMode.", "ConfigChange can block all sources EXCEPT policy_settings (managed settings always apply; hooks fire for audit but block ignored). SessionEnd has 1.5s default timeout, budget raisable to 60s via per-hook timeout or CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS.", "Hooks in skills/agents use YAML frontmatter (same nested format). For subagents, Stop hooks auto-convert to SubagentStop. `once:true` only honored in skill frontmatter (ignored in settings/agent frontmatter)."], "openQuestions": ["Exact JSON shape returned to the SDK for each exit-code/decision combination (e.g. the precise fields of the SDK result object beyond stop_reason:tool_deferred) \u2014 requires reading the claude-code-sdk TypeScript types, not just docs.", "Precise merge order when hooks from multiple scopes (user/project/local/managed/plugin/skill) collide on the same event+matcher \u2014 docs say plugin hooks 'merge' but the precedence on conflicts is underspecified.", "How `if` permission-rule syntax parses non-Bash tools (Edit(*.ts) etc.) at the token level \u2014 docs give a Bash table but not the full grammar for other tools."], "sources": [{"title": "Hooks reference - Claude Code Docs", "url": "https://code.claude.com/docs/en/hooks", "why": "Primary authoritative source: full reference for all 30+ hook events, config schema (matcher/handler fields), stdin JSON input, exit-code/JSON output contract, decision control table, async/prompt/agent/HTTP/mcp_tool hook types, and version-specific thresholds (v2.1.139/141/145/174/85/89, 10000-char cap, 1.5s SessionEnd, 8-block cap). 
Fetched via .md for complete untruncated content."}, {"title": "Automate actions with hooks - Claude Code Docs", "url": "https://code.claude.com/docs/en/hooks-guide", "why": "Official guide confirming exit-code semantics (0=proceed/2=block/other=non-blocking error), PreToolUse permissionDecision allow/deny/ask + defer precedence, hooks-and-permission-modes interaction (deny blocks even in bypassPermissions), prompt/agent hook ok/reason schema, hook-not-firing and Stop-cap troubleshooting."}, {"title": "Claude Code & Agent SDK Hooks (2026) - morphllm", "url": "https://www.morphllm.com/claude-code-hooks", "why": "Independent 2026 corroboration of the 30 hook events, stdin JSON shapes, exit codes, matchers, and timeouts; cross-checks official docs for currentness."}, {"title": "Claude Code Hooks: Complete Guide - claudefa.st", "url": "https://claudefa.st/blog/tools/hooks/hooks-guide", "why": "Community cross-check confirming PreToolUse exit 2 stops the tool and the decision/JSON-output control flow."}, {"title": "Hooks reference - Claude Wiki", "url": "https://claude-wiki.com/hooks-reference.html", "why": "Secondary corroboration of the command-vs-HTTP input/output contract and stdin/stdout/exit-code semantics."}], "summary": "Claude Code's hooks system lets users attach deterministic handlers (shell commands, HTTP endpoints, MCP tool calls, or LLM prompt/agent evaluations) to ~30 named lifecycle events (PreToolUse, PostToolUse, PostToolUseFailure, PostToolBatch, PermissionRequest, PermissionDenied, UserPromptSubmit, UserPromptExpansion, Notification, Stop, StopFailure, SubagentStart, SubagentStop, TeammateIdle, TaskCreated, TaskCompleted, SessionStart, Setup, SessionEnd, PreCompact, PostCompact, ConfigChange, CwdChanged, FileChanged, WorktreeCreate, WorktreeRemove, InstructionsLoaded, MessageDisplay, Elicitation, ElicitationResult). Hooks are configured in settings.json under a top-level `hooks` key (3-level nesting: event -> matcher group -> handler array). 
Command hooks receive event JSON on stdin and signal via exit code (0=success/JSON, 2=blocking error, other=non-blocking error) plus optional stdout JSON. The JSON output supports universal fields (continue, stopReason, suppressOutput, systemMessage, terminalSequence) plus event-specific decision fields: PreToolUse uses hookSpecificOutput.permissionDecision (allow/deny/ask/defer); PermissionRequest uses hookSpecificOutput.decision.behavior (allow/deny) + updatedPermissions; PostToolUse/Stop/etc use top-level decision:\"block\"+reason; PermissionDenied uses hookSpecificOutput.retry. PreToolUse precedence is deny>defer>ask>allow, and PreToolUse hooks fire BEFORE permission-mode checks (a deny hook blocks even in bypassPermissions). Hooks run in parallel with dedup; output capped at 10000 chars."}, "mcp": {"asOfDate": "2026-06", "claimsToVerify": ["scope precedence is Local > Project > User > Plugins > claude.ai connectors, and when a server name collides the ENTIRE entry from the highest-precedence source wins (fields are NOT merged across scopes); local-scope MCP servers are stored in ~/.claude.json while general local SETTINGS live in .claude/settings.local.json", "the MCP tool naming format is mcp__<server>__<tool> (double-underscore separators), with plugin-bundled servers using mcp__plugin_<plugin>_<server>__<tool> and any char outside A-Z a-z 0-9 _ - replaced by _", "MCP_TOOL_TIMEOUT default is ~28 hours; MAX_MCP_OUTPUT_TOKENS default is 25000 with a 10000-token warning threshold; per-server 'timeout' values below 1000 ms are ignored (fall through to MCP_TOOL_TIMEOUT) since v2.1.162 (before that they were floored to 1 second)"], "components": [{"config": "type: 'http' | 'streamable-http' (alias) | 'sse' | 'stdio' | 'ws'. Only http/sse/ws take 'url'. Only stdio takes 'command'+'args'+'env'. 
'timeout' (ms, per-server hard tool-call wall-clock) and 'alwaysLoad' (bool) apply to all types.", "dataModel": "{ \"type\":\"http\", \"url\":\"https://...\", \"headers\":{...}, \"timeout\":600000, \"alwaysLoad\":true, \"headersHelper\":\"...\", \"oauth\":{...} }", "mechanism": "stdio: spawn child process, JSON-RPC over stdin/stdout, CLAUDE_PROJECT_DIR injected into child env, lifecycle = full session, NOT auto-reconnected. http: streamable-HTTP per MCP 2025-03-26 spec; POST for JSON-RPC, optional GET for SSE stream; supports OAuth; auto-reconnect with exponential backoff (up to 5 attempts, start 1s doubling). sse: deprecated legacy HTTP+SSE; same reconnection. ws: persistent bidirectional WebSocket (wss), header-only auth, no OAuth, configurable only via .mcp.json/add-json (NOT via --transport flag). Initial connection (v2.1.121+) retries up to 3 times on transient errors (5xx/refused/timeout); auth/404 errors not retried.", "name": "Transports", "purpose": "The 4 wire transports Claude Code uses to talk to MCP servers."}, {"config": "--scope flag on `claude mcp add` (local default / project / user). Precedence highest-first: Local > Project > User > Plugins > claude.ai connectors.", "dataModel": "~/.claude.json: { \"projects\": { \"/abs/project/path\": { \"mcpServers\": { \"\": {} } } } } (local & user scopes). project .mcp.json: { \"mcpServers\": { \"\": {} } }.", "mechanism": "Local: stored in ~/.claude.json under the current project's path key; private to user+project; DEFAULT scope (was named 'project' in old versions). Project: written to /.mcp.json; shared via VCS; requires per-user approval (prompt on load; reset via `claude mcp reset-project-choices`). User: stored in ~/.claude.json; cross-project; private to user (was named 'global' in old versions). On name collision across scopes, Claude Code connects ONCE using the single highest-precedence entry \u2014 entire entry wins, fields are NOT merged. 
Plugins and claude.ai connectors dedupe by endpoint (URL/command); the three scopes dedupe by name.", "name": "Configuration scopes", "purpose": "Where server definitions live and how precedence resolves duplicates."}, {"config": "Serverdef optional oauth: { clientId, callbackPort, clientSecret(stored in keychain only), authServerMetadataUrl (v2.1.64+, must be https), scopes (space-separated string, RFC 6749 format) }. CLI: --client-id, --client-secret (masked prompt; or MCP_CLIENT_SECRET env), --callback-port.", "dataModel": "OAuth discovery: GET /.well-known/oauth-protected-resource (RFC 9728) -> fallback /.well-known/oauth-authorization-server (RFC 8414). Supports Dynamic Client Registration, CIMD (Client ID Metadata Document), and pre-configured credentials.", "mechanism": "Triggered when server returns 401/403 (or WWW-Authenticate header). Flow: Claude opens browser -> user authorizes -> callback to http://localhost:PORT/callback (random port unless --callback-port pins it) -> token stored securely in OS keychain (macOS) or credentials file, auto-refreshed. oauth.scopes pins requested scopes (space-separated, overrides discovery); offline_access auto-appended if advertised. A configured headers.Authorization that the server rejects is a hard failure (no OAuth fallback). headersHelper runs arbitrary shell command at connect time, stdout = JSON object of string headers, 10s timeout, env vars CLAUDE_CODE_MCP_SERVER_NAME + CLAUDE_CODE_MCP_SERVER_URL injected; overrides static headers; requires workspace-trust dialog at project/local scope.", "name": "OAuth / Auth", "purpose": "Authenticating remote (HTTP/SSE) servers."}, {"config": "ENABLE_TOOL_SEARCH env: unset=default(defer), true=force defer+send beta header, auto / auto:N = threshold (<=10% context upfront), false=load all upfront.", "dataModel": "Tool exposed to model: name `mcp__<server>__<tool>`. tool_reference block (beta) carries deferred defs. 
alwaysLoad: true on server OR _meta['anthropic/alwaysLoad']=true on a tool forces upfront load.", "mechanism": "MCP tools are NOT all loaded into the system prompt upfront. By default Tool Search is ON: only tool NAMES + server instructions load at session start; Claude calls a `ToolSearch` tool to pull a specific tool's schema on demand (uses beta `tool_reference` blocks). Fallback (no tool search, e.g. Vertex, custom ANTHROPIC_BASE_URL, ENABLE_TOOL_SEARCH=false): a `WaitForMcpServers` tool makes Claude wait for connecting servers. Haiku models do NOT support tool_reference. ENABLE_TOOL_SEARCH=auto loads tools upfront if they fit within 10% of context window, defers overflow. `alwaysLoad:true` on a server forces all its tools upfront regardless of setting and blocks startup until connect (capped at 5s connect timeout). Server instructions and tool descriptions truncated at 2KB each.", "name": "Tool exposure & Tool Search", "purpose": "How MCP tools become callable by the model."}, {"config": "MAX_MCP_OUTPUT_TOKENS env (default 25000). Warning fires >10000 tokens. MCP_TIMEOUT env = startup timeout. MCP_TOOL_TIMEOUT env = global per-call default (~28h).", "dataModel": "Result text content subject to MAX_MCP_OUTPUT_TOKENS unless _meta['anthropic/maxResultSizeChars'] set (max 500000 chars). Image content ALWAYS subject to token limit regardless of annotation.", "mechanism": "When an MCP tool returns >10000 tokens, Claude Code warns. Default hard cap 25000 tokens (MAX_MCP_OUTPUT_TOKENS). Oversized text results persisted to disk and replaced with a file reference in the conversation. 
A tool can opt into a larger threshold via _meta['anthropic/maxResultSizeChars'] in its tools/list entry (hard ceiling 500000 chars) \u2014 applies to text content only.", "name": "Output limits", "purpose": "Bounding MCP tool output token usage."}, {"config": "Commands: claude mcp add, add-json, add-from-claude-desktop, list, get, remove, reset-project-choices, serve.", "dataModel": "/mcp shows: per-server tool count, pending/failed/rejected status, 'Show unused connectors' row (v2.1.161+).", "mechanism": "`/mcp` (in-session): lists servers with connection status (connected/pending/failed), tool count, flags servers advertising tools capability but exposing none, OAuth 'Clear authentication', approve pending project servers, retry failed. `claude mcp list` shows \u23f8 Pending approval for unapproved project servers; `claude mcp get <name>` shows pending/rejected status. `claude mcp serve` turns Claude Code itself into a stdio MCP server exposing View/Edit/LS etc. Reserved server name `workspace` is skipped at load with a warning.", "name": "/mcp command & CLI surface", "purpose": "User-facing management UI and commands."}, {"config": "Settings keys: allowedMcpServers, deniedMcpServers, allowManagedMcpServersOnly (managed-source-only), allowAllClaudeAiMcps (v2.1.149+, managed-source-only).", "dataModel": "Entry = { \"serverUrl\": \"https://*\" } | { \"serverCommand\": [\"npx\",\"-y\",\"pkg\"] } | { \"serverName\": \"label\" }. managed-mcp.json empty mcpServers => MCP disabled.", "mechanism": "managed-mcp.json (system path: macOS /Library/Application Support/ClaudeCode/, Linux /etc/claude-code/, Windows C:\\Program Files\\ClaudeCode\\; same format as .mcp.json; deploy via MDM/GPO, NOT server-managed settings): if present, ONLY those servers load (exclusive mode), user adds blocked with 'enterprise MCP configuration is active'. 
Evaluation order: merge allow/deny from all sources -> denylist match blocks unconditionally -> allowlist: remote needs serverUrl (or serverName only if no serverUrl entries exist), stdio needs serverCommand (or serverName only if no serverCommand entries). Commands match EXACTLY (all args in order). URLs support * wildcards anywhere incl scheme; hostname case-insensitive ignoring trailing dot; path case-sensitive.", "name": "Enterprise policy (managed MCP)", "purpose": "Centralized control over which MCP servers users may connect to."}, {"config": "ENABLE_CLAUDEAI_MCP_SERVERS=false disables. Anthropic-hosted connectors (Microsoft 365, Gmail, Google Calendar) require claude.ai-side connect (v2.1.162+).", "dataModel": "claude.ai connector precedence: lowest. A CC-configured server pointing at same URL hides the connector.", "mechanism": "Connectors added at claude.ai/customize/connectors auto-appear in CC when active auth method is Claude.ai subscription (NOT loaded if ANTHROPIC_API_KEY/AUTH_TOKEN/apiKeyHelper/Bedrock/Vertex active). Fetched at runtime, shown with claude.ai indicator. 
Unused connectors collapsed behind 'Show unused connectors' (v2.1.161+).", "name": "claude.ai connectors", "purpose": "MCP servers configured in the claude.ai web app."}], "confidence": "high", "dimension": "mcp", "externalInterfaces": ["CLI: claude mcp add [--transport http|sse|stdio] [--scope local|project|user] [--header \"K: V\"] [--env K=V] [--client-id] [--client-secret] [--callback-port N] [--channels] [args...]>", "CLI: claude mcp add-json '' [--scope user] [--client-secret]", "CLI: claude mcp add-from-claude-desktop", "CLI: claude mcp list | get | remove | reset-project-choices | serve", "In-session slash command: /mcp (status panel, OAuth, retry, clear auth)", "MCP prompt as slash command: /mcp____ [args]", "Resource @-mention: @:://", "Config files: .mcp.json (project root), ~/.claude.json (local+user), managed-mcp.json (system path)", "Env vars: MCP_TIMEOUT, MCP_TOOL_TIMEOUT, MAX_MCP_OUTPUT_TOKENS, ENABLE_TOOL_SEARCH, ENABLE_CLAUDEAI_MCP_SERVERS, MCP_CLIENT_SECRET, CLAUDE_PROJECT_DIR (injected into stdio child), CLAUDE_CODE_MCP_SERVER_NAME/URL (injected into headersHelper)", "Agent SDK: options.mcpServers{...}, options.allowedTools=[\"mcp____*\"]", "Tool name surface: mcp____ ; plugin: mcp__plugin____"], "keyBehaviors": ["Scope name history: current 'local' was 'project'; current 'user' was 'global'. 'project' scope now means the shared .mcp.json file. Do not confuse MCP local scope (lives in ~/.claude.json) with general local settings (live in .claude/settings.local.json).", "Precedence on duplicate is winner-take-all per entire server entry (Local > Project > User > Plugins > claude.ai); fields are NOT merged. The 3 scopes dedupe by name; plugins and connectors dedupe by endpoint (URL/command).", "Project-scoped servers from .mcp.json REQUIRE interactive approval before use; status shows \u23f8 Pending approval until approved / \u2717 Rejected. 
Reset via `claude mcp reset-project-choices`.", "Server name `workspace` is reserved/skipped at load with a rename warning.", "streamable-http is an alias for http in the `type` field (so configs copied from MCP docs work unchanged). SSE is deprecated; http preferred.", "WebSocket (`type: ws`) cannot be added via `claude mcp add --transport` \u2014 only via .mcp.json or add-json. WS has no OAuth (header-only). HTTP is the only transport supporting OAuth + the --transport flag.", "Stdio servers are NOT auto-reconnected (local processes); http/sse auto-reconnect up to 5 attempts, 1s->doubling backoff. Initial connect retries up to 3x on transient errors since v2.1.121.", "Per-server `timeout` (ms) is a hard per-call wall-clock; progress notifications do NOT extend it. Values <1000 are IGNORED (fall through to MCP_TOOL_TIMEOUT default ~28h) since v2.1.162; before v2.1.162 they were floored to 1 second. HTTP/SSE first-byte budget min 60s.", "MAX_MCP_OUTPUT_TOKENS default 25000; warning at >10000 tokens. Oversized text persisted to disk + replaced by file ref unless tool sets _meta['anthropic/maxResultSizeChars'] (ceiling 500000). Image content always subject to token cap regardless.", "Tool Search ON by default: tools deferred, discovered via `ToolSearch` tool using beta `tool_reference` blocks. Disabled by default on Vertex AI and when ANTHROPIC_BASE_URL is non-first-party. Haiku lacks tool_reference support. ENABLE_TOOL_SEARCH=auto = upfront if <=10% context. alwaysLoad:true forces upfront + blocks startup (5s cap).", "Env var expansion `${VAR}` and `${VAR:-default}` works in command/args/env/url/headers of .mcp.json. Missing var with no default = config parse failure. 
CLAUDE_PROJECT_DIR must use a default like ${CLAUDE_PROJECT_DIR:-.} in project/user .mcp.json (plugin configs substitute it directly).", "MCP resources: `@server:protocol://path` @-mention; Claude Code auto-provides tools to list/read resources when server supports them; fuzzy-searched in @ autocomplete. MCP prompts: surface as `/mcp__<server>__<prompt> [args]` slash commands; names normalized (spaces->_).", "Dynamic updates: servers sending MCP `list_changed` notification cause auto-refresh of tools/prompts/resources without reconnect.", "Elicitation: servers can request structured input mid-task (form or URL mode) via MCP elicitation; auto-displayed; auto-respond via Elicitation hook.", "OAuth precedence: oauth.scopes > authServerMetadataUrl > discovered /.well-known scopes. offline_access auto-appended if advertised. 403 insufficient_scope triggers re-auth with same pinned scopes. headersHelper runs fresh each connect (no caching), overrides static headers, needs workspace trust at project/local scope.", "claude.ai connectors only load when active auth = Claude.ai subscription; disabled by ANTHROPIC_API_KEY/AUTH_TOKEN/apiKeyHelper/Bedrock/Vertex. ENABLE_CLAUDEAI_MCP_SERVERS=false disables. Some Anthropic-hosted connectors (MS 365, Gmail, Google Calendar) require claude.ai-side connect (v2.1.162+).", "Enterprise allowlist semantics: allowlist with only serverName entries is NOT a security control (user can name any server 'github'). serverUrl/serverCommand entries make name entries stop matching. 
Denylist always wins, always merges from all sources.", "managed-mcp.json empty mcpServers = MCP fully disabled; suppresses claude.ai connectors unless allowAllClaudeAiMcps:true (managed-source-only, v2.1.149+)."], "openQuestions": ["Exact internal JSON-RPC initialize negotiation params and protocol version string Claude Code sends (likely '2025-03-26' or '2025-06-18'); not in public docs.", "Precise file/key format of the OAuth token store on disk and per-OS keychain service name.", "Whether `headersHelper` JSON merge is shallow-only and exact precedence vs `headers` beyond 'same name overrides'.", "Exact behavior of `WaitForMcpServers` internal tool name and its output schema when tool search is disabled."], "sources": [{"title": "Connect Claude Code to tools via MCP \u2014 official docs", "url": "https://code.claude.com/docs/en/mcp", "why": "Primary source: transports, scopes, tool naming, OAuth, output limits, tool search, resources, prompts, elicitation, channels \u2014 the entire MCP subsystem reference."}, {"title": "Control MCP server access for your organization (managed-mcp) \u2014 official docs", "url": "https://code.claude.com/docs/en/managed-mcp", "why": "Authoritative on managed-mcp.json paths/format, allowedMcpServers/deniedMcpServers matching rules, allowManagedMcpServersOnly, evaluation order, allowAllClaudeAiMcps."}, {"title": "MCP server-types deep dive \u2014 anthropics/claude-code repo", "url": "https://github.com/anthropics/claude-code/blob/main/plugins/plugin-dev/skills/mcp-integration/references/server-types.md", "why": "First-party repo reference documenting stdio/sse/http/ws config shapes, lifecycles, ${CLAUDE_PLUGIN_ROOT} expansion, and comparison matrix."}, {"title": "Connect to external tools with MCP (Agent SDK) \u2014 official docs", "url": "https://code.claude.com/docs/en/agent-sdk/mcp", "why": "Confirms exact tool naming convention mcp__<server>__<tool>, mcpServers option, allowedTools wildcard, .mcp.json loading via settingSources."}, {"title": 
"MCP Transports specification \u2014 modelcontextprotocol.io", "url": "https://modelcontextprotocol.io/specification/2025-03-26/basic/transports", "why": "Underlying protocol spec for stdio, HTTP+SSE, and streamable-HTTP semantics that Claude Code implements."}, {"title": "Streamable HTTP specification (2025-03-26 / draft) \u2014 modelcontextprotocol.io", "url": "https://modelcontextprotocol.io/specification/draft/basic/transports/streamable-http", "why": "Confirms streamable-http replaced HTTP+SSE in protocol version 2025-03-26, which Claude Code aliases to http."}], "summary": "Claude Code's MCP integration (src/services/mcp/) connects to external MCP servers over four transports (stdio, SSE [deprecated], HTTP/streamable-HTTP, WebSocket), discovers their tools/resources/prompts, and exposes them to the model with prefixed names. Servers are configured at three scopes (local, project via .mcp.json, user via ~/.claude.json) plus plugins and claude.ai connectors, with a strict precedence (Local > Project > User > Plugins > claude.ai) that connects to a server once using the single highest-precedence entry (no field merging). MCP tools are named mcp__<server>__<tool> (plugin-bundled tools use mcp__plugin_<plugin>_<server>__<tool>), and by default are NOT loaded upfront \u2014 Tool Search defers tool definitions until Claude invokes a ToolSearch call, so context usage stays low. HTTP/SSE servers support OAuth 2.0 (with dynamic client registration, CIMD, or pre-configured credentials), automatic token refresh via keychain, and dynamic headersHelper scripts; stdio servers run as child processes with CLAUDE_PROJECT_DIR injected. Enterprise control is layered on via managed-mcp.json (exclusive fixed set), allowedMcpServers/deniedMcpServers allow/denylists, and managed settings. 
The /mcp slash command and `claude mcp list/get/add/remove` CLI manage the lifecycle, connection status, and OAuth flows."}, "subagents-task": {"asOfDate": "2026-06", "claimsToVerify": ["Task tool was renamed to Agent in v2.1.63 (still aliased as Task in system:init tools list, result.permission_denials[].tool_name, and permission rules); current SDK emits Agent in tool_use blocks", "v2.1.172 introduced nested subagents: foreground subagents can spawn at any depth, but a background subagent at depth 5 does NOT receive the Agent tool and cannot spawn further (fixed at 5, not configurable)", "The Agent tool prompt-only return contract: parent receives ONLY the subagent's final message verbatim as the tool_result (no intermediate tool calls/reasoning); built-in Explore and Plan are one-shot and return NO agentId so they cannot be resumed via SendMessage"], "components": [{"config": "type: Agent; name 'Agent'; legacy alias 'Task' for backward compat with older transcripts/permission rules/hook configs.", "dataModel": "TaskInput (zod, feature-gated):\nBase (always present): description (string, required, 3-5 word summary), prompt (string, required, full task instructions), subagent_type (string, optional), model (enum sonnet|opus|haiku, optional), run_in_background (boolean, optional).\nFull schema additions (when swarm/isolation features active): name (string, makes agent addressable via SendMessage({to:name})), team_name (string), mode (PermissionMode), isolation (enum worktree|remote), cwd (string, absolute path override).\nFeature-gated omissions: when fork active OR CLAUDE_CODE_DISABLE_BACKGROUND_TASKS set, run_in_background is stripped; when KAIROS flag off, cwd is omitted. The model never sees fields it cannot use.", "mechanism": "Registered via buildTool() factory under name \"Agent\" with legacy alias \"Task\". call() runs a 10-step decision tree BEFORE runAgent(): (1) teammate? 
(team_name+name set) -> spawnTeammate(); (2) resolve effective agent type: subagent_type provided -> use it; omitted+fork enabled -> undefined (fork path); omitted+fork disabled -> \"general-purpose\" default; (3) fork guard check; (4) resolve definition from activeAgents, filtering by permission deny rules + allowedAgentTypes, throw if not found/denied; (5) wait up to 30s for required MCP servers; (6) resolve isolation (param overrides def): remote->teleportToRemote(), worktree->createAgentWorktree(), null->normal; (7) sync-vs-async decision: shouldRunAsync = run_in_background || selectedAgent.background || isCoordinator || forceAsync || isProactiveActive; (8) assemble worker tool pool; (9) build system prompt + prompt messages; (10) execute (async -> registerAsyncAgent + void lifecycle; sync -> iterate runAgent inline). The dynamic prompt from getPrompt() is context-sensitive (lists available agents as an attachment message to avoid busting prompt cache, NOT inline in tool description).", "name": "AgentTool (a.k.a. Task tool)", "purpose": "The model-facing meta-tool that spawns a child subagent. The ONLY tool the parent model calls to delegate work; everything below flows from it."}, {"config": "name format: lowercase + hyphens (filename need not match name). model resolution precedence: CLAUDE_CODE_SUBAGENT_MODEL env -> per-invocation model param -> frontmatter model -> main model. plugins IGNORE hooks, mcpServers, permissionMode fields (security).", "dataModel": "---\nname: # REQUIRED\ndescription: # REQUIRED (when to delegate)\ntools: Read, Glob, Grep # optional comma-list or YAML array; '*' = all\ndisallowedTools: Write, Edit # denylist; applied BEFORE tools allowlist resolves\nmodel: sonnet|opus|haiku|fable||inherit # default: inherit\npermissionMode: default|acceptEdits|auto|dontAsk|bypassPermissions|plan\nmaxTurns: \nskills: [skill-name, ...] 
# full content injected, not just description\nmcpServers: [{: {type,command,args}}, \"\"]\nhooks: {PreToolUse|PostToolUse|Stop: [{matcher, hooks:[{type:command,command}]}]}\nmemory: user|project|local # dir at ~/.claude/agent-memory// etc.\nbackground: true|false # default false\neffort: low|medium|high|xhigh|max|\nisolation: worktree # temp git worktree branched from default branch\ncolor: red|blue|green|yellow|purple|orange|pink|cyan\ninitialPrompt: # auto-submitted as first user turn when agent runs as MAIN session (--agent)\n---\n", "mechanism": "Loaded at session START only (restart required for disk edits; /agents UI edits take effect immediately). Five scope locations with priority: (1) Managed settings org-wide [highest], (2) --agents CLI flag JSON [session], (3) .claude/agents/ [project], (4) ~/.claude/agents/ [user], (5) plugin agents/ dir [lowest]. Project & user scanned RECURSIVELY (subfolders OK, identity from name field only \u2014 keep names unique within a scope or one is silently discarded). Plugin subfolders BECOME part of the scoped id (agents/review/security.md in plugin my-plugin -> my-plugin:review:security). --agents JSON uses same fields, with `prompt` field = markdown body. Programmatic SDK agents take precedence over filesystem agents with the same name.", "name": "AgentDefinition file format (.claude/agents/*.md)", "purpose": "Declarative definition of a subagent: identity, capabilities, system prompt, and lifecycle config. Single source reused across subagent invocation, @-mention, --agent main-thread mode, and agent-team teammates."}, {"config": "Explore & Plan have omitClaudeMd:true (strip CLAUDE.md + git status, saves tokens; only these two skip them, NO frontmatter field to change). Explore/Plan are ONE_SHOT (no agentId returned, no SendMessage instructions, no usage trailer). 
Agent tool is in default disallowedTools for general-purpose to prevent exponential fan-out.", "dataModel": "Type registry built dynamically by getBuiltInAgents() gated by feature flags + GrowthBook experiments (BUILTIN_EXPLORE_PLAN_AGENTS + tengu_amber_stoat for Explore/Plan; VERIFICATION_AGENT + tengu_hive_evidence for Verification).", "mechanism": "General-purpose: full tools (minus Agent), no CLAUDE.md omission, model=getDefaultSubagentModel(). Explore: Haiku, read-only (FileEdit/FileWrite/NotebookEdit/Agent removed), CRITICAL: READ-ONLY MODE in prompt, one-shot \u2014 most spawned (~34M/week). Plan: 'inherit' model, read-only, 4-step structured process ending with Critical Files list, one-shot. Verification: read-only, 'inherit', background:true always, red, ~130-line anti-avoidance prompt, criticalSystemReminder_EXPERIMENTAL guardrail. statusline-setup: Sonnet, Read+Edit only, orange. claude-code-guide: Haiku, dontAsk mode, excluded when entrypoint=SDK. Disable all built-ins via CLAUDE_AGENT_SDK_DISABLE_BUILTIN_AGENTS=1; deny specific via permissions.deny=[\"Agent(Explore)\"] or --disallowedTools.", "name": "Built-in subagent registry (6 types)", "purpose": "The always-available agents Claude delegates to automatically. Cover exploration, planning, general work, verification, and UI helpers."}, {"config": "Thinking disabled for normal agents ({type:'disabled'}) to control cost; fork agents inherit thinkingConfig for cache identity. Explore/Plan skip CLAUDE.md & git status (gate tengu_slim_subagent_claudemd defaults true).", "dataModel": "runAgent signature: {agentDefinition, promptMessages, toolUseContext, canUseTool, isAsync, canShowPermissionPrompts, forkContextMessages, querySource, override, model, maxTurns, availableTools, allowedTools, onCacheSafeParams, useExactTools, worktreePath, description}. 
agentId branded type AgentId = `agent-`.", "mechanism": "15 steps: (1) Model resolution chain caller-override > agent-def > parent-model > default (getAgentModel handles 'inherit'); (2) agentId creation (override.agentId or createAgentId() -> agent-); (3) context prep \u2014 fork clones parent history via filterIncompleteToolCalls() (strips tool_use blocks lacking matching tool_result, else API rejects); fresh agents start empty; file-state cache fork=clone, fresh=createWithSizeLimit; (4) CLAUDE.md stripping for read-only agents; (5) permission isolation \u2014 custom getAppState() overlays agent mode unless parent is bypassPermissions/acceptEdits/auto (parent wins); async agents get shouldAvoidPermissionPrompts:true; allowedTools replaces session allow rules but preserves SDK --allowedTools; (6) tool resolution (fork: useExactTools passthrough for byte-identical cache prefix; else resolveAgentTools applies tools/disallowedTools/ASYNC_AGENT_ALLOWED_TOOLS); (7) system prompt (fork uses override.systemPrompt = parent's exact rendered bytes; else getAgentSystemPrompt + env details); (8) abort controller isolation (async=new unlinked controller; sync=parent's shared controller); (9) register frontmatter hooks scoped to agentId, Stop->SubagentStop conversion, strictPluginOnlyCustomization skips user agent hooks; (10) preload skills (3-strategy name resolution) as user messages; (11) MCP init (name refs shared/memoized, inline created+cleaned up); (12) createSubagentContext (sync shares setAppState, async isolates it; both share setAppStateForTasks + setResponseLength; messages own array); (13) onCacheSafeParams callback for background summarization; (14) query() loop drives child conversation, yields Messages, each recorded to sidechain transcript JSONL O(1); (15) finally{} cleanup: mcpCleanup, clearSessionHooks, cleanupAgentTracking, readFileState.clear(), initialMessages.length=0, unregisterPerfettoAgent, clearAgentTranscriptSubdir, remove agent's todos, 
killShellTasksForAgent.", "name": "runAgent() 15-step lifecycle", "purpose": "The single async-generator function that creates and drives a subagent's entire execution context. Every subagent type (fork/built-in/custom/coordinator-worker) flows through it."}, {"config": "7 types: local_bash(b), local_agent(a), remote_agent(r), in_process_teammate(t), local_workflow(w), monitor_mcp(m), dream(d). 5 statuses: pending->running->{completed|failed|killed}. isTerminalTaskStatus() guards message injection.", "dataModel": "TaskStateBase: {id (prefixed random, ~2.8T combos), type, status, description, toolUseId, startTime, endTime?, totalPausedMs?, outputFile (disk path), outputOffset (read cursor), notified (dedup flag)}. LocalAgentTaskState adds: agentId, prompt, selectedAgent, agentType, model?, abortController?, pendingMessages[], isBackgrounded, retain, diskLoaded, evictAfter?, progress?, lastReportedToolCount, lastReportedTokenCount. AppState.tasks is flat Record (no parent-child tree).", "mechanism": "Three comms channels: (1) Disk output files (outputFile symlink to JSONL transcript, read incrementally via outputOffset; TaskOutputTool polls, block:true polls until terminal/timeout); (2) Task notifications ( XML injected as user-role message in parent conversation, deduped via notified flag); (3) Command queue pendingMessages[] drained at tool-round boundaries by drainPendingMessages() (messages arrive BETWEEN tool rounds, never mid-execution). ProgressTracker tracks toolUseCount, latestInputTokens (cumulative-latest), cumulativeOutputTokens (summed), recentActivities (cap 5). Backgrounding mid-execution: Promise.race between next-message and background-signal; foreground iterator.return() triggers cleanup, re-spawn as async with same ID, flip isBackgrounded.", "name": "Task state machine + async communication", "purpose": "Unified state model for all background operations (shell, subagent, teammate, remote, workflow, mcp-monitor, dream). 
Backbone of background agent tracking, progress, and result delivery."}, {"config": "Requires CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1 (experimental). Stored on disk: team config ~/.claude/teams/{team-name}/config.json (members array with name, agentId, agentType), task list ~/.claude/tasks/{team-name}/. Both removed on cleanup. NO project-level teams.json recognized.", "dataModel": "InProcessTeammateTaskState: type 'in_process_teammate', identity, prompt, messages? (UI cap 50), pendingUserMessages[], isIdle, shutdownRequested, awaitingPlanApproval, permissionMode, onIdleCallbacks?, currentWorkAbortController (distinct from main kill controller \u2014 cancels current turn only, redirect pattern). TeamContext: {teamName, teammates:{[id]:{name,color}}}. agentNameRegistry: Map.", "mechanism": "Leader spawns teammates (in-process via AsyncLocalStorage, or split-pane via tmux/iTerm2). SendMessage routes by `to`: bridge: (remote relay, needs consent) > uds: (local IPC) > agentNameRegistry lookup (running->queuePendingMessage; terminal->resumeAgentBackground; not in AppState->resume from disk transcript) > team mailbox fallback. Mailbox = writeToMailbox() file per recipient; to:\"*\" broadcasts to all members except sender (no fan-out opt). Structured protocols: shutdown_request/response (cooperative, teammate may reject), plan_approval_response (only lead approves). Auto-resume: SendMessage to dead agent reads sidechain JSONL, filters orphaned thinking/tool blocks, rebuilds content-replacement state, re-registers as background task, runs runAgent() with restored history + new message. Workers cannot spawn sub-teams (INTERNAL_WORKER_TOOLS deny set). Known bug: SendMessage by agent NAME for completed/resumed agents may silently fail \u2014 agent ID is reliable (GitHub issue #42999).", "name": "SendMessage + agent teams (inter-agent messaging)", "purpose": "Universal communication primitive across subagents, coordinator workers, swarm teammates, and remote/UDS peers. 
Single tool, 4 routing modes by shape of `to` field."}, {"config": "builtIn always registered in interactive sessions; disable specific via permissions.deny=[\"Agent()\"] or --disallowedTools. Resume requires non-one-shot agent (general-purpose/custom); Explore/Plan cannot resume. CLAUDE_CODE_DISABLE_BACKGROUND_TASKS=1 disables all background; CLAUDE_CODE_FORK_SUBAGENT=1 forces all spawns to background.", "dataModel": "Agent tool output discriminated union: {status:'completed', prompt, ...AgentToolResult} | {status:'async_launched', agentId, description, prompt, outputFile}. (Internal-only TeammateSpawnedOutput & RemoteLaunchedOutput excluded from exported schema for dead-code-elimination.)", "mechanism": "When subagent completes, Agent tool result includes text block 'agentId: '. Explore/Plan are one-shot (no agentId, cannot resume). To resume: parent uses SendMessage({to: agentId}) (only available with CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1) OR SDK resumes by passing resume: + naming agentId in prompt. Transcripts at ~/.claude/projects/{project}/{sessionId}/subagents/agent-{agentId}.jsonl \u2014 persist independently of main conversation (main compaction doesn't touch them); cleaned up via cleanupPeriodDays (default 30). 
Stopped subagent receiving SendMessage auto-resumes in background without new Agent invocation.", "name": "Termination & resume contract", "purpose": "How subagents end, how their result returns to parent, and how they can be continued."}], "confidence": "high", "dimension": "subagents-task", "externalInterfaces": ["Tool name: 'Agent' (primary), 'Task' (legacy alias) \u2014 emitted in tool_use blocks; system:init tools list & result.permission_denials[].tool_name still use 'Task' in some SDK versions", "Agent tool input: {description, prompt, subagent_type?, model?, run_in_background?, name?, team_name?, mode?, isolation?, cwd?}", "Agent tool output: {status:'completed', prompt, ...result} | {status:'async_launched', agentId, description, prompt, outputFile}", "SendMessage tool input: {to: name|'*'|'uds:'|'bridge:'|agentId, summary?, message: string | {type:'shutdown_request'|'shutdown_response'|'plan_approval_response', ...}}", "TaskStop tool input: {task_id?, shell_id? (deprecated)} \u2014 legacy alias 'KillShell'", "TaskOutput tool input: {task_id, block=true, timeout=30000}", "File formats: .claude/agents/*.md & ~/.claude/agents/*.md (YAML frontmatter + markdown body); --agents JSON (prompt field = body); subagent transcripts ~/.claude/projects/{project}/{sessionId}/subagents/agent-{agentId}.jsonl", "CLI flags: --agent , --agents '', --disallowedTools 'Agent(Explore)', --teammate-mode in-process|tmux|auto, settings 'agent' & 'teammateMode'", "Env vars: CLAUDE_CODE_SUBAGENT_MODEL, CLAUDE_CODE_DISABLE_BACKGROUND_TASKS, CLAUDE_CODE_FORK_SUBAGENT, CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS, CLAUDE_AGENT_SDK_DISABLE_BUILTIN_AGENTS, CLAUDE_CODE_COORDINATOR_MODE", "Permission rule forms: 'Agent', 'Agent(worker, researcher)' (allowlist only when main --agent), 'Agent(Explore)' in permissions.deny"], "keyBehaviors": ["The Task->Agent rename (v2.1.63) is a BREAKING CHANGE for hook scripts: PreToolUse/PostToolUse hooks that string-match the tool name must now check BOTH 'Task' 
and 'Agent' for cross-version compatibility. Current SDK versions emit 'Agent' in tool_use blocks but still report 'Task' in the system:init tools list and result.permission_denials[].tool_name.", "Model resolution order is FIXED and non-obvious: CLAUDE_CODE_SUBAGENT_MODEL env > per-invocation model param > frontmatter model > main conversation model. 'inherit' resolves to parent's model. Explore defaults to Haiku for external users via GrowthBook gating.", "Subagent receives ONLY: its own system prompt + Agent tool prompt + project CLAUDE.md (except Explore/Plan) + git status snapshot (except Explore/Plan) + preloaded skills. It does NOT receive parent conversation history, parent system prompt, or preloaded skill content unless in AgentDefinition.skills. The parent->child channel is ONLY the prompt string.", "The parent receives the subagent's FINAL message VERBATIM as the Agent tool_result (may be summarized by parent in its own response). To preserve verbatim subagent output in user-facing response, instruct the main query() to do so \u2014 the contract is not automatic.", "Foreground subagents share the parent's abort controller (Escape kills both); background subagents get an independent controller (Escape on parent does NOT kill them). Backgrounding mid-execution re-spawns with same ID and flips isBackgrounded.", "Background subagents auto-deny ANY tool call that would prompt (no terminal attached); foreground passes prompts through to user. Named/background subagents auto-deny prompting tools; 'bubble' mode is the exception that surfaces prompts to parent terminal.", "If 'Agent' is omitted from a subagent's tools list, it CANNOT spawn nested subagents. 
'Agent(worker, researcher)' allowlist syntax ONLY applies when running as main thread via --agent; in a subagent definition, any type list in parens is IGNORED (bare Agent enables nesting).", "Nested subagent depth limit (v2.1.172): foreground can spawn at any depth (self-limiting via blocking); background subagent at depth 5 gets NO Agent tool and cannot spawn further. The limit is fixed and NOT configurable. Fork still cannot spawn another fork (querySource==='agent:builtin:fork' guard + isInForkChild scan for ).", "Permission mode cascade: if parent is bypassPermissions, acceptEdits, or auto mode, the PARENT'S mode always wins \u2014 the subagent's permissionMode frontmatter is IGNORED. Otherwise the agent's mode applies. This prevents a custom agent from downgrading security the user explicitly set.", "Auto-resume via SendMessage: sending a message to a completed/killed agent transparently resurrects it from its disk JSONL transcript (filters orphaned thinking/tool blocks, rebuilds content-replacement state for cache stability). Coordinators do not need to track agent liveness. CAVEAT: GitHub issue #42999 reports SendMessage by agent NAME silently fails for some resume paths \u2014 agent ID is the reliable target.", "Subagent transcripts persist separately from the main conversation: main-conversation compaction does NOT touch subagent transcripts. They survive session restart and are cleaned up via cleanupPeriodDays (default 30 days). Sidechain recording is O(1) per message (append-only, previous-UUID reference).", "Plugin subagents CANNOT use hooks, mcpServers, or permissionMode frontmatter fields (silently ignored for security). Copy into .claude/agents/ if you need them. 
As of v2.1.153, main-session MCP restrictions (--strict-mcp-config, --bare, managed MCP, allowedMcpServers/deniedMcpServers) also cover servers declared in subagent frontmatter (but --strict-mcp-config does NOT filter inline --agents/SDK agents servers \u2014 those are explicit caller input).", "Filesystem-based agents load at SESSION START only. Editing a .claude/agents/*.md on disk requires a session restart. /agents UI edits take effect immediately. Windows: very long subagent prompts may fail (>8191 char command-line limit) \u2014 use filesystem agents.", "Explore/Plan are the ONLY agents that skip CLAUDE.md and git status, and there is NO frontmatter field to change which agents skip them. If a rule must reach Explore/Plan, restate it in the delegation prompt.", "In agent teams: subagent definitions used as teammates apply ONLY tools + model; the body is APPENDED to teammate system prompt (not replacing). skills and mcpServers fields are NOT applied on the teammate path (teammates load those from project/user settings like a regular session). 
Team coordination tools (SendMessage, task tools) are ALWAYS available even when tools restricts others."], "openQuestions": ["Exact content/wording of the Explore agent's 'CRITICAL: READ-ONLY MODE' system prompt section and the general-purpose system prompt (described but not quoted verbatim in sources)", "Full list and exact gating conditions of the ~12 feature flags + GrowthBook experiments (FORK_SUBAGENT, BUILTIN_EXPLORE_PLAN_AGENTS, VERIFICATION_AGENT, KAIROS, TRANSCRIPT_CLASSIFIER, PROACTIVE, tengu_amber_stoat, tengu_hive_evidence, tengu_slim_subagent_claudemd, tengu_scratch) \u2014 which are compile-time vs runtime A/B", "Exact AgentProgress type fields and the ASYNC_AGENT_ALLOWED_TOOLS allowlist contents", "Whether the 'dream' task type (speculative background thinking) and 'local_workflow' Workflow tool are GA or still feature-gated as of v2.1.175", "Whether coordinator mode (CLAUDE_CODE_COORDINATOR_MODE) is GA or still behind COORDINATOR_MODE feature flag for general users"], "sources": [{"title": "Create custom subagents \u2014 Claude Code Docs (official)", "url": "https://code.claude.com/docs/en/sub-agents", "why": "PRIMARY source. 
Full frontmatter field table, 5 scope priorities, built-in subagent details (Explore/Plan/general-purpose), isolation:worktree, what-loads-at-startup matrix, resume contract, nested depth rules."}, {"title": "Subagents in the SDK \u2014 Claude Code Docs (official)", "url": "https://code.claude.com/docs/en/agent-sdk/subagents", "why": "AgentDefinition field table (description/prompt/tools/disallowedTools/model/skills/memory/mcpServers/initialPrompt/maxTurns/background/effort/permissionMode), what-subagents-inherit matrix, v2.1.63 Task->Agent rename + dual-name detection guidance, resume via agentId, v2.1.172 nested depth rule."}, {"title": "Orchestrate teams of Claude Code sessions \u2014 Claude Code Docs (official)", "url": "https://code.claude.com/docs/en/agent-teams", "why": "Agent teams architecture (lead/teammates/task list/mailbox), team+task disk paths, subagent-definitions-for-teammates (tools+model honored, body appended, skills/mcpServers ignored), mailbox messaging, plan approval protocol, v2.1.32 minimum."}, {"title": "Ch 8. Spawning Sub-Agents \u2014 Claude Code from Source", "url": "https://claude-code-from-source.com/ch08-sub-agents/", "why": "Authoritative internals: AgentTool base+full input schema with feature-gated field omissions, 10-step call() decision tree, full 15-step runAgent() lifecycle, 6 built-in agent types with feature gates, fork guard mechanics, output schema discriminated union."}, {"title": "Ch 10. 
Tasks, Coordination, and Swarms \u2014 Claude Code from Source", "url": "https://claude-code-from-source.com/ch10-coordination/", "why": "Task state machine (7 types, 5 statuses, TaskStateBase/LocalAgentTaskState fields), 3 background comms channels (disk/notifications/queue), SendMessage 4-mode routing + auto-resume, TaskStop kill switch, coordinator mode internals, swarm mailbox."}, {"title": "Claude Code changelog \u2014 Claude Code Docs (official)", "url": "https://code.claude.com/docs/en/changelog", "why": "Confirms version-specific facts: v2.1.172 'Sub-agents can now spawn sub-agents up to 5 levels deep'; Workflow tool agent() attribution."}, {"title": "v2.1.63 Task->Agent tool rename breaking hooks \u2014 GitHub Issue #29677", "url": "https://github.com/anthropics/claude-code/issues/29677", "why": "Confirms the v2.1.63 Task->Agent rename is a breaking change for PreToolUse/PostToolUse hook scripts that check the tool name."}, {"title": "SendMessage silently fails when using agent name \u2014 GitHub Issue #42999", "url": "https://github.com/anthropics/claude-code/issues/42999", "why": "Documents the gotcha that SendMessage with agent NAME may silently fail for resuming completed agents; only agent ID works reliably."}, {"title": "Claude Code v2.1.172 Release Notes \u2014 claudeupdates.dev", "url": "https://www.claudeupdates.dev/version/2.1.172", "why": "Independent corroboration of v2.1.172 nested subagent (5-level) release and the agent-lifecycle stability fixes (stuck-active panel, fixed background agent project-settings isolation)."}, {"title": "Task tool input schema (TaskArgs) \u2014 letta-ai/letta-code Task.ts", "url": "https://github.com/letta-ai/letta-code/blob/32e042d5/src/tools/impl/Task.ts", "why": "Third-party reimplementation confirming exact Task tool args: command/subagent_type/prompt/description/model/agent_id/conversation_id/run_in_background, validating the schema shape from primary sources."}], "summary": "Claude Code's subagent system is 
orchestrated by a single model-facing meta-tool: the \"Agent\" tool (legacy alias \"Task\", renamed in v2.1.63). When the parent model calls Agent with {subagent_type, prompt, description, model, run_in_background}, it spawns a child agent that runs its own full conversation loop in an isolated context window with its own system prompt, tool pool, permission boundary, and abort controller. The child does its work and returns ONLY its final message verbatim as the tool result \u2014 the parent never sees intermediate tool calls or reasoning. Subagents are defined as Markdown files with YAML frontmatter at .claude/agents/ (project), ~/.claude/agents/ (user), via --agents CLI JSON, in plugins, or via managed settings, with a fixed 5-level precedence. Each subagent's \"description\" field drives automatic delegation, but users can force invocation via natural-language naming, @-mention, or --agent (run whole session as that agent). Parallel spawning happens naturally when the model emits multiple Agent tool calls in one turn; background subagents (run_in_background:true or background:true frontmatter or Ctrl+B) run concurrently and auto-deny any prompt. As of v2.1.172, subagents can spawn nested subagents (foreground at any depth, background capped at depth 5). 
Communication beyond prompt/result uses the \"SendMessage\" tool (only with CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1), which routes by recipient name/ID/UDS-socket/bridge-session and auto-resumes dead agents from their disk transcript."}, "skills": {"asOfDate": "2026-06", "claimsToVerify": ["The per-skill description+when_to_use listing is hard-capped at 1,536 characters each (configurable via maxSkillDescriptionChars), and the total skill-listing budget is 1% of the model context window (configurable via skillListingBudgetFraction or SLASH_COMMAND_TOOL_CHAR_BUDGET env var), dropping least-invoked descriptions first.", "On compaction, each invoked skill is re-attached with only its first 5,000 tokens, sharing a combined 25,000-token budget filled most-recent-first (older skills can be dropped entirely).", "Plugin skills are namespaced 'plugin-name:skill-name' and cannot conflict with enterprise/personal/project levels; the plugin root SKILL.md is the ONLY case where the frontmatter 'name' field sets the command name (otherwise directory name / filename governs).", "disable-model-invocation: true removes the skill's description from context AND blocks preloading into subagents; allowed-tools grants permission-without-approval but does not restrict the callable tool set; disallowed-tools is cleared on the next user message.", "The Skill tool input schema is a single 'command' string field; skill body text never lives in the system prompt but is injected as a hidden (isMeta:true) user message plus a visible metadata message (//)."], "components": [{"config": "Frontmatter keys (all optional unless noted): name (defaults to dir name), description (recommended; default = first markdown paragraph), when_to_use (appended to description with ' - ', counts toward 1,536 cap), disable-model-invocation (bool, default false), user-invocable (bool, default true), allowed-tools (space/comma string or YAML list; supports Bash(git add *) / Skill(name *) syntax), disallowed-tools (same 
format, clears on next user message), model, effort, context (set to 'fork'), agent (Explore/Plan/general-purpose/custom), hooks, paths (globs limiting auto-activation), argument-hint, arguments (space string or YAML list), shell (bash default | powershell, requires CLAUDE_CODE_USE_POWERSHELL_TOOL=1).", "dataModel": "YAML frontmatter block delimited by --- at file start. Fields use kebab-case (name, description, allowed-tools, disable-model-invocation, user-invocable, disallowed-tools, model, effort, context, agent, hooks, paths, shell, argument-hint, arguments, when_to_use). Note the snake_case when_to_use is the YAML-source key, mapped internally to whenToUse. JSON tool schema entry: { type:'skill', name, description, allowedTools:[...], disallowedTools:[...], model, isSkill:true, disableModelInvocation, userInvocable, context, agent, hooks, paths, promptContent }.", "mechanism": "Startup scan loads skills/commands from user (~/.claude/skills/), project (.claude/skills/), parent dirs up to repo root, nested .claude/skills/ on demand (monorepo), --add-dir directories' .claude/skills/, plugins, and bundled set. Each SKILL.md parsed: frontmatter (between --- markers) becomes metadata; remainder is promptContent. Directory name (or plugin:dir name for plugins, or filename for legacy commands) becomes the command name typed after /. The frontmatter 'name' is the DISPLAY label only, EXCEPT for a plugin root SKILL.md where name (or plugin dir name fallback) sets the command. Live change detection watches SKILL.md text only (hooks/MCP/agents need /reload-plugins).", "name": "Skill definition file (SKILL.md)", "purpose": "The single required entrypoint for each skill; carries metadata frontmatter + markdown body instructions."}, {"config": "Filter predicate: type==='prompt' && isSkill===true && !disableModelInvocation && (source!=='builtin' || isModeCommand===true) && (description || when_to_use present). 
Format: `\"\": - `.", "dataModel": "Tool schema: name='Skill', input_schema={command:string (skill name, no args)}, output_schema={success:boolean, commandName:string}. Prompt generated via async prompt() function.", "mechanism": "Unlike static tools (Read/Bash), the Skill tool's 'description' field is a dynamic async generator. At each API request it aggregates ALL skills eligible for model invocation, formats each as `\"name\": description - when_to_use` (when_to_use appended with ' - ' separator), and wraps them in + XML inside the description. Claude picks a skill via tool_use with input {command:'skill-name'}. Validation: errorCode 1 empty, 2 unknown, 3+ can't-load/permission/already-running. The Skill tool is gated by permission rules Skill / Skill(name) / Skill(name *) and the skills filter; when set, 'Skill' is auto-added to allowedTools.", "name": "Skill tool (model-invoked meta-tool)", "purpose": "The single meta-tool exposed to the model that dispatches to any individual skill; implements progressive disclosure level 1."}, {"config": "budget knobs: skillListingBudgetFraction (fraction of context, default 0.01), SLASH_COMMAND_TOOL_CHAR_BUDGET (fixed char env var), maxSkillDescriptionChars (per-entry cap, default 1536). skillOverrides states: on / name-only / user-invocable-only / off (written to settings.local.json via /skills menu; absent = on; does NOT affect plugin skills).", "dataModel": "ContextWindow = systemPrompt + [skill listing inside Skill tool desc] + conversation. Budget = 1% of model context window (default) OR SLASH_COMMAND_TOOL_CHAR_BUDGET fixed chars.", "mechanism": "Level 1 = name+description preloaded into Skill tool description every turn (subject to char budget: scales at 1% of context window, least-invoked skills' descriptions dropped first when overflow, run /doctor to see). Level 2 = full SKILL.md body loaded only when Claude/user invokes the skill, injected as a single message persisting for the session. 
Level 3+ = supporting files (scripts/, references/, assets/) read on demand via Read/Bash by Claude. On auto-compaction: most recent invocation of each skill re-attached keeping first 5,000 tokens each, sharing a 25,000-token combined budget, filled most-recent-first so older skills can be dropped.", "name": "Progressive disclosure + listing budget", "purpose": "Keep token cost near-zero until a skill is actually needed; bound the always-loaded metadata."}, {"config": "Strings honored: $ARGUMENTS, $ARGUMENTS[N] / $N (0-based, shell-style quoting), $name (declared via arguments: list), ${CLAUDE_SESSION_ID}, ${CLAUDE_EFFORT} (low/medium/high/xhigh/max; ultracode reports as xhigh), ${CLAUDE_SKILL_DIR} (skill's own dir, not plugin root). disableSkillShellExecution:true in settings replaces !`cmd` with '[shell command execution disabled by policy]' (bundled/managed unaffected).", "dataModel": "Skill invocation = metadata message + isMeta:true prompt message + optional command_permissions message ({type:'command_permissions', allowedTools, model}).", "mechanism": "Before the body reaches Claude, substitutions run ONCE over the original file (command output is plain text, not re-scanned). Inline !`cmd` recognized only when ! starts a line or follows whitespace (KEY=!`cmd` is left literal). Multi-line via ```! fenced block. shell frontmatter selects bash (default) or powershell. Arguments: $ARGUMENTS (or appended as 'ARGUMENTS: ' if absent), $ARGUMENTS[N]/$N positional, $name from arguments list. \\$ escapes a literal $. On invocation Claude receives base dir path so bundled resources are reachable.", "name": "Argument + shell-context injection", "purpose": "Pass user/model args into the skill and inline live command output before Claude sees the body."}, {"config": "skills filter accepts: omitted (all discovered on + Skill tool auto-added), 'all', [name,...] (only those; plugin skills as plugin:skill), or [] (disable all). 
Unlisted skills' files remain reachable via Read/Bash (filter, not sandbox).", "dataModel": "Sources: enterprise/managed (all users) > personal (~/.claude) > project (.claude) \u2014 same-name overrides in that order. Plugins are namespaced plugin:skill and never collide. Skill takes precedence over same-named command.", "mechanism": "Precedence enterprise > personal > project; plugin skills namespaced plugin-name:skill-name so they never conflict. SDK: settingSources/setting_sources controls loading (must include 'user'/'project'); skills option on query() is a filter ('all' | [names] | [] disable all).", "name": "Discovery precedence + SDK integration", "purpose": "Resolve which skill wins when names collide across scopes; expose skills programmatically in the Agent SDK."}], "confidence": "high", "dimension": "skills", "externalInterfaces": ["Skill tool (model-invoked meta-tool): name='Skill', input_schema={command:string}, output_schema={success,commandName}", "CLI flag --add-dir and command /add-dir load .claude/skills from extra dirs (NOT permissions.additionalDirectories)", "Settings.json keys: disableBundledSkills, skillOverrides (object: skill->{on|name-only|user-invocable-only|off}), skillListingBudgetFraction, maxSkillDescriptionChars, disableSkillShellExecution", "Env vars: SLASH_COMMAND_TOOL_CHAR_BUDGET, CLAUDE_CODE_USE_POWERSHELL_TOOL=1, CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1", "Built-in vars injected into skill body: $ARGUMENTS, $ARGUMENTS[N]/$N, $name, ${CLAUDE_SESSION_ID}, ${CLAUDE_EFFORT}, ${CLAUDE_SKILL_DIR}", "Slash menus: /skill-name, /skills (Space=cycle state, Enter=save), /doctor (budget overflow), /reload-plugins, /plugin (plugin skills)", "Permission rule syntax: Skill, Skill(name), Skill(name *)", "Agent SDK (Python/TS): setting_sources, skills option, allowed_tools; auto-adds 'Skill' to allowed_tools when skills set", "Plugin manifest: .claude-plugin/plugin.json; plugin root SKILL.md single-skill fallback uses name field or 
install-dir fallback"], "keyBehaviors": ["DEFAULTS: user-invocable=true, disable-model-invocation=false; a skill with neither description nor when_to_use is FILTERED OUT of the Skill tool entirely (won't be model-invoked).", "allowed-tools GRANTS approval-without-prompt for listed tools while skill is active but does NOT restrict the callable set; disallowed-tools REMOVES tools from the pool but CLEARS on the next user message (transient). Both support space/comma strings or YAML lists and Bash(git add *) wildcard syntax.", "Commands were MERGED into skills: .claude/commands/deploy.md and .claude/skills/deploy/SKILL.md both produce /deploy identically; a skill wins over a same-named command. legacy commands keep working and support the same frontmatter.", "In the SDK, SKILL.md allowed-tools is IGNORED \u2014 control tool access via the query() allowedTools option; passing skills=[...] adds 'Skill' to allowedTools automatically, but if you pass an explicit tools list you must include 'Skill' yourself.", "Plugin skills use namespace plugin-name:skill-name and CANNOT conflict with other levels; they are NOT affected by skillOverrides (manage via /plugin). Plugin root SKILL.md is the only place frontmatter name sets the command name.", "disable-model-invocation:true removes the skill's description from Claude's context entirely (level-0 disclosure) AND blocks preloading into subagents; user-invocable:false only hides from the / menu, NOT from Skill-tool access.", "context: fork runs the skill body as the subagent TASK prompt (no conversation history); agent: defaults to general-purpose; Explore/Plan agents skip CLAUDE.md+git status so a forked skill using them sees only SKILL.md + agent system prompt.", "Live change detection covers SKILL.md text only; if the skill folder is also a plugin, hooks/MCP/agents/output-styles changes need /reload-plugins. 
Creating a NEW top-level skills dir that didn't exist at startup requires a restart.", "Skill descriptions must be SINGLE-LINE in the YAML (multi-line breaks discovery \u2014 known gotcha). Keep SKILL.md body <500 lines; recommend <5,000 words.", "Security: project skills' allowed-tools take effect only after workspace trust dialog; bundled skills can be globally disabled via disableBundledSkills; malicious skills can exfiltrate data so audit before use.", "A few built-in commands (/init, /review, /security-review) are reachable via the Skill tool, but /compact and /help are NOT.", "ultrathink keyword in skill body requests deeper reasoning when the skill runs."], "openQuestions": ["Exact precedence ordering when enterprise/managed vs plugin vs MCP-provided skills collide (docs say enterprise>personal>project and plugins can't conflict, but MCP-server-provided skill precedence relative to these is under-specified).", "Whether disallowed-tools clearing is strictly 'next user message' or 'end of turn' \u2014 docs say 'next message you send' which needs confirming against harness behavior.", "Precise behavior of effort override (low/medium/high/xhigh/max) interaction with model-specific level availability and the ultracode=>xhigh mapping."], "sources": [{"title": "Extend Claude with skills - Claude Code Docs", "url": "https://code.claude.com/docs/en/skills", "why": "Primary authoritative spec: full frontmatter field reference, precedence, budget knobs (skillListingBudgetFraction/SLASH_COMMAND_TOOL_CHAR_BUDGET/maxSkillDescriptionChars/1536 cap), skillOverrides states, live change detection, bundled skills, lifecycle/compaction (5k/25k budgets), substitution vars."}, {"title": "Agent Skills in the SDK - Claude Code Docs", "url": "https://code.claude.com/docs/en/agent-sdk/skills", "why": "Authoritative SDK behavior: skills option ('all'|list|[]), auto-add of Skill to allowedTools, setting_sources gating, allowed-tools IGNORED in SDK, filesystem-only registration (no 
programmatic API)."}, {"title": "Plugins reference - Claude Code Docs", "url": "https://code.claude.com/docs/en/plugins-reference", "why": "Plugin skill location/format, plugin-root SKILL.md fallback using name field vs install-dir fallback, plugin agent frontmatter fields, hook event list (SubagentStart etc.)"}, {"title": "Equipping agents for the real world with Agent Skills - Anthropic Engineering", "url": "https://www.anthropic.com/engineering/equipping-agents-for-the-real-world-with-agent-skills", "why": "Design rationale: three-level progressive disclosure (metadata -> SKILL.md -> bundled files), name+description preloaded into system prompt at startup, SKILL.md body loaded via Bash/Read on demand, Agent Skills open standard (Dec 18 2025)."}, {"title": "Claude Agent Skills: A First Principles Deep Dive - Han, Not Solo", "url": "https://leehanchung.github.io/blogs/2025/10/26/claude-skills-deep-dive/", "why": "Reverse-engineered internals: Skill tool input_schema {command}/output_schema {success,commandName}, dynamic async prompt() generator, isMeta dual-message injection (visible // + hidden full prompt), when_to_use->whenToUse mapping, filter predicate requiring description|when_to_use, plugin name format plugin:skill and (plugin:name) suffix."}, {"title": "Create custom subagents - Claude Code Docs", "url": "https://code.claude.com/docs/en/sub-agents", "why": "Subagent skills: preload field, cannot preload skills with disable-model-invocation:true, Explore/Plan skip CLAUDE.md."}], "summary": "The Skills system lets Claude Code (and the Agent SDK) extend itself via directories each containing a SKILL.md with YAML frontmatter (metadata) + markdown body (instructions). 
It implements THREE levels of progressive disclosure: (1) at startup only each skill's name+description+when_to_use are loaded into the Skill tool's dynamically-generated description (not the system prompt), bounded by a char budget; (2) when the model (or user) invokes a skill the full SKILL.md body is read and injected as a hidden user message (isMeta:true) plus a visible loading-status message; (3) supporting files (scripts/, references/, assets/) are loaded on demand by Claude. Skills are NOT executable code \u2014 they are prompt templates that modify conversation + execution context (allowed-tools, model, effort). The model invokes them through a single meta-tool named \"Skill\" (capital S) whose input is just {command:\"<skill-name>\"}; Claude decides which skill to call via pure LLM reasoning over the description list, with no algorithmic routing. Custom commands (legacy .claude/commands/) have been merged into skills: both produce /name and behave identically. Skills follow the open Agent Skills standard (agentskills.io) extended by Claude Code with invocation-control frontmatter, subagent execution (context:fork), and dynamic shell-context injection."}, "slash-commands-plan": {"asOfDate": "2026-06", "claimsToVerify": ["The built-in tool is named exactly 'ExitPlanMode' (both EXIT_PLAN_MODE_TOOL_NAME and EXIT_PLAN_MODE_V2_TOOL_NAME constants resolve to the string 'ExitPlanMode'); the tool does NOT accept plan content as a parameter and instead reads it from a file on disk whose default location is ~/.claude/plans/<slug>.md, overridable by the settings.json 'plansDirectory' key.", "Slash commands and skills have been merged: .claude/commands/deploy.md and .claude/skills/deploy/SKILL.md both create /deploy, and frontmatter fields are SHARED \u2014 including allowed-tools, model, argument-hint, disable-model-invocation, plus skill-only fields arguments, user-invocable, disallowed-tools, effort, context(fork), agent, hooks, paths, shell.", "The 5 ExitPlanMode approval options presented 
to the user are exactly: 'Approve and start in auto mode', 'Approve and accept edits', 'Approve and review each edit manually', 'Keep planning with feedback', 'Refine with Ultraplan'; each approve option switches the permission mode accordingly."], "components": [{"config": "YAML frontmatter: description (recommended, ~60 chars for /help; combined description+when_to_use truncated at 1,536 chars in listing, configurable via maxSkillDescriptionChars); allowed-tools (string|array); disallowed-tools (clears on next user message); model (sonnet|opus|haiku|inherit, or full values like /model; session resumes next turn); effort (low|medium|high|xhigh|max); argument-hint; arguments; disable-model-invocation (bool, default false \u2014 hides description from Claude's context and blocks Skill tool); user-invocable (bool default true; false hides from / menu but Claude can still Skill-invoke); context: fork; agent (Explore|Plan|general-purpose|custom); hooks; paths (glob activation filter); shell (bash|powershell, needs CLAUDE_CODE_USE_POWERSHELL_TOOL=1); name (display name, defaults to dir/file name). Settings: disableBundledSkills, disableSkillShellExecution, skillListingBudgetFraction / SLASH_COMMAND_TOOL_CHAR_BUDGET, skillOverrides, maxSkillDescriptionChars.", "dataModel": "File: .claude/commands/.md OR .claude/skills//SKILL.md. Body = markdown prompt. Supported substitutions: $ARGUMENTS (whole string; auto-appended as 'ARGUMENTS: ' if absent), $ARGUMENTS[N] / $N (0-based; shell-style quoting, $0 = first), $name (declared arg), ${CLAUDE_SESSION_ID}, ${CLAUDE_EFFORT}, ${CLAUDE_SKILL_DIR}, ${CLAUDE_PLUGIN_ROOT}. Inline shell injection: !`command` (recognized only at line start or after whitespace; KEY=!`cmd` is literal). Multi-line shell: fenced block opened with ```! . Escaping: \\$1 yields literal; only single backslash directly before token escapes. 
@file refs inline file contents.", "mechanism": "Discovery scans project, personal, and plugin trees; command name is derived from filename (commands/) or directory name (skills/), namespaced for plugins as plugin-name:command-name. When the user types '/cmd args', the harness parses args (positional, shell-style quoting), reads the .md file, resolves frontmatter, then RENDERs the body in this order: (1) expand string substitutions ($ARGUMENTS, $N, ${CLAUDE_*}); (2) execute !`cmd` / ```! blocks (preprocessing, output inserted as plain text, NOT re-scanned); (3) inline @file references. The rendered markdown is injected as a single user message. allowed-tools are pre-approved for that turn (permission grant, not availability restriction); model/effort override the session for the turn. disable-model-invocation:true removes it from the Skill tool's catalog so the model cannot self-invoke it. Descriptions are loaded into context (budget = 1% of context window, scales with skillListingBudgetFraction/SLASH_COMMAND_TOOL_CHAR_BUDGET) so Claude knows what is available; full body loads only on invocation.", "name": "Custom slash commands / Skills (merged system)", "purpose": "Reusable, parameterized prompts invoked by typing /name or auto-invoked by the model via the Skill tool."}, {"config": "N/A (hardcoded in CLI)", "dataModel": "Recognized only at start of message. Each command has a purpose string shown in /help. Aliases map to canonical (/reset,/new\u2192/clear; /quit\u2192/exit; /continue\u2192/resume; /checkpoint,/undo\u2192/rewind; /allowed-tools\u2192/permissions; /bg\u2192/background; /cost,/stats\u2192/usage; /ios,/android\u2192/mobile; /rc\u2192/remote-control; /tp\u2192/teleport; /proactive\u2192/loop). Version-gated commands report 'Unknown command: /cd' on older versions. 
Many appear only on certain platforms/plans (/desktop macOS+Windows+subscription; /upgrade Pro/Max; /setup-bedrock needs CLAUDE_CODE_USE_BEDROCK=1; /sandbox supported platforms only).", "mechanism": "These are hardcoded behaviors in the CLI (not markdown prompts). When the first whitespace-delimited token of a user message starts with '/', the harness looks it up in the built-in registry; if matched, it executes native logic (e.g. /clear empties context but keeps project memory; /compact summarizes; /model opens a picker or sets the model and saves it; /plan enters plan mode with an optional immediate task). MCP servers expose prompts as commands using the format /mcp__<server>__<prompt> (dynamically discovered). Any remaining text after the command is passed as arguments. A few built-in commands (/init, /review, /security-review, /fewer-permission-prompts, /simplify, /code-review, /run, /verify) are exposed to the model via the Skill tool; most (/compact, /clear, etc.) are NOT.", "name": "Built-in commands", "purpose": "Hardcoded session-control commands parsed at the start of a user message."}, {"config": "Entry vectors: Shift+Tab cycle (default \u2192 acceptEdits \u2192 plan, with auto/bypassPermissions/dontAsk gated in), --permission-mode plan startup flag, /plan [description] command, or the model calling EnterPlanMode tool. settings.json: permissions.defaultMode = 'plan'.", "dataModel": "Tool name (both constants resolve to the string 'ExitPlanMode'). inputSchema = z.strictObject({ allowedPrompts?: array of {tool: enum['Bash'], prompt: string} }).passthrough(). Note: the INTERNAL inputSchema does NOT include plan content (plan is read from disk by call()). The SDK-facing _sdkInputSchema EXTENDS inputSchema with plan? and planFilePath? injected by normalizeToolInput (CCR web UI can send an edited plan via permissionResult.updatedInput). 
outputSchema = { plan: string|null, isAgent: bool, filePath?: string, hasTaskTool?: bool, planWasEdited?: bool, awaitingLeaderApproval?: bool, requestId?: string }.", "mechanism": "EnterPlanMode (no parameters) switches the permission context mode to 'plan', saving the prior mode as prePlanMode. While mode==='plan', a recurring plan-mode system prompt is injected (read-only enforcement + 4-phase workflow: Understanding \u2192 Design \u2192 Review \u2192 Final Plan), and the ONLY file the model may edit is the plan file. The model writes/edits the plan using the standard Edit/Write tools (Edit is NOT disabled; it's permitted specifically for the plan path). The model then calls ExitPlanMode when done. ExitPlanMode.isReadOnly() returns false (it writes to disk); shouldDefer:true; isEnabled gated (disabled when --channels active). validateInput rejects if called outside plan mode (errorCode 1, message 'You are not in plan mode...'). checkPermissions returns behavior:'ask' with message 'Exit plan mode?' (for non-teammates) \u2014 this is the approval prompt. On approval, call() reads the plan from disk (getPlan(agentId)), restores prePlanMode (with circuit-breaker fallback to 'default' if auto gate now off), sets hasExitedPlanMode + needsPlanModeExitAttachment flags, and the tool_result echoes the approved plan back to the model.", "name": "Plan Mode (EnterPlanMode / ExitPlanMode tool pair)", "purpose": "A read-only permission mode where Claude researches and writes a plan to a file, then requests user approval before making any changes."}, {"config": "settings.json: plansDirectory (relative path resolved against cwd; must stay within project root or falls back to default ~/.claude/plans). Slug generated via generateWordSlug() with up to 10 retries to avoid filename collisions. Per-session cache keyed by sessionId. 
clearPlanSlug on /clear; copyPlanForResume on resume; copyPlanForFork generates a NEW slug to avoid clobbering.", "dataModel": "getPlanFilePath(agentId?): main session \u2192 <plansDir>/<slug>.md; subagent \u2192 <plansDir>/<slug>-agent-<agentId>.md. getPlan() returns file contents or null (ENOENT tolerated). Recovery sources scanned backwards in transcript: (1) ExitPlanMode tool_use.input.plan (injected by normalizeToolInput), (2) user message .planContent field (set during clear-context-and-implement flow), (3) attachment of type 'plan_file_reference' with .planContent (created by auto-compact). File snapshots are SystemFileSnapshotMessage { type:'system', subtype:'file_snapshot', snapshotFiles:[{key,path,content}], isMeta:true } written incrementally in remote (CCR) sessions.", "mechanism": "getPlansDirectory() (memoized): reads settings.plansDirectory; if set, resolves relative to cwd and validates it stays within project root (path-traversal guard, else falls back to ~/.claude/plans); default = join(getClaudeConfigHomeDir(), 'plans'). mkdirSync(recursive) ensures it exists. getPlanSlug(sessionId): lazily generates a random word slug (generateWordSlug), retries up to MAX_SLUG_RETRIES=10 to find a non-colliding filename, caches per session. getPlanFilePath(agentId): main \u2192 <plansDir>/<slug>.md; subagent \u2192 <plansDir>/<slug>-agent-<agentId>.md. getPlan(agentId) reads the file (ENOENT\u2192null). On resume (copyPlanForResume), slug is restored from transcript log.messages[].slug; if the file is missing, recovery attempts file snapshot first, then message-history scan. On fork (copyPlanForFork), a NEW slug is generated and the original content is copied so sessions don't clobber each other. persistFileSnapshotIfRemote() writes incremental plan snapshots to the transcript (only in remote/CCR environments).", "name": "Plan file location & persistence", "purpose": "Where the plan markdown lives on disk and how it survives clear/resume/fork."}, {"config": "Ctrl+G opens the plan file in the user's $EDITOR. 
When showClearContextOnPlanAccept is enabled, each approve option also offers to clear the planning context first. Accepting also auto-names the session from plan content (unless --name / /rename set). Gate-off fallback: if prePlanMode was 'auto' but the auto gate is now off (circuit breaker / settings), restoreMode falls back to 'default' and a notification is shown.", "dataModel": "ExitPlanMode tool_result for non-agent approval: content includes 'User has approved your plan. You can now start coding...' + filePath + optional TeamCreate hint (if Agent tool available) + '## Approved Plan:' / '## Approved Plan (edited by user):' header + plan text. For teammates with plan_mode_required: a plan_approval_request mailbox message { type:'plan_approval_request', from, timestamp, planFilePath, planContent, requestId } is sent to 'team-lead', and tool_result tells the teammate to wait for inbox approval. For subagents (isAgent): tool_result = 'User has approved the plan... respond with ok'. Empty plan: tool_result = 'User has approved exiting plan mode. You can now proceed.'", "mechanism": "ExitPlanMode.checkPermissions returns behavior:'ask', message:'Exit plan mode?'. The UI renders the plan file contents and presents 5 options: (1) Approve and start in auto mode \u2192 sets mode to 'auto'; (2) Approve and accept edits \u2192 mode 'acceptEdits'; (3) Approve and review each edit manually \u2192 mode 'default'; (4) Keep planning with feedback \u2192 stays in plan mode, feeds user feedback back; (5) Refine with Ultraplan \u2192 hands off to a browser-based Claude Code on the web session. On approve, call() restores the chosen mode (from prePlanMode or the chosen option), sets hasExitedPlanMode=true and needsPlanModeExitAttachment=true (which injects a plan-exit attachment into subsequent context). User can press Ctrl+G to edit the plan file in $EDITOR before approving; an edited plan is written back to disk and planWasEdited=true is flagged. 
For plan_mode_required teammates (isTeammate() && isPlanModeRequired()), no local approval dialog: a plan_approval_request is written to the team-lead mailbox and the teammate awaits an inbox response.", "name": "Plan approval flow", "purpose": "The 5-option UX presented when the model calls ExitPlanMode, and how approval mutates session state."}], "confidence": "high", "dimension": "slash-commands-plan", "externalInterfaces": ["File paths: .claude/commands/<name>.md, ~/.claude/commands/<name>.md, .claude/skills/<name>/SKILL.md, ~/.claude/skills/<name>/SKILL.md, <plugin>/skills/<name>/SKILL.md, ~/.claude/plans/<slug>.md, ~/.claude/plans/<slug>-agent-<agentId>.md", "CLI flags: --permission-mode plan, --add-dir <path>, -p (non-interactive), --dangerously-skip-permissions, --allow-dangerously-skip-permissions, --name", "Interactive: type / for command menu, Shift+Tab to cycle modes (default\u2192acceptEdits\u2192plan), Ctrl+G to edit the plan file in $EDITOR", "settings.json keys: permissions.defaultMode, permissions.disableAutoMode, permissions.disableBypassPermissionsMode, plansDirectory, showClearContextOnPlanAccept, disableBundledSkills, disableSkillShellExecution, skillOverrides (values: on|name-only|user-invocable-only|off), skillListingBudgetFraction, maxSkillDescriptionChars", "Env vars: SLASH_COMMAND_TOOL_CHAR_BUDGET, CLAUDE_CODE_USE_POWERSHELL_TOOL=1, CLAUDE_CODE_ENABLE_AUTO_MODE, CLAUDE_CODE_NEW_INIT=1, CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1", "Tool names: Skill (model-invoked), ExitPlanMode (a.k.a. EXIT_PLAN_MODE_V2_TOOL_NAME), EnterPlanMode, Agent (Task), TeamCreate, AskUserQuestion", "Substitution vars in command/skill bodies: $ARGUMENTS, $ARGUMENTS[N], $N, $name, ${CLAUDE_SESSION_ID}, ${CLAUDE_EFFORT}, ${CLAUDE_SKILL_DIR}, ${CLAUDE_PLUGIN_ROOT}", "MCP prompts as commands: /mcp__<server>__<prompt>"], "keyBehaviors": ["Slash commands and skills are ONE merged system. .claude/commands/deploy.md and .claude/skills/deploy/SKILL.md both create /deploy and behave identically. 
If a skill and a command share a name, the SKILL takes precedence. Existing commands keep working; skills add: a supporting-file directory, richer frontmatter (arguments, user-invocable, disallowed-tools, effort, context, agent, hooks, paths, shell).", "A command/skill is ONLY recognized at the START of a user message. Text after the name is arguments. /plan [description] both enters plan mode AND immediately starts on the task; /plan with no arg just enters plan mode.", "String substitution runs ONCE over the original file. !`cmd` output is plain text and is NOT re-scanned for further placeholders, so a command cannot emit a placeholder for a later pass. Inline ! is only recognized at line start or after whitespace; 'KEY=!`cmd`' is left literal.", "$ARGUMENTS: if the placeholder is absent from the body but args were provided, the harness APPENDS 'ARGUMENTS: ' to the end. Indexed args use shell-style quoting: /my-skill \"hello world\" second \u2192 $0='hello world', $1='second'. Escape literal $ with a single backslash directly before the token (\\$1.00); doubled backslash (\\\\$1) leaves both backslashes and still expands $1.", "Skill descriptions load into context so the model knows what is available, but full content loads only on invocation. The listing budget = 1% of the model's context window (configurable via skillListingBudgetFraction or SLASH_COMMAND_TOOL_CHAR_BUDGET); on overflow, least-invoked skills lose descriptions first. Per-entry combined description+when_to_use is capped at 1,536 chars (configurable via maxSkillDescriptionChars).", "Read-only enforcement in plan mode is PROMPT-BASED, not a hard tool toggle. The plan-mode system message explicitly forbids edits/commits/non-readonly tools, but the Edit/Write tools themselves remain available \u2014 the harness permits Edit specifically against the plan file path. 
Other mutating tools (Bash that writes, MCP mutators) are blocked by the plan permission mode (mode==='plan' auto-denies writes like default mode, EXCEPT the plan file).", "ExitPlanMode does NOT take plan content as a parameter \u2014 it reads the plan from the file the model wrote. The plan is loaded from disk in call() via getPlan(agentId). If the file is missing/empty, the approval dialog can still be presented and tool_result says 'User has approved exiting plan mode. You can now proceed.' (This is why the dialog can appear with 'no plan' unprompted.)", "planWasEdited is tracked separately: when CCR web UI (or Ctrl+G) sends an edited plan via permissionResult.updatedInput, the edited plan is written back to disk (writeFile) and re-snapshotted (persistFileSnapshotIfRemote), and tool_result labels it 'Approved Plan (edited by user)' so the model knows the user changed something.", "ExitPlanMode has a circuit-breaker fallback: if prePlanMode was 'auto' but the auto-mode gate is now off (circuit breaker or settings disable), restoreMode falls back to 'default' instead of calling setAutoModeActive(true) directly \u2014 prevents ExitPlanMode from bypassing the auto-mode gate.", "ExitPlanMode.validateInput rejects with errorCode 1 if called when mode !== 'plan' ('You are not in plan mode. This tool is only for exiting plan mode...'). This happens because the tool is announced in the deferred-tool list regardless of mode so the model can call it after plan approval (fresh delta on compact/clear).", "Teammates bypass the local approval dialog entirely (checkPermissions returns behavior:'allow'; requiresUserInteraction() returns false). 
If isPlanModeRequired() is true, a plan_approval_request is written to the team-lead mailbox and the teammate blocks on an inbox response; if voluntary plan mode, it exits locally without approval.", "plansDirectory in settings.json is resolved relative to cwd and validated to stay within project root; a path-traversal attempt falls back to ~/.claude/plans. The new (V2) plan mode FORCES using ~/.claude/plans unless plansDirectory is set, which breaks workflows using plan files elsewhere (known issue #12707).", "Plan slug is a random word slug (generateWordSlug) with up to 10 collision retries; main session file is <slug>.md, subagent plan is <slug>-agent-<agentId>.md. /clear clears the slug; resume restores it from transcript; fork generates a NEW slug (copyPlanForFork) to avoid clobbering.", "Protected paths (`.git`, `.vscode`, `.claude` except `.claude/worktrees`, shell rc files, etc.) are NEVER auto-approved in plan/default/acceptEdits modes \u2014 they prompt. Even in plan mode, editing the plan file is allowed because it lives in the plans directory (not a protected path).", "Live change detection: adding/editing/removing a skill under ~/.claude/skills/ or project .claude/skills/ takes effect mid-session without restart; but creating a top-level skills dir that didn't exist at startup needs a restart, and plugin folder changes (hooks/, agents/, .mcp.json, output-styles/) need /reload-plugins."], "openQuestions": ["Exact contents of the EnterPlanMode tool's prompt and the FULL verbatim plan-mode system message (the 4-phase workflow text) \u2014 only paraphrased excerpts are publicly documented; the exact strings live in the bundled CLI.", "Whether there is a distinct EnterPlanMode tool definition beyond the permission-mode transition handler, or whether entering plan mode is purely a /plan + Shift+Tab + mode-transition mechanism (sources suggest EnterPlanMode exists as a callable tool that the model can invoke itself, equivalent to Shift+Tab).", "Exact behavior of `allowedPrompts` 
in the ExitPlanMode inputSchema (the Ant-internal prompt-based permission section is stubbed out in the public leaf-kit repo) \u2014 whether/how it pre-approves Bash categories post-approval.", "Whether /plan with a description arg bypasses the EnterPlanMode tool call entirely (UI-level mode switch) or still routes through the tool."], "sources": [{"title": "Commands reference \u2014 Claude Code Docs (code.claude.com/docs/en/commands)", "url": "https://code.claude.com/docs/en/commands", "why": "Official authoritative table of ALL built-in slash commands (/help, /clear, /init, /agents, /mcp, /memory, /model, /plan, /compact, etc.) with purposes, aliases, arguments, version gates, and Skill/Workflow markers."}, {"title": "Extend Claude with skills \u2014 Claude Code Docs (code.claude.com/docs/en/slash-commands)", "url": "https://code.claude.com/docs/en/slash-commands", "why": "Official doc confirming commands\u2194skills merge, file locations, the full frontmatter reference table (name/description/when_to_use/argument-hint/arguments/disable-model-invocation/user-invocable/allowed-tools/disallowed-tools/model/effort/context/agent/hooks/paths/shell), string substitutions ($ARGUMENTS/$N/${CLAUDE_*}), !`cmd` rules, skillOverrides states, skillListingBudgetFraction, disableSkillShellExecution."}, {"title": "Command Frontmatter Reference (anthropics/claude-plugins-official)", "url": "https://github.com/anthropics/claude-plugins-official/blob/main/plugins/plugin-dev/skills/command-development/references/frontmatter-reference.md", "why": "Official Anthropic plugin repo's full field specs: description (~60 chars), allowed-tools (string|array|Bash(git:*)), model (sonnet/opus/haiku), argument-hint, disable-model-invocation, with validation rules and complete examples."}, {"title": "Command Development Skill README (anthropics/claude-code)", "url": "https://github.com/anthropics/claude-code/blob/main/plugins/plugin-dev/skills/command-development/README.md", "why": "Official 
Anthropic command-development skill: file format, locations (project/personal/plugin), $ARGUMENTS/$1/$2 positional args, @file refs, !`bash` execution, ${CLAUDE_PLUGIN_ROOT}."}, {"title": "ExitPlanModeV2Tool.ts (leaf-kit/claude-analysis)", "url": "https://github.com/leaf-kit/claude-analysis/blob/main/src/tools/ExitPlanModeTool/ExitPlanModeV2Tool.ts", "why": "Reverse-engineered source: exact tool name 'ExitPlanMode', input/output zod schemas, validateInput/checkPermissions/call logic, plan-read-from-disk, teammate mailbox approval, circuit-breaker fallback, tool_result formats."}, {"title": "ExitPlanModeTool/prompt.ts (leaf-kit/claude-analysis)", "url": "https://github.com/leaf-kit/claude-analysis/blob/main/src/tools/ExitPlanModeTool/prompt.ts", "why": "Verbatim EXIT_PLAN_MODE_V2_TOOL_PROMPT: 'does NOT take plan content as a parameter', 'read from file', 'Only use when task requires planning implementation steps... not for research', AskUserQuestion separation."}, {"title": "utils/plans.ts (leaf-kit/claude-analysis)", "url": "https://github.com/leaf-kit/claude-analysis/blob/main/src/utils/plans.ts", "why": "Exact plan file path logic: getPlansDirectory (plansDirectory setting, cwd-relative, path-traversal guard, default ~/.claude/plans), getPlanSlug (generateWordSlug, MAX_SLUG_RETRIES=10), getPlanFilePath (main .md, subagent -agent-.md), copyPlanForResume/copyPlanForFork, recoverPlanFromMessages (3 recovery sources)."}, {"title": "Choose a permission mode \u2014 Claude Code Docs", "url": "https://code.claude.com/docs/en/permission-modes", "why": "Official: plan mode is read-only, Shift+Tab cycle, /plan prefix, --permission-mode plan, the 5 approval options, Ctrl+G plan editing, defaultMode:'plan' setting, protected paths list."}, {"title": "What Actually Is Claude Code's Plan Mode? 
(Armin Ronacher / lucumr.pocoo.org)", "url": "https://lucumr.pocoo.org/2025/12/17/what-is-plan-mode/", "why": "Deep independent analysis confirming read-only enforcement is prompt-based (not tool removal), plan file edited via Edit tool, EnterPlanMode/ExitPlanMode tool pair, and paraphrased 4-phase plan-mode system prompt."}, {"title": "[Feature Request] Plan mode should support plan files outside ~/.claude/plans (anthropics/claude-code#12707)", "url": "https://github.com/anthropics/claude-code/issues/12707", "why": "Confirms the new/V2 plan mode FORCES using ~/.claude/plans unless plansDirectory is configured, and references env vars for the V2 plan mode."}], "summary": "Claude Code's slash-command system is split into (a) built-in commands hardcoded in the CLI (/help, /clear, /init, /model, /plan, /mcp, /agents, /memory, /compact, /permissions, etc.) and (b) user-defined commands, which since the 2025-2026 \"skills merge\" are implemented identically whether they live at .claude/commands/*.md or .claude/skills/<name>/SKILL.md \u2014 both create the same /<name> command and share the same YAML frontmatter (description, allowed-tools, disallowed-tools, model, argument-hint, arguments, disable-model-invocation, user-invocable, etc.). Commands support $ARGUMENTS/$1/$N positional substitution, @file inlining, and !`bash`/```! fenced pre-processing of the prompt before it reaches the model. Plan Mode is a permission mode (mode === 'plan') that is read-only by enforcement: it is a permission context plus a recurring plan-mode system prompt, plus an EnterPlanMode/ExitPlanMode tool pair (the public tool name is literally \"ExitPlanMode\" \u2014 both V1 and V2 constants resolve to that string). 
The model writes a markdown plan to a file under the plans directory (default ~/.claude/plans/.md, or -agent-.md for subagents; configurable via settings.json plansDirectory), then calls ExitPlanMode (which takes NO plan content parameter \u2014 it reads the file from disk) to trigger a 5-option approval UI; on approval the session switches to the chosen permission mode (default/acceptEdits/auto) and the approved plan text is echoed back into the tool_result so the model can act on it."}, "tui-ide-config": {"asOfDate": "2026-06", "claimsToVerify": ["IDE discovery lock file path/format: IDE extensions write ~/.claude/ide/.lock (also seen as -.lock) containing JSON {pid, workspaceFolders, ideName, transport:\"ws\", authToken: 32-char lowercase hex (128-bit CSPRNG)}; launch env sets CLAUDE_CODE_SSE_PORT= and ENABLE_IDE_INTEGRATION=true; Claude authenticates over WS with header x-claude-code-ide-authorization: .", "Settings precedence is Managed > Local (.claude/settings.local.json) > Project (.claude/settings.json) > User (~/.claude/settings.json), with managed settings un-overridable and delivered via server / MDM plist com.anthropic.claudecode / registry HKLM\\SOFTWARE\\Policies\\ClaudeCode (Settings REG_SZ) / file managed-settings.json at /Library/Application Support/ClaudeCode/ (mac) or /etc/claude-code/ (linux) or C:\\Program Files\\ClaudeCode\\ (win); legacy C:\\ProgramData\\ClaudeCode dropped in v2.1.75.", "defaultMode=auto is ignored in project/local settings since v2.1.142 (only ~/.claude/settings.json can grant auto); the 12 IDE MCP tools are openFile, openDiff, getCurrentSelection, getLatestSelection, getOpenEditors, getWorkspaceFolders, getDiagnostics, checkDocumentDirty, saveDocument, close_tab, closeAllDiffTabs, executeCode; render throttle is 16ms (60fps) via lodash throttle with leading+trailing, using BSU/ESU (DEC mode 2026, ESC[?2026h/l) for atomic frame updates."], "components": [{"config": "FRAME_INTERVAL_MS=16; scroll frame=4ms; 
CLAUDE_CODE_DEBUG_REPAINTS to attribute full repaints; CLAUDE_CODE_ALT_SCREEN_FULL_REPAINT=1 forces full repaint each frame", "dataModel": "DOMElement { yogaNode, style, attributes, childNodes, dirty, _eventHandlers, scrollTop, pendingScrollDelta, stickyScroll }. Frame { screen:Screen, viewport:Size, cursor:{x,y,visible}, scrollHint, scrollDrainPending }. Packed cell (2x Int32): word0=charId; word1=styleId[31:17]|hyperlinkId[16:2]|width[1:0]. Parallel arrays: noSelect(Uint8Array), softWrap(Int32Array), damage(Rectangle).", "mechanism": "react-reconciler host config creates a custom in-memory DOM (7 element types: ink-root, ink-box, ink-text, ink-virtual-text, ink-link, ink-progress, ink-raw-ansi) reconciled in ConcurrentRoot mode. resetAfterCommit() triggers Yoga calculateLayout() then onRender(). Each frame: Stage1 React commit + Yoga layout -> Stage2 DOM-to-screen (walk tree into packed-cell Screen buffer) -> Stage3 overlay (selection/search highlight mutate buffer in-place, set prevFrameContaminated) -> Stage4 diff vs front frame (2 Int32 compares per cell, walks only damage rectangle) -> Stage5 optimize (merge adjacent row patches, cache style transitions) -> Stage6 write stdout as a SINGLE write() wrapped in BSU/ESU (ESC[?2026h ... ESC[?2026l) atomic updates. Blit optimization: clean unchanged-position nodes copy cells straight from prevScreen. Double buffer: front/back Frame swapped by pointer; pools shared across frames so IDs valid across swap.", "name": "Custom React+Ink Terminal Renderer", "purpose": "Render the whole TUI: streaming markdown, permission dialogs, spinners, scrollback, diff, vim-mode editor. 
NOT a Bubble Tea loop \u2014 it is a browser-grade retained-mode renderer."}, {"config": "tui: 'fullscreen' | 'default' (set via /tui or CLAUDE_CODE_NO_FLICKER); editorMode: 'normal'|'vim' (default normal); statusLine: {type:'command', command:''}; viewMode: 'default'|'verbose'|'focus'; autoScrollEnabled (default true); spinnerTipsEnabled; spinnerVerbs; prefersReducedMotion; terminalProgressBarEnabled (ConEmu/Ghostty 1.2.0+/iTerm2 3.6.6+)", "dataModel": "Settings keys: tui, editorMode, statusLine, viewMode, autoScrollEnabled, spinnerTipsEnabled, spinnerTipsOverride, spinnerVerbs, prefersReducedMotion, terminalProgressBarEnabled, syntaxHighlightingDisabled, autoMode {environment,allow,soft_deny,hard_deny arrays with literal \"$defaults\" inheritance}", "mechanism": "tui setting: 'fullscreen' = flicker-free alt-screen (DEC 1049) with virtualized scrollback and BSU/ESU atomic paints; 'default' = classic main-screen renderer. CLAUDE_CODE_NO_FLICKER env selects fullscreen; CLAUDE_CODE_DISABLE_ALTERNATE_SCREEN=1 forces default (and wins over the setting and CLAUDE_CODE_NO_FLICKER). Background sessions from agent view ALWAYS use fullscreen regardless. editorMode 'vim' adds a vim-mode editor in the prompt box (normal/insert). The /config tabbed Settings UI exposes status (model, account), and toggles like Auto-scroll, Editor mode, Show turn duration, Notifications, Terminal progress bar. statusLine: {type:'command', command:'~/.claude/statusline.sh'} runs a user script whose stdout is shown as the status line; disableAllHooks:true also kills the custom status line. Slash menu opens on '/' showing commands like /model, /usage, /compact, /remote-control, plus a Customize group (MCP, hooks, memory, permissions, plugins). 
IDE diff: when a connected IDE exists and diff tool is 'auto', edits open in the IDE diff viewer (openDiff blocks for user accept/reject); 'terminal' keeps them in-TUI.", "name": "TUI Modes & Status Line", "purpose": "User-facing controls over rendering mode, themes, editor bindings, and the custom status line."}, {"config": "Plugin settings: Claude command path, suppress not-found, Option+Enter multiline, auto-update. Diff tool setting: auto|terminal (via /config). VS Code ext settings include claudeCode.useTerminal, claudeCode.initialPermissionMode {default,plan,acceptEdits,bypassPermissions}, claudeCode.preferredLocation {panel|sidebar}, claudeCode.autosave, claudeCode.claudeProcessWrapper.", "dataModel": "Lock file JSON: {pid:int, workspaceFolders:[path], ideName:string, transport:'ws', authToken:32-hex-string}. Internal transport type tags: {type:'sse-ide'|'ws-ide', url, ideName, authToken?}. Messages: JSON-RPC 2.0 {jsonrpc:'2.0', method, params, id}. Methods IDE->Claude: selection_changed {text,filePath,fileUrl,selection{start{line,character},end{line,character},isEmpty}}, at_mentioned {filePath,lineStart,lineEnd}. Claude->IDE tools (12): openFile, openDiff, getCurrentSelection, getLatestSelection, getOpenEditors, getWorkspaceFolders, getDiagnostics, checkDocumentDirty, saveDocument, close_tab, closeAllDiffTabs, executeCode.", "mechanism": "On IDE launch: (1) extension starts a localhost WebSocket (or SSE) MCP server on a random port 10000-65535; (2) writes a lock file to ~/.claude/ide/.lock (also documented as -.lock) containing {pid, workspaceFolders, ideName, transport:'ws', authToken (32-char lowercase hex, 128-bit from OS CSPRNG)}; (3) sets env vars CLAUDE_CODE_SSE_PORT= and ENABLE_IDE_INTEGRATION=true when spawning claude. Claude reads the lockfile, matches the port, connects, and authenticates with HTTP header x-claude-code-ide-authorization: . Protocol = MCP spec 2025-03-26 over WS (JSON-RPC 2.0). 
Internal transport types are 'sse-ide' (url http://localhost:PORT/sse) and 'ws-ide' (url ws://localhost:PORT/ws). VS Code: extension BUNDLES its own CLI copy (run via bundled binary or claudeProcessWrapper); JetBrains plugin does NOT bundle \u2014 runs the `claude` command from PATH in the IDE terminal. From external terminal run /ide to connect. autoInstallIdeExtension (default true) auto-installs VS Code ext when launched inside a VS Code/JetBrains terminal; autoConnectIde (default false) connects when launched from an external terminal. The --ide flag auto-connects if exactly one IDE is available. WSL2 NAT/firewall can block the localhost socket (WSL1 unaffected); wslInheritsWindowsSettings lets WSL read Windows managed settings.", "name": "IDE Integration (VS Code / JetBrains bridge)", "purpose": "Connect the CLI TUI to a graphical IDE for diff viewing, selection sharing, file opening, diagnostics."}, {"config": "Drop-in dir managed-settings.d/ (systemd convention: base merged first, then *.json sorted alphabetically, scalars override, arrays concat+dedupe, objects deep-merge, dotfiles ignored; numeric prefixes control order). policyHelper {path} computes managed settings dynamically. requiredMinimumVersion/requiredMaximumVersion (fail open if invalid). forceRemoteSettingsRefresh blocks startup until remote settings fetched (fail closed).", "dataModel": "managed-settings.json schema keys include: allowedMcpServers, deniedMcpServers, allowManagedMcpServersOnly, availableModels, enforceAvailableModels, forceLoginMethod (claudeai|console), forceLoginOrgUUID, requiredMinimumVersion, requiredMaximumVersion, allowManagedPermissionRulesOnly, allowManagedHooksOnly, claudeMd, strictKnownMarketplaces, blockedMarketplaces, allowedChannelPlugins, channelsEnabled, companyAnnouncements, policyHelper, parentSettingsBehavior, wslInheritsWindowsSettings, allowAllClaudeAiMcps. 
permissions object: {allow:[rule], ask:[rule], deny:[rule], additionalDirectories:[path], defaultMode:default|acceptEdits|plan|auto|dontAsk|bypassPermissions, disableBypassPermissionsMode:'disable', skipDangerousModePermissionPrompt}. Permission rule = `Tool` or `Tool(specifier)` e.g. Bash(npm run test *), Read(./.env), mcp__github__get_*.", "mechanism": "Merged at session start. Precedence (low->high): User(~/.claude/settings.json) < Project(.claude/settings.json) < Local(.claude/settings.local.json) < Managed(server-managed / MDM plist / registry / managed-settings.json). Managed CANNOT be overridden. Managed delivery: (a) server-managed from Claude.ai Admin; (b) MDM \u2014 macOS com.anthropic.claudecode plist domain, Windows HKLM\\SOFTWARE\\Policies\\ClaudeCode (Settings REG_SZ/REG_EXPAND_SZ containing JSON), Windows user-level HKCU\\SOFTWARE\\Policies\\ClaudeCode (lowest policy priority); (c) file-based managed-settings.json (+ managed-mcp.json) in /Library/Application Support/ClaudeCode/ (mac), /etc/claude-code/ (linux/WSL), C:\\Program Files\\ClaudeCode\\ (win). Legacy Windows path C:\\ProgramData\\ClaudeCode dropped in v2.1.75. Most keys hot-reload (file watcher + ConfigChange hook); model & outputStyle read once at start. Managed settings parse tolerantly (strip+warn invalid entries, enforce rest; v2.1.169+). A few keys are stored in ~/.claude.json (OAuth, MCP user/local servers, per-project state, caches) NOT settings.json; before v2.1.119 autoScrollEnabled/editorMode/showTurnDuration/teammateMode/terminalProgressBarEnabled lived in ~/.claude.json. ~5 timestamped backups retained. 
Schema: $schema https://json.schemastore.org/claude-code-settings.json.", "name": "settings.json Config Hierarchy", "purpose": "Merge 4 scopes + managed layer into one effective config; cannot be overridden by user/project once set in managed."}, {"config": "Not a settings.json key (CLI-time); also spawned into subprocesses via CLAUDECODE=1 (all spawned procs incl MCP/IDE terminals) and CLAUDE_CODE_CHILD_SESSION=1 (only Claude's own Bash/PowerShell/hook/statusline spawns, NOT IDE/stdio-MCP, v2.1.172+) which excludes nested interactive TUIs from --resume/--continue/history.", "dataModel": "Key env vars: ANTHROPIC_API_KEY, ANTHROPIC_AUTH_TOKEN (-> Authorization: Bearer), ANTHROPIC_BASE_URL, ANTHROPIC_MODEL, MAX_THINKING_TOKENS=0 (disable thinking, except Fable 5), DISABLE_AUTOUPDATER, CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC (= DISABLE_AUTOUPDATER+DISABLE_FEEDBACK_COMMAND+DISABLE_ERROR_REPORTING+DISABLE_TELEMETRY), BASH_DEFAULT_TIMEOUT_MS (120000), BASH_MAX_TIMEOUT_MS (600000), API_TIMEOUT_MS (600000), CLAUDE_CODE_SSE_PORT+ENABLE_IDE_INTEGRATION (IDE bridge), CLAUDE_CODE_AUTO_CONNECT_IDE, CLAUDE_CODE_IDE_SKIP_AUTO_INSTALL, CLAUDE_CODE_NO_FLICKER / CLAUDE_CODE_DISABLE_ALTERNATE_SCREEN, CLAUDE_CODE_DISABLE_VIRTUAL_SCROLL, CLAUDE_CODE_DISABLE_MOUSE, CLAUDE_CODE_FORCE_SYNC_OUTPUT, CLAUDE_CODE_SAFE_MODE, CLAUDE_CODE_EFFORT_LEVEL, CLAUDE_CODE_AUTO_COMPACT_WINDOW, CLAUDE_AUTOCOMPACT_PCT_OVERRIDE.", "mechanism": "Env vars generally take precedence over settings fields (e.g. ANTHROPIC_MODEL > model setting; CLAUDE_CODE_AUTO_CONNECT_IDE > autoConnectIde). Exceptions: --model and /model override ANTHROPIC_MODEL; CLAUDE_CODE_EFFORT_LEVEL overrides /effort and effortLevel. NO_COLOR/FORCE_COLOR in settings.env (v2.1.143+) pass to subprocesses but do NOT change CC's own colors (set them in shell pre-launch instead). settings.env injects vars into every session + spawned subprocess. 
Many feature flags are env-only (no settings.json equivalent).", "name": "Env Vars (CLAUDE_CODE_* / ANTHROPIC_*)", "purpose": "Per-process overrides; higher precedence than settings.json keys for the same feature."}, {"config": "Flags map 1:1 to many settings keys for one session: --model->model, --permission-mode->defaultMode, --effort->effortLevel, --fallback-model->fallbackModel, --teammate-mode->teammateMode, --verbose->viewMode, --settings (inline override), --setting-sources (which scopes to load), --add-dir->permissions.additionalDirectories.", "dataModel": "Modes/payloads: --output-format text|json|stream-json; --input-format text|stream-json; --permission-mode default|acceptEdits|plan|auto|dontAsk|bypassPermissions; --setting-sources user,project,local.", "mechanism": "CLI flags override settings + env for ONE session. Headless/print mode (-p) uses --output-format text|json|stream-json, --input-format, --max-turns, --max-budget-usd, --session-id (UUID), --include-partial-messages, --include-hook-events, --json-schema, --permission-prompt-tool (MCP tool for non-interactive perms). --bare strips auto-discovery (hooks/skills/plugins/MCP/CLAUDE.md) sets CLAUDE_CODE_SIMPLE. --safe-mode disables all customizations (CLAUDE_CODE_SAFE_MODE) but keeps auth/model/built-in tools/permissions AND managed policy. --dangerously-skip-permissions == --permission-mode bypassPermissions. --ide auto-connects if exactly one IDE. --setting-sources picks which of user/project/local to load.", "name": "Key CLI Flags", "purpose": "Per-invocation overrides of model, permissions, system prompt, output format, IDE connection, and customization scope."}], "confidence": "high", "dimension": "tui-ide-config", "keyBehaviors": ["RENDERER IS REACT+INK, NOT BUBBLE TEA. 
Claude Code's TUI is a TypeScript React app (ConcurrentRoot) with a custom react-reconciler host config and a Yoga flexbox layout engine, writing to stdout via a packed-cell Screen buffer with BSU/ESU (DEC mode 2026, ESC[?2026h/l) atomic frame updates. A Go replica must NOT model a Bubble-Tea Model/Update/View loop \u2014 it needs a retained-mode renderer with dirty-tracking, double buffering, and a diff/blit pipeline.", "Fullscreen (alt-screen) is the modern default; 'default' main-screen is legacy. Background/agent-view sessions ALWAYS use fullscreen regardless of the setting. Selection overlay and search highlight mutate the screen buffer in-place (set prevFrameContaminated), forcing a full-damage next frame \u2014 a deliberate tradeoff to avoid a separate overlay buffer.", "settings.json hot-reloads on file change (permissions/hooks/apiKeyHelper/statusLine reload live); only model and outputStyle require restart. ConfigChange hook fires per detected change. Files are watched across all 4 scopes.", "Managed settings are un-overridable and parse tolerantly (strip invalid entry, warn, enforce rest; v2.1.169+). User/project/local are strict (whole-file reject on validation error). requiredMinimumVersion/requiredMaximumVersion FAIL OPEN (invalid value stripped, not enforced) so a bad policy push can't brick startup; forceRemoteSettingsRefresh makes startup BLOCK and fail-closed on fetch failure.", "As of v2.1.142, defaultMode:'auto' set in project or local settings (.claude/settings.json, .claude/settings.local.json) is IGNORED \u2014 only ~/.claude/settings.json can grant auto mode. A repository cannot self-grant auto. Also skipDangerousModePermissionPrompt is ignored in project settings to block untrusted repos from auto-bypassing the bypass prompt.", "IDE bridge: IDE extension owns the WebSocket MCP server on localhost; CLI is the client. VS Code ext bundles its own CLI copy; JetBrains plugin runs PATH `claude` (no bundle). 
WSL2 NAT/firewall commonly blocks the localhost socket (WSL1 fine). JetBrains Remote Dev: install plugin on the REMOTE host not local client.", "Auto-discovery: when claude is launched inside a VS Code/JetBrains integrated terminal, autoInstallIdeExtension (default true) installs the ext and autoConnect connects. From an external terminal, autoConnectIde (default false) is off \u2014 run /ide or pass --ide. The lock file (~/.claude/ide/<port>.lock) is the discovery mechanism.", "Env precedence nuance: env vars generally beat settings, BUT --model and /model beat ANTHROPIC_MODEL, and CLAUDE_CODE_EFFORT_LEVEL beats /effort. NO_COLOR/FORCE_COLOR in settings.env affect subprocesses only (v2.1.143+), not CC's own colors \u2014 set in shell pre-launch to change CC UI colors.", "Per-cell packed format is 2x Int32 (word0=charId, word1=styleId[31:17]|hyperlinkId[16:2]|width[1:0]); CharPool/StylePool/HyperlinkPool are interned and SHARED across front+back frames so blit can copy cells without re-interning. StylePool bit-0 encodes whether a style is visible on spaces (odd=visible) so invisible-space cells are skipped with one bitmask. Pools reset every 5 min with a migration pass to bound growth.", "Render scheduling: lodash throttle at 16ms (leading+trailing) via queueMicrotask after layout effects but same event-loop tick; scroll uses a separate 4ms setTimeout and bypasses React entirely (mutates DOM node scrollTop directly + markDirty). 
Resize is synchronous, not debounced."], "openQuestions": ["Exact keystroke-level behavior of the vim-mode input editor (modes, registers, motions) \u2014 only confirmed it exists via editorMode:'vim'; the vim implementation file/grammar not located in public sources.", "Custom theme file format and discovery path (customThemes referenced in --safe-mode disables 'custom themes' but the theme JSON schema and load path are not documented in fetched sources \u2014 likely ~/.claude/themes/ but unverified).", "Precise multi-source merge semantics for every array vs scalar setting (the docs specify 'arrays merge across sources' generally and explicit exceptions like fallbackModel does NOT merge); a per-key merge table would be needed for an exact replica.", "Whether the SSE transport (sse-ide) is still actively used by current VS Code ext or if WS is now the only transport \u2014 sources describe both as internal types but don't pin which is default in v2.1.17x."], "sources": [{"title": "Claude Code settings (official docs)", "url": "https://code.claude.com/docs/en/settings", "why": "Authoritative settings.json hierarchy, all setting keys, managed-settings delivery (plist/registry/file paths), drop-in dir merge rules, hot-reload + ConfigChange hook, invalid-entry tolerance, permission rule syntax, legacy ~/.claude.json storage."}, {"title": "Use Claude Code in VS Code (official docs)", "url": "https://code.claude.com/docs/en/vs-code", "why": "VS Code extension: bundles own CLI, all extension settings (useTerminal/initialPermissionMode/preferredLocation/claudeProcessWrapper), shortcuts, vscode://anthropic.claude-code/open URI handler with prompt/session params, IDE diff accept/reject semantics."}, {"title": "Claude Code JetBrains IDEs (official docs)", "url": "https://code.claude.com/docs/en/jetbrains", "why": "JetBrains plugin runs PATH claude (no bundle), /ide connects from external terminal, diff tool auto|terminal, diagnostic + selection sharing, supported IDEs, WSL2 
firewall/NAT workaround, Remote Dev host install."}, {"title": "Environment variables (official docs)", "url": "https://code.claude.com/docs/en/env-vars", "why": "Definitive env var reference: env>settings precedence rule with exceptions, CLAUDECODE vs CLAUDE_CODE_CHILD_SESSION distinction, ANTHROPIC_*/CLAUDE_CODE_* full table, NO_COLOR/FORCE_COLOR v2.1.143 behavior, IDE bridge vars."}, {"title": "CLI reference (official docs)", "url": "https://code.claude.com/docs/en/cli-reference", "why": "Complete CLI command + flag table including --bare, --safe-mode, --setting-sources, --settings, --permission-mode, --ide, --output-format, --session-id, --mcp-config, model/prompt/permission flags and their settings mappings."}, {"title": "PROTOCOL.md - claudecode.nvim (reverse-engineered IDE protocol)", "url": "https://github.com/coder/claudecode.nvim/blob/main/PROTOCOL.md", "why": "Definitive IDE bridge protocol: lock file JSON shape + ~/.claude/ide/.lock path, CLAUDE_CODE_SSE_PORT + ENABLE_IDE_INTEGRATION env vars, x-claude-code-ide-authorization header, MCP-over-WS JSON-RPC 2.0, all 12 IDE MCP tools (openFile/openDiff/getCurrentSelection/...)."}, {"title": "Ch 13. 
The Terminal UI - Claude Code from Source", "url": "https://claude-code-from-source.com/ch13-terminal-ui/", "why": "Deep technical write-up of the React+Ink renderer: custom DOM element types, Yoga host config, ConcurrentRoot, 7-stage render pipeline, double buffering, packed-cell Int32 format, CharPool/StylePool/HyperlinkPool interning, blit fast-path, BSU/ESU atomic updates, 16ms throttle, REPL.tsx structure."}, {"title": "Bridge & IDE Integration - Claude Code Internals", "url": "https://claude-code-explain.helmcode.com/bridge-ide/", "why": "Internal transport types sse-ide/ws-ide, lockfile naming ~/.claude/ide/-.lock, distinction between local IDE integration (MCP localhost) vs remote Bridge (claude.ai), claude-vscode bidirectional channel, 15 JetBrains IDEs, VS Code auto-install command."}, {"title": "Configure server-managed settings (official docs)", "url": "https://code.claude.com/docs/en/server-managed-settings", "why": "Server-managed settings delivery via Claude.ai Admin > Claude Code > Managed settings, all settings.json keys supported except OS-policy-restricted list."}], "summary": "Claude Code's \"terminal UI\" is NOT a Bubble Tea-style Model/Update/View loop. It is a TypeScript React (ConcurrentRoot) application rendered to the terminal via Ink + a heavily customized react-reconciler host config and Yoga flexbox layout engine, writing ANSI to stdout through a packed-cell Screen buffer with dirty-tracking, double-buffering, and atomic BSU/ESU frame updates. Two renderers exist: 'fullscreen' (alt-screen, virtualized scrollback, flicker-free \u2014 the modern default) and 'default' (classic main-screen). 
IDE integration is local-only: VS Code/Cursor/Windsurf/JetBrains extensions run a WebSocket-or-SSE MCP server on localhost, write a lockfile to ~/.claude/ide/<port>.lock, set CLAUDE_CODE_SSE_PORT + ENABLE_IDE_INTEGRATION, and the CLI auto-connects (auth via x-claude-code-ide-authorization header); VS Code bundles its own CLI binary, JetBrains runs the PATH `claude`. Configuration is a 4-scope hierarchy (User < Project < Local < Managed) where managed settings (server-managed / MDM plist / Windows registry / system managed-settings.json) take highest precedence and cannot be overridden; file-based managed settings also support a systemd-style managed-settings.d/ drop-in directory (base file merged first, then drop-ins in sorted order). Environment variables (CLAUDE_CODE_*, ANTHROPIC_*) generally override settings keys, and CLI flags override for a single session."}, "sandbox-security": {"asOfDate": "2026-06", "claimsToVerify": ["The Bash tool exposes a `dangerouslyDisableSandbox` parameter that Claude auto-retries failed sandboxed commands with; setting sandbox `allowUnsandboxedCommands: false` makes this parameter ignored entirely (Strict sandbox mode).", "macOS sandbox enforcement generates SBPL with separate `(deny file-read* (subpath \"...\"))` + `(allow file-read* (subpath \"...\"))` rules and CANNOT use `require-not` inside a deny clause because that aborts sandbox-exec (confirmed issue #39635, v2.1.85); Linux/WSL2 uses bubblewrap `--unshare-net` with a socat-relayed Unix socket to the host proxy.", "Permission rule precedence is strict deny\u2192ask\u2192allow with NO specificity override, and Adversa AI found deny checks silently stop being applied after 50 subcommands in a single pipeline (v2.1.88) \u2014 compound commands are split on `&& || ; | |& &` and newlines, each subcommand matched independently.", "Dangerous-command auto-mode stripping (dangerousPatterns.ts) removes interpreter rules (python, node, ruby, perl, php, lua, deno, tsx, npx, npm/yarn/pnpm/bun run, bash, sh, ssh) for ALL users, but the extended list (curl, wget, git, gh, kubectl, aws, gcloud, gsutil, 
sudo, zsh, fish, eval, exec, env, xargs) is ant-internal only (USER_TYPE==='ant')."], "components": [{"config": "settings.json `permissions.allow/ask/deny` arrays; `permissions.defaultMode`; `permissions.disableBypassPermissionsMode`; `permissions.disableAutoMode`. CLI flags `--allowedTools`, `--disallowedTools`. Managed-only: `allowManagedPermissionRulesOnly`.", "dataModel": "Rule = {tool: string, behavior: 'allow'|'deny'|'ask', specifier: string|undefined}. Settings shape: {permissions:{allow:[...],deny:[...],ask:[...],defaultMode:'default'|'acceptEdits'|'plan'|'auto'|'dontAsk'|'bypassPermissions'}}. Known source files: utils/permissions/PermissionMode.ts, PermissionRule.ts, permissionRuleParser.ts, bashPermissions.ts, permissionSetup.ts.", "mechanism": "Each Bash command is parsed (Stage 1, see Bash wrapper) and split on separators && || ; | |& & and newlines into independent subcommands; each must independently match an allow rule for a compound command to be allowed. Before matching, a fixed built-in set of process wrappers is stripped: timeout, time, nice, nohup, stdbuf, and bare xargs (only when flag-less). Dev runners like npx/docker exec/devbox run/mise exec are NOT stripped. Read-only command set (ls, cat, echo, pwd, head, tail, grep, find, wc, which, diff, stat, du, cd, read-only git) is auto-allowed in every mode. Known issue (Adversa AI, v2.1.88): deny checks silently stop after 50 subcommands in one pipeline. Symlink-aware: allow requires BOTH symlink path and target to match; deny triggers if EITHER matches.", "name": "Permission rule engine (deny\u2192ask\u2192allow)", "purpose": "Decides whether a tool call (Bash, Read, Edit, WebFetch, MCP, Agent, Cd) is allowed, denied, or must prompt \u2014 before the tool runs."}, {"config": "sandbox.enabled (bool); sandbox.autoAllowBashIfSandboxed (default true); sandbox.allowUnsandboxedCommands (bool/array); sandbox.failIfUnavailable (bool); sandbox.excludedCommands (array, e.g. 
['docker *']); sandbox.network.httpProxyPort / socksProxyPort; sandbox.network.allowUnixSockets / allowAllUnixSockets / allowLocalBinding / allowMachLookup (macOS XPC); sandbox.network.allowManagedDomainsOnly (managed-only).", "dataModel": "{sandbox:{enabled:bool, autoAllowBashIfSandboxed:bool, allowUnsandboxedCommands:bool, failIfUnavailable:bool, excludedCommands:[...], filesystem:{allowRead:[...], allowWrite:[...], denyRead:[...], denyWrite:[...], allowManagedReadPathsOnly:bool}, network:{allowedDomains:[...], deniedDomains:[...], httpProxyPort:int, socksProxyPort:int, allowUnixSockets:[...], allowAllUnixSockets:bool, allowLocalBinding:bool, allowMachLookup:[...]}}}. Filesystem arrays MERGE across scopes (managed+user+project+local). enableWeakerNestedSandbox and enableWeakerNetworkIsolation are top-level booleans.", "mechanism": "When enabled, every Bash invocation is wrapped by the sandbox-runtime (standalone `@anthropic-ai/sandbox-runtime`, CLI `srt`, Rust crate `sandbox-runtime-rs`) before spawn. (1) Filesystem: default write = cwd subtree + session $TMPDIR; default read = whole machine except certain denied dirs (note: ~/.aws/credentials and ~/.ssh/ are readable by default \u2014 admins must add denyRead). Writable region extended via allowWrite. git worktree shared .git is writable for refs/index but .git/hooks and .git/config remain denied. settings.json files at every scope and the managed-settings dir are always write-denied inside the sandbox so a command can't edit its own policy. (2) Network: all outbound traffic is forced through a host-side proxy (loopback). The sandbox grants socket access only to the proxy; the proxy consults allowedDomains/deniedDomains by requested hostname (no TLS termination, no inspection \u2014 documented domain-fronting limitation). 
On Linux the inner net namespace is unshared (bubblewrap --unshare-net) and socat relays localhost to the host proxy via a mounted Unix socket; on macOS Seatbelt blocks non-loopback traffic at the socket layer as a backstop for tools ignoring proxy env vars. First request to a new domain prompts the user (auto-allow mode) or is blocked (allowManagedDomainsOnly). (3) Escape hatch: if a sandboxed command fails due to restrictions, Claude may re-invoke the Bash tool with dangerouslyDisableSandbox=true; that retry runs UNSANDBOXED and goes through the regular permission flow. Setting allowUnsandboxedCommands:false ('Strict sandbox mode') ignores dangerouslyDisableSandbox entirely.", "name": "Bash sandbox \u2014 OS-level isolation", "purpose": "Wraps each Bash subprocess (and all its children) in an OS-enforced filesystem + network boundary so commands can be auto-allowed without per-command prompts."}, {"config": "Drives sandbox selection via runtime probe. failIfUnavailable converts the silent unsandboxed fallback into a hard startup failure (for managed deployments).", "dataModel": "macOS Seatbelt profile is SBPL text emitted with separate rules: `(allow file-write* (subpath ...))`, `(deny file-read* (subpath ...))` + re-allow `(allow file-read* (subpath ...))`. BUG (issue #39635, v2.1.85): the profile historically used `require-not` inside a deny clause, which is invalid SBPL and makes sandbox-exec abort \u2192 all bash silently fails exit 1. Valid generation requires separate deny then allow rules.", "mechanism": "At startup Claude Code probes for the platform backend. macOS: /usr/bin/sandbox-exec present \u2192 Seatbelt. Linux/WSL2: bubblewrap (bwrap) + socat + (optional) the seccomp filter from @anthropic-ai/sandbox-runtime which blocks Unix domain sockets. If the backend is missing or platform unsupported (native Windows, WSL1), Claude warns and runs unsandboxed unless sandbox.failIfUnavailable=true. WSL1 unsupported (bubblewrap needs WSL2 kernel features). 
Ubuntu 24.04+ needs an AppArmor profile granting bwrap userns.", "name": "Platform backends (Seatbelt / bubblewrap)", "purpose": "Provide the actual OS primitives that enforce fs+net restrictions per platform."}, {"config": "sandbox.filesystem.allowWrite / denyWrite / allowRead / denyRead; sandbox.network.allowedDomains / deniedDomains.", "dataModel": "denyWrite/allowWrite/allowRead/denyRead are string arrays. Path-prefix table: '/' absolute; '~/' home; './' or bare project-root-relative. Distinct from Read/Edit permission rule path syntax (which uses '//abs', '/proj', '~/home'). Network: allowedDomains/deniedDomains are hostname strings with '*' wildcards.", "mechanism": "Default read = entire machine minus denied set; default write = cwd + $TMPDIR. Path-prefix resolution table: '/x' absolute (stays /x), '~/x' -> $HOME/x, './x' or bare 'x' -> relative to project root for project settings OR relative to ~/.claude for user settings (so '.' in user settings resolves to ~/.claude, not the project \u2014 a known footgun). allowRead re-allows inside a denyRead region. Filesystem arrays from multiple scopes MERGE (combined, not replaced). Permission rules (Read/Edit allow and deny) and sandbox.filesystem paths are MERGED into the final sandbox boundary. Network merges WebFetch allow rules + sandbox.allowedDomains; deniedDomains blocks even when a wildcard would otherwise allow. Managed-only lockdowns: allowManagedReadPathsOnly and allowManagedDomainsOnly ignore user/project/local entries.", "name": "Filesystem & network boundary config", "purpose": "Define exactly which paths and domains the sandbox permits/blocks."}, {"config": "Gated by build-time `USER_TYPE === 'ant'` for the extended list (curl/wget/git/gh/kubectl/aws/gcloud/gsutil/sudo/zsh/fish/eval/exec/env/xargs). TRANSCRIPT_CLASSIFIER build flag gates the auto-mode ML classifier.", "dataModel": "BASH_SECURITY_CHECK_IDS enum (23+ ids, bashSecurity.ts lines 76-101). 
DANGEROUS_BASH_PATTERNS list (all-users) + ANT-only extension list (dangerousPatterns.ts lines 58-79). Unknown AST nodes become `too-complex` sentinel. Failed parse -> PARSE_ABORTED sentinel.", "mechanism": "Stage 1 AST parse (tree-sitter-bash; fallback shell-quote+regex in external builds) with allowlist of safe node types \u2014 anything unhandled -> 'too-complex' requiring approval (fail-closed; PARSE_ABORTED distinguishes timeout/panic). Stage 2 (bashSecurity.ts): 23+ checks for command substitution $(...) and backticks, process substitution <(...) >(...), IFS injection, control chars, Unicode whitespace (U+00A0, U+2000-200B), brace expansion with quotes, heredoc extraction; plus zsh-specific bypass detection (=cmd expansion, =(cmd) process sub, zmodload/zpty/ztcp, PowerShell <# comments). Stage 3 semantic: only static >/dev/null and 2>&1 redirections are stripped; dynamic targets (vars, command subst, globs, tilde) reject and prompt. Stage 4 permission match against argv[0]+subcommands. In auto mode, dangerous-pattern rules are auto-stripped so Bash(python:*) etc. can't auto-approve code execution.", "name": "Bash wrapper multi-stage validation", "purpose": "Parse, classify, and gate Bash command text before execution / permission matching; defends against parser-differential and shell-quoting attacks."}, {"config": "Process wrapper stripping list is hardcoded and NOT configurable. Exec wrappers (watch, setsid, ionice, flock) and find -exec/-delete always prompt.", "dataModel": "Token normalization uses a cryptographic placeholder salt (8 random bytes hex) so injected placeholder tokens can't collide. Quoted patterns preserved; unquoted globs allowed only when every flag is read-only.", "mechanism": "spawn() with a separate args array, never shell:true with raw input. The shell provider wraps the command: bash disables extglob and wraps the payload in eval for alias expansion; PowerShell uses -EncodedCommand base64 UTF-16LE (not -Command).
pwd captured via `pwd -P >| quoted_path`. O_NOFOLLOW on file opens prevents symlink attacks. Heredocs are extracted before parsing and restored after to work around shell-quote limitations. Command separators recognized for splitting: && || ; | |& & and newlines. 'Yes dont ask again' on a compound command saves up to 5 separate per-subcommand rules.", "name": "Shell quoting & provider security", "purpose": "Prevent injection when assembling the command line passed to the shell."}, {"config": "sandbox.autoAllowBashIfSandboxed (default true). bypassPermissions gated by remote killswitch gate `tengu_disable_bypass_permissions_mode` (GrowthBook/Statsig, fail-open). permissions.disableBypassPermissionsMode and permissions.disableAutoMode = 'disable' to forbid.", "dataModel": "PermissionMode enum: default, plan, acceptEdits, bypassPermissions, dontAsk, auto. Modes default to prompting; deny rules from ANY scope (managed/user/project/local) always win and cannot be overridden at any other scope.", "mechanism": "Auto-allow mode (default when sandbox enabled) runs sandboxed commands without prompts; the sandbox boundary substitutes for the prompt. Even so, these always still apply: explicit deny rules; rm/rmdir targeting /, home, or critical system paths; content-scoped ask rules like Bash(git push *); a bare Bash ask rule is skipped for sandboxed commands but still applies to commands that fall back to unsandboxed. 
bypassPermissions mode (--dangerously-skip-permissions) skips prompts but STILL prompts for explicit ask rules and for rm -rf /, rm -rf ~, and writes to protected dirs (.git, .claude, .vscode, .idea, .husky, .cargo, .devcontainer, .yarn, .mvn, .config/git); blocked entirely when running as root/sudo on Linux/macOS unless inside a recognized sandbox.", "name": "Sandbox\u2194permission interaction & circuit breakers", "purpose": "Define how the OS sandbox boundary composes with the in-process permission system and which prompts can never be suppressed."}, {"config": "CLAUDE_CODE_SUBPROCESS_ENV_SCRUB=1. plainTextStorage path ~/.claude/.credentials.json (0o600). Keychain uses hex encoding. redactSensitiveUrlParams strips state/nonce/code_challenge/code_verifier/code.", "dataModel": "Scrubbed env var categories: Anthropic (ANTHROPIC_API_KEY, CLAUDE_CODE_OAUTH_TOKEN, ANTHROPIC_AUTH_TOKEN, ANTHROPIC_FOUNDRY_API_KEY, ANTHROPIC_CUSTOM_HEADERS), OTEL (*_HEADERS for LOGS/METRICS/TRACES), cloud (AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, AWS_BEARER_TOKEN_BEDROCK, GOOGLE_APPLICATION_CREDENTIALS, AZURE_CLIENT_SECRET, AZURE_CLIENT_CERTIFICATE_PATH), GitHub Actions (ACTIONS_ID_TOKEN_REQUEST_TOKEN/URL, ACTIONS_RUNTIME_TOKEN/URL, ALL_INPUTS, OVERRIDE_GITHUB_TOKEN, DEFAULT_WORKFLOW_TOKEN, SSH_SIGNING_KEY) plus INPUT_ duplicates. GITHUB_TOKEN/GH_TOKEN intentionally NOT scrubbed. secretScanner.ts: 40+ gitleaks rules -> [REDACTED].", "mechanism": "Credentials: macOS Keychain (hex-encoded so invisible in process monitors) with plaintext fallback to ~/.claude/.credentials.json at 0o600 with explicit user warning. API keys never logged; auth status logged only as booleans; keys truncated in UI (sk-ant-...{last}). When CLAUDE_CODE_SUBPROCESS_ENV_SCRUB is set (auto in GitHub Actions with untrusted content), subprocessEnv.ts strips Anthropic/cloud/GitHub-Actions secrets from child envs before spawning Bash. 
Client-side secretScanner (40+ gitleaks rules) replaces detected secrets with [REDACTED] before uploading to team memory. OAuth params (state/nonce/code_challenge/code_verifier/code) redacted from logs via redactSensitiveUrlParams. Undercover mode (ant-only) strips internal codenames/versions from commits and PRs.", "name": "Secret/PII handling in tool results & subprocess env", "purpose": "Prevent credential leakage via subprocess env, tool output, logs, team-memory sync, and error messages."}, {"config": "permissions.deny WebFetch(domain:...) and sandbox.network.deniedDomains combine. WebFetch allow/deny rules and sandbox allowedDomains merge for the sandbox network boundary.", "dataModel": "Preapproved list is WebFetch-GET-only and explicitly NOT inherited by the sandbox fs/net boundary. Path-prefix match uses segment boundary: pathname===p || pathname.startsWith(p+'/').", "mechanism": "Max URL length 2000 chars, max HTTP content 10MB, fetch timeout 60s, max 10 redirects, markdown truncation 100K chars. Blocks embedded user:password URLs and single-label hostnames (<2 domain parts); HTTP URLs are auto-upgraded to HTTPS. Only same-origin redirects allowed (www. variants OK); cross-domain needs approval. Preflight domain_info query to api.anthropic.com (10s timeout, 5-min LRU TTL; URL content cached 15 min). 130+ preapproved doc/registry domains for GET-only WebFetch (curated; not inherited by sandbox; some allow uploads so unsafe for unrestricted net).
file:// implicitly blocked via empty-hostname parts<2 check.", "name": "WebFetch security (preapproved domains, SSRF)", "purpose": "Constrain Claude's own web fetches against SSRF, malicious domains, and redirect loops."}], "confidence": "high", "dimension": "sandbox-security", "externalInterfaces": ["settings.json keys: sandbox.{enabled,autoAllowBashIfSandboxed,allowUnsandboxedCommands,failIfUnavailable,excludedCommands}, sandbox.filesystem.{allowRead,allowWrite,denyRead,denyWrite,allowManagedReadPathsOnly}, sandbox.network.{allowedDomains,deniedDomains,httpProxyPort,socksProxyPort,allowUnixSockets,allowAllUnixSockets,allowLocalBinding,allowMachLookup,allowManagedDomainsOnly}, enableWeakerNestedSandbox, enableWeakerNetworkIsolation", "settings.json keys: permissions.{allow,deny,ask,defaultMode,disableBypassPermissionsMode,disableAutoMode,additionalDirectories}, and bare allow/deny/ask/defaultMode shorthands", "Permission rule syntax: Tool / Tool(specifier); Bash(npm run *) / Bash(ls:*) (= Bash(ls *)); WebFetch(domain:example.com); Read(//abs|~/home|/proj|./cwd); mcp__server__tool and mcp__server__*; Agent(Name); Cd(path)", "Env vars: CLAUDE_CODE_SUBPROCESS_ENV_SCRUB (strip secrets from child envs), CLAUDE_CODE_UNDERCOVER=1 (force undercover), USER_TYPE=ant (build-time internal gating)", "CLI flags: --dangerously-skip-permissions (bypass mode), --allowedTools / --disallowedTools, --add-dir <path>", "Bash tool parameter: dangerouslyDisableSandbox (bool) \u2014 retry outside sandbox; ignored under allowUnsandboxedCommands:false", "/sandbox slash command (panel: Mode/Overrides/Config/Dependencies); /permissions; /add-dir; /cd (v2.1.169+)", "Remote gates (GrowthBook/Statsig): tengu_disable_bypass_permissions_mode (bypass killswitch), TRANSCRIPT_CLASSIFIER (auto-mode gate)", "External tool: `srt` / `@anthropic-ai/sandbox-runtime` (npm) / sandbox-runtime-rs (Rust crate) \u2014 sandbox-exec (macOS) + bubblewrap + socat + seccomp filter (Linux/WSL2)", "WebFetch domain preflight:
POST api.anthropic.com/api/web/domain_info (10s timeout, 5-min cache TTL)"], "keyBehaviors": ["Default read policy is the WHOLE machine (including ~/.ssh and ~/.aws/credentials) \u2014 only writes are confined to cwd+$TMPDIR. Add denyRead for credential dirs. This is a frequent footgun for re-implementors who assume read is also confined.", "Permission precedence is deny>ask>allow with NO specificity override: a matching ask rule prompts even when a more specific allow also matches. Deny from ANY settings scope (managed>CLI>local project>shared project>user) cannot be overridden by allow at any other scope.", "Bash compound commands are split on && || ; | |& & and newlines; EACH subcommand must independently pass. Approving a compound with 'Yes, dont ask again' saves up to 5 separate per-subcommand rules (not one rule for the whole string).", "Process wrappers stripped before matching: timeout, time, nice, nohup, stdbuf, and bare (flag-less) xargs only. npx/docker exec/devbox run/mise exec are NOT stripped \u2014 Bash(devbox run *) matches everything after 'run' including 'devbox run rm -rf .'. Exec wrappers watch/setsid/ionice/flock always prompt.", "Space before '*' matters: Bash(ls *) matches 'ls -la' (word boundary) but not 'lsof'; Bash(ls*) matches both. Trailing ':*' is equivalent to trailing ' *' and is only recognized at the very end of a pattern.", "A bare tool-name deny (e.g. 'Bash' or 'mcp__*') REMOVES the tool from Claude's context entirely (Claude never sees it). A scoped deny ('Bash(rm *)') leaves the tool visible and blocks matching calls at runtime.", "Sandbox fs path-prefix syntax differs from Read/Edit permission syntax: sandbox uses '/abs', '~/', './proj' (standard); Read/Edit use '//abs', '/proj', '~/home'. Do NOT reuse one parser for the other.", "Filesystem arrays MERGE across scopes (managed+user+project+local) \u2014 they are combined, not replaced. But boolean keys (enabled, failIfUnavailable) take the managed value and ignore local. 
excludedCommands always merges and has no managed-only lockdown, so a developer can always append escape-hatch commands.", "'.' in sandbox fs config resolves to the project root only inside project settings; in user settings (~/.claude/settings.json) it resolves to ~/.claude \u2014 placing the denyRead ~/ + allowRead . example in user settings would NOT protect the project.", "Two sandbox modes: auto-allow (sandboxed commands run unprompted) and regular permissions (sandboxed commands still prompt). Auto-allow works independently of permission mode \u2014 even outside acceptEdits, sandboxed Bash modifying files runs without prompt.", "autoAllowBashIfSandboxed (default true) means a bare Bash ask rule is SKIPPED for sandboxed commands (sandbox substitutes for the prompt), but content-scoped ask rules like Bash(git push *) STILL force a prompt, deny rules still apply, and rm/rmdir of /, home, or critical paths still prompts.", "Sandbox does NOT cover built-in file tools (Read/Edit/Write \u2014 those use the permission system), computer use (runs on real desktop), or environment inheritance (sandboxed Bash inherits parent env incl. credentials unless CLAUDE_CODE_SUBPROCESS_ENV_SCRUB is set). Subagents share the parent sandbox config.", "bypassPermissions skips prompts but still prompts for: explicit ask rules, rm -rf / and rm -rf ~ (circuit breaker), and writes to protected dirs (.git/.claude/.vscode/.idea/.husky/.cargo/.devcontainer/.yarn/.mvn/.config/git). --dangerously-skip-permissions is BLOCKED when running as root/sudo on Linux/macOS unless inside a recognized sandbox.", "seatbelt SBPL generation must NOT use require-not inside a deny clause (aborts sandbox-exec, silent exit 1 \u2014 issue #39635). Emit separate (deny file-read* (subpath ...)) then (allow file-read* (subpath ...)) rules.", "Known parser-differential risk: tree-sitter-bash is the primary parser; external builds fall back to shell-quote+regex which is less robust. 
Fail-closed: unknown AST node -> 'too-complex' -> approval required.", "dangerousPatterns auto-mode stripping is split: python/node/ruby/perl/php/lua/deno/tsx/npx/npm|yarn|pnpm|bun run/bash/sh/ssh are stripped for ALL users; curl/wget/git/gh/kubectl/aws/gcloud/gsutil/sudo/zsh/fish/eval/exec/env/xargs are ant-internal only (USER_TYPE==='ant'). External users get weaker protection for those.", "Adversa AI disclosed deny-rule bypass: deny checks silently stop after 50 subcommands in a single pipeline (v2.1.88). A reimplementation must cap/iterate all subcommands, not just the first 50.", "bypassPermissions killswitch via GrowthBook gate `tengu_disable_bypass_permissions_mode` is one-way (Anthropic can revoke, not grant) and FAIL-OPEN (defaults to not-disable if GrowthBook unreachable). Checked once before first query per session; reset on /login.", "Domain safety preflight is cached 5 min (LRU), so a newly-compromised/-blocklisted domain stays reachable up to 5 min. URL content cached 15 min.", "Preapproved WebFetch domains (130+) are GET-only and explicitly NOT shared with the sandbox network boundary \u2014 some (huggingface.co, kaggle.com, nuget.org) allow uploads and would be unsafe as general sandbox egress.", "macOS Seatbelt + Go caveat: a faithful Go replica cannot use sandbox-exec's require-not-in-deny and must generate valid SBPL; also note enableWeakerNetworkIsolation (allow system TLS trust service) and enableWeakerNestedSandbox (bind-mount container /proc) deliberately weaken isolation and should only be opt-in."], "openQuestions": ["Exact shape of the dynamically generated SBPL profile emitted for arbitrary allowWrite/denyRead combinations post-fix for issue #39635 (need to read sandbox-runtime source for the canonical generator).", "Whether the `allowUnsandboxedCommands` setting is a boolean (Strict mode toggle) or an array of commands permitted unsandboxed \u2014 the gist lists it as an array while docs describe it as bool false=Strict; likely both 
forms exist (bool false disables the escape hatch, array lists allowed unsandboxed commands).", "The full current DANGEROUS_BASH_PATTERNS + ant-only list as of the latest 2026 build (the v2.1.88 reconstruction may be slightly stale).", "Whether the 50-subcommand deny bypass is fixed in current 2026 builds and what the new cap is."], "sources": [{"title": "Configure the sandboxed Bash tool \u2014 Claude Code Docs", "url": "https://code.claude.com/docs/en/sandboxing", "why": "Official, authoritative reference for sandbox modes, fs/network config, allowedDomains/deniedDomains, excludedCommands, dangerouslyDisableSandbox escape hatch, Seatbelt/bubblewrap platform mapping, WSL2 details, security limitations."}, {"title": "Configure permissions \u2014 Claude Code Docs", "url": "https://code.claude.com/docs/en/permissions", "why": "Authoritative permission rule syntax: deny\u2192ask\u2192allow order, Bash wildcard/compound/wrapper rules, read-only command set, Read/Edit path anchors, WebFetch domain rules, MCP/Agent/Cd rules, managed-only keys, settings precedence."}, {"title": "Beyond permission prompts: making Claude Code more secure and autonomous with sandboxing \u2014 Anthropic Engineering", "url": "https://www.anthropic.com/engineering/claude-code-sandboxing", "why": "Anthropic engineering post confirming fs+network isolation built on macOS Seatbelt and Linux bubblewrap, the Unix-socket\u2192host-proxy network architecture, 84% prompt reduction, and the open-sourced sandbox-runtime."}, {"title": "Security \u2014 Claude Code Docs", "url": "https://code.claude.com/docs/en/security", "why": "Official statement of read-only-by-default, built-in read-only Bash command set, write confined to launch dir, command-injection detection, fail-closed matching, network command approval, WebDAV/UNC warnings, macOS Keychain credential storage."}, {"title": "Security Analysis of Claude Code v2.1.88 \u2014 Source Reconstructed from Source Maps", "url": 
"https://b.zzn.im/blog/claude-code-v2.1.88-security-analysis/", "why": "Source-map reconstruction giving internal file paths and mechanisms: 4-stage Bash validation, bashSecurity 23+ checks, dangerousPatterns ant-only split, subprocessEnv scrub var list, secretScanner, bypassPermissions killswitch gate name tengu_disable_bypass_permissions_mode, WebFetch limits, preapproved domains."}, {"title": "Seatbelt sandbox silently blocks all bash commands when denyRead is configured \u2014 anthropics/claude-code#39635", "url": "https://github.com/anthropics/claude-code/issues/39635", "why": "Primary evidence for the exact SBPL generation bug (require-not in deny aborts sandbox-exec) and that valid generation uses separate (deny file-read* (subpath ...)) + (allow ...) rules."}, {"title": "anthropic-experimental/sandbox-runtime", "url": "https://github.com/anthropic-experimental/sandbox-runtime", "why": "The open-sourced runtime Claude Code wraps: confirms sandbox-exec (macOS Seatbelt) + bubblewrap (Linux) + proxy-based network filtering; CLI srt / npm @anthropic-ai/sandbox-runtime."}, {"title": "Claude Code \u2014 Complete settings.json Reference (v2.1.104) \u2014 gist", "url": "https://gist.github.com/mculp/c082bd1e5a439410158974de90c89db7", "why": "Compiled settings key catalog (~125 keys) including the full sandbox.* and permissions.* schema, enableWeakerNestedSandbox/enableWeakerNetworkIsolation, network sub-keys (allowUnixSockets, allowMachLookup, allowLocalBinding)."}, {"title": "Critical Claude Code vulnerability: Deny rules silently bypassed after 50 subcommands \u2014 Adversa AI", "url": "https://adversa.ai/blog/claude-code-security-bypass-deny-rules-disabled/", "why": "Documents the 50-subcommand deny-rule bypass disclosed by Adversa AI Red Team (v2.1.88) \u2014 load-bearing for the reimplementation to cap iteration correctly."}, {"title": "How /sandbox Works \u2014 Claude Code Camp", "url": 
"https://www.claudecodecamp.com/p/claude-code-sandboxing-how-sandbox-works-and-what-it-doesn-t-protect", "why": "Confirms Seatbelt backstop blocking non-loopback traffic at the socket layer for tools that ignore proxy env vars, and the .git/hooks deny that breaks git init under sandbox."}, {"title": "Claude Code's Deny Rules Don't Protect You \u2014 adamkinney (AI All The Things)", "url": "https://adamkinney.com/aatt/claude-code/deny-rules-dont-protect-you-sandbox-does/", "why": "Clarifies that permission deny rules are in-process (not OS-level), why Read deny doesn't stop `python -c 'open(...)'`, and that sandbox.filesystem.denyRead is the OS-enforced layer."}], "summary": "Claude Code's sandbox-security subsystem (v2.1.x, 2025-2026) is a defense-in-depth layering of three mechanisms: (1) an in-process permission rule engine (deny\u2192ask\u2192allow, with gitignore-style path and Bash-wildcard specifiers), (2) a 4-stage Bash-command static-analysis wrapper that classifies command text as read-only / dangerous / too-complex before it is matched against rules or executed, and (3) an OS-level Bash sandbox (macOS Seatbelt via sandbox-exec; Linux/WSL2 bubblewrap+bwrap+socat+seccomp) that confines filesystem writes to cwd+$TMPDIR and forces all network egress through a host-side allowlist proxy over a Unix socket. The sandbox was introduced Oct 20 2025 (Anthropic engineering blog) and open-sourced as @anthropic-ai/sandbox-runtime. Two sandbox modes exist: \"auto-allow\" (sandboxed Bash runs unprompted; the sandbox boundary replaces the prompt) and \"regular permissions\" (sandboxed commands still prompt). Even in auto-allow, explicit deny rules, content-scoped ask rules (e.g. Bash(git push *)), and rm/rmdir targeting /, $HOME, or critical paths still force prompts. 
Secrets/PII are handled by subprocess-env scrubbing (CLAUDE_CODE_SUBPROCESS_ENV_SCRUB), a 40+-rule gitleaks-based client-side secret scanner that redacts tool output before team-memory sync, OAuth-param redaction, and API-key truncation in the UI. The bypassPermissions mode (--dangerously-skip-permissions) is gated by a remote GrowthBook killswitch (tengu_disable_bypass_permissions_mode) and blocked when running as root/sudo."}}, "audit": {"A1-agent-loop-runner": {"area": "A1-agent-loop-runner (CustomRunner, agent-loop execution, confirmation/tool-wrapper bridge, autonomous polling, protocol handshake, CLI entrypoint)", "capabilities": [{"detail": "NewCustomRunner() wires up Genkit registry, an llm.Adapter (model.LLM), GetSWETools(), a DynamicLLMDelegator wrapping the adapter, an llmagent.New agent, a persistent session service, and finally runner.New(...) -> adkRunner. The struct stores adkRunner, llmModel, delegator, provider/model/api fields, GenkitRegistry, and a RunnerDeps bag of all global managers. This is a constructor, not a loop.", "name": "Runner construction & dependency wiring (NewCustomRunner / CustomRunner struct)", "status": "implemented"}, {"detail": "THE CRITICAL GAP. runner_exec.go: Execute() does NOT implement a model->tool-call->model iteration. It (1) resets circuit breaker, (2) drains bg/cron notifs and prepends them, (3) runs HookUserPrompt (can block/inject), (4) builds a *genai.Content user msg, then calls cr.adkRunner.Run(ctx, userID, sessionID, msg, RunConfig{StreamingMode:SSE}) and just ranges over the returned iter.Seq2[*session.Event,error], forwarding each ev to onEvent. The ACTUAL loop (for { runOneStep }) is ADK's internal llminternal.Flow.Run (adk base_flow.go:101). 
Termination, function-call dispatch, max-iterations, before/after model+tool callbacks all live in ADK, opaque to iroha.", "name": "Agent loop driver (model-call -> tool-call -> model-call iteration)", "status": "missing"}, {"detail": "Execute() emits run.accepted/started/cancelled/failed/completed via Logger.LogRunEvent with a uuid runID, atomic sequence, and a terminal-once guard. ctx.Done() triggers run.cancel_requested + Bridge.Cancel(). Panic in the goroutine is recovered, rolls back pending edits, emits run.failed.", "name": "Per-run event lifecycle & instrumentation (runID, run.accepted/started/cancelled/failed/completed)", "status": "implemented"}, {"detail": "runner_exec.go prepends background-notification and cron-notification XML blocks to the user prompt each turn, draining BackgroundManager.DrainNotifications() and CronScheduler.DrainNotifications().", "name": "Pre-LLM prompt enrichment (bg/cron notifications, hook messages)", "status": "implemented"}, {"detail": "After the event stream completes: fires HookAgentResponse, computes editedPaths (filtered against initially-dirty git paths), commits the edit snapshots, and if files were edited generates a semantic commit message via a SECOND direct cr.llmModel.GenerateContent call then GitCommitPaths with '[iroha] ' prefix. Finally runs HookSessionEnd.", "name": "Post-run Git auto-commit (aider-style)", "status": "implemented"}, {"detail": "blockingConfirmationTool embeds tool.Tool and implements ProcessRequest (rewrites req.Tools[name] to itself so ADK dispatches through it), Run (permission check -> auto-review -> human y/n/always/explain/edit/bypass via Bridge channels), and Declaration. This is the permission+confirmation layer.", "name": "Tool wrapping / dispatch interception (blockingConfirmationTool)", "status": "implemented"}, {"detail": "GlobalPermissionManager.Check returns allow/deny/ask.
allow->runWithHooks silently; deny->error with safety-fuse warning after 3 consecutive denials; ask-> ReviewCommand/ReviewFileOperation, auto-approve only in ModeAuto, else block on Bridge.PromptChan<-promptMsg and <-Bridge.ResponseChan. Supports 'explain' (calls globalLLMModel for a 1-2 sentence rationale), 'edit:' (rewrites command/content/path arg then auto-approves), 'always' (adds session allow rule), 'bypass' (returns synthetic success).", "name": "Permission gating + interactive confirmation (y/n/always/explain/edit/bypass)", "status": "implemented"}, {"detail": "runWithHooks: Stage A PreToolUse (block / rewrite args via UpdatedInput json round-trip / inject messages), Stage B runnable.Run + ToolCircuitBreaker.Track (3 consecutive identical-arg failures -> hard block), Stage C PostToolUse (inject messages, AdditionalContext). After file_edit/write/batch runs `go build ./pkg/agent/...` and injects compile errors as additional_context. Cancels respect Bridge.CancelChanRead.", "name": "Hook pipeline integration around every tool call", "status": "implemented"}, {"detail": "DynamicLLMDelegator wraps model.LLM, rebuilds system prompt each turn via SystemPromptUpdater, runs CompactContents when len(Contents)>12 or estimate>50k tokens, and on first-error context-length-exceeded force-compacts+retries once. For DirectHTTPAdapter models, adds retryable-temporary-error retry with budget, delay, and user-visible RetryNotice.", "name": "Dynamic model delegator (prompt rebuild, auto-compact, context-length recovery, retry)", "status": "implemented"}, {"detail": "SwitchModel swaps the delegator's adapter and updates GlobalAgentPool fields + AutoReviewConfig at runtime without rebuilding the runner. 
Thread-safe via RWMutex on both delegator and pool.", "name": "Runtime model switching (SwitchModel)", "status": "implemented"}, {"detail": "ConfirmationBridge (singleton Bridge) with PromptChan/ResponseChan/CancelChan + Reset/Cancel; ToolStatusBridge (singleton ToolBridge) with a 100-buffered StatusChan and a goroutine drain that preserves order. ToolStatus carries Name/Args/Running/Success/Error/Duration/StreamLines.", "name": "Foreground<->background bridges (ConfirmationBridge, ToolStatusBridge)", "status": "implemented"}, {"detail": "pendingEditSnapshots map[path]->originalContent; rollbackPendingEdits restores (removes if empty), commitPendingEdits clears after a successful turn, pendingEditPaths lists. findGoModuleRoot walks up to go.mod. Used by Execute on panic/cancel for rollback and on success for commit.", "name": "Atomic edit snapshot/rollback (pendingEditSnapshots)", "status": "implemented"}, {"detail": "AutonomousManager with StateWork/StateIdle, AutoClaimTasks (pending+unblocked+keyword match -> sets in_progress+owner), StartAutoPolling/StopAutoPolling ticker loop that claims while IDLE. Only relevant for teammate/multi-agent mode; NOT part of the single-user agent loop. GlobalMessageCount and GetIdentityTagBlock also live here.", "name": "Autonomous task polling (AutonomousManager)", "status": "partial"}, {"detail": "ProtocolManager persists ProtocolRequest (shutdown/plan_approval) JSON files under .team/requests/, with CreateRequest/GetRequest/RespondToRequest. This is teammate-to-teammate durable handshake storage, decoupled from the runner loop and from ADK entirely.", "name": "Inter-agent protocol handshake (ProtocolManager)", "status": "implemented"}, {"detail": "Flags: provider/model/apikey/baseurl/api-format/teammate+socket/config-wizard/resume/last/session/fork/yes/plan/default/permission-mode. 
Resolves priority override hierarchy (flag > config > default > env), runs config wizard if key missing, constructs NewCustomRunner, resolves session id (new/resume/last/fork), parses initial PermissionMode, then hands off to tui.RunApp(runner, sessionID, startInSessionPicker, initialMode, startupPrompt). Teammate mode short-circuits to agent.RunTeammateMode over a unix socket.", "name": "CLI entrypoint (cmd/agent-cli/main.go)", "status": "implemented"}], "couplingNotes": "This area is DEEPLY coupled to Google ADK and cannot be decoupled incrementally \u2014 the agent loop itself is outsourced to ADK, so a native (Claude-Code-style) refactor means replacing the loop driver, not just swapping types.\n\nLOAD-BEARING ADK types in this area:\n- runner.Runner (google.golang.org/adk/runner) \u2014 adkRunner field on CustomRunner (runner.go:337). Its Run(ctx,userID,sessionID,*genai.Content,agent.RunConfig,...RunOption) iter.Seq2[*session.Event,error] is the entire execution entry point (runner_exec.go:139). Replacing this means writing the native loop ourselves.\n- llmagent.New / llmagent.Config (google.golang.org/adk/agent/llmagent) \u2014 the rootAgent (runner.go:404). The actual model<->tool iteration lives in ADK's internal llminternal.Flow.Run (adk internal/base_flow.go:101, the `for { runOneStep }` loop). iroha has NO equivalent; ADK owns: termination detection (IsFinalResponse / no FunctionCall / no Partial), function-call dispatch, before/after model+tool callbacks, max-iterations. A native replacement must reimplement this Flow.\n- model.LLM / model.LLMRequest / model.LLMResponse (google.golang.org/adk/model) \u2014 the contract the llm.Adapter implements and the type DynamicLLMDelegator wraps (runner.go:62,109). GenerateContent returns iter.Seq2[*model.LLMResponse,error]. 
This is the model-call surface a native loop needs to drive.\n- session.Event / session.InMemoryService / session.Session (google.golang.org/adk/session) \u2014 events streamed to the TUI (runner_exec.go:144), and GlobalSessionService wraps session.InMemoryService (runner.go:416-417). session.Event embeds model.LLMResponse + Actions + LongRunningToolIDs and has IsFinalResponse(). A native design would define its own streaming event type.\n- tool.Tool / tool.Context (google.golang.org/adk/tool) \u2014 blockingConfirmationTool embeds tool.Tool (runner_confirmation.go:28), implements ProcessRequest(ctx tool.Context, *model.LLMRequest) and Run(ctx tool.Context, args any)(map[string]any,error) and Declaration()*genai.FunctionDeclaration. The requestProcessor interface (runner_confirmation.go:16) mirrors ADK's internal toolinternal.RequestProcessor and the req.Tools map[string]any rewrite trick (runner_confirmation.go:42-47) is a hack to force ADK to dispatch through the wrapper. A native tool registry removes this indirection entirely.\n- agent.RunConfig / agent.StreamingModeSSE (google.golang.org/adk/agent) \u2014 passed to adkRunner.Run (runner_exec.go:139-141).\n- genai.Content / genai.Part / genai.FunctionDeclaration / genai.Schema (google.golang.org/genai v1.57.0) \u2014 the message/tool-declaration wire format used everywhere (runner_exec.go:132, runner_confirmation.go:371-404, compaction estimate). This is Google's genai SDK, shared with ADK.\n\nLOAD-BEARING Genkit types:\n- genkit.Genkit registry + api.Plugin + googlegenai.GoogleAI + anthropic.Anthropic (firebase/genkit/go) \u2014 initGenkit (runner.go:350-364) builds a registry for Gemini/Claude providers; nil for OpenAI-compatible. The GenkitRegistry is stored on CustomRunner and GlobalAgentPool and threaded into llm.NewAdapter. 
Only the GenkitModelAdapter path actually uses it; the direct-HTTP adapters (OpenAI/Anthropic/GLM/DeepSeek/Kimi/SiliconFlow) ignore it.\n\nWHAT A NATIVE LOOP REQUIRES (decoupling work):\n1. A new AgentLoop type owning: build request (system prompt + session contents + tool declarations) -> call model.GenerateContent -> inspect response Parts for FunctionCall -> dispatch to the tool registry (running permission + hooks + circuit-breaker inline) -> append FunctionResponse -> repeat until a response with no FunctionCall (or max-iterations / cancel). This is exactly what ADK Flow.Run owns today and iroha has zero of.\n2. Replace session.Event with a native streaming event union (text delta / tool_call_start / tool_result / final / error).\n3. Replace tool.Tool/tool.Context with a native Tool interface (Name/Declaration/Run(ctx, args)) and a registry; drop the ProcessRequest/req.Tools-map hack.\n4. Replace llmagent+runner with a single Session+Loop struct. PersistentSessionService already wraps session.InMemoryService, so the storage layer is partially ours but still speaks session.Event/session.Session.\n5. The genai wire types (Content/Part/FunctionCall/Schema) are the largest cross-cutting dependency \u2014 either keep genai as the canonical message format (lowest-effort path) or define native equivalents and translate at the adapter boundary.\nGenkit can be dropped almost entirely since most providers already use direct HTTP adapters; only Gemini and the Anthropic-via-Genkit path need it, and Anthropic already has a direct adapter.", "divergences": ["NO native agent loop: iroha's Execute() is a thin event-forwarder around ADK's runner.Run/Flow.Run. Real Claude Code owns its own loop (model turn -> tool-use detection -> execution -> feedback) in-process with explicit max-turns, sidechain/secondary-turn forking, and interrupt handling. 
iroha cannot implement these without forking or replacing ADK's Flow.", "Auto-commit on every turn: Execute() stages+commits the turn's edited paths and LLM-generates a commit message with a '[iroha] ' prefix (runner_exec.go:189-242). Real Claude Code never auto-commits; commits are an explicit user action. This is a material behavioral divergence baked into the loop tail.", "Identity is a fixed persona: GetIdentityTagBlock() hardcodes an 'iroha' cybernetic-anime-girl SWE assistant persona addressing the user as 'Developer' (autonomous.go:138-146), and GlobalMessageCount starts at 10 (autonomous.go:135). Claude Code has no fixed persona and no synthetic message-count seeding.", "No native streaming event taxonomy: iroha consumes opaque session.Event (which embeds model.LLMResponse). Claude Code defines its own granular assistant-message/tool-use/content-block streaming model. Mapping ADK events to a Claude-Code-equivalent UI requires interpretation not present here.", "Post-edit go-build self-heal is hardcoded to './pkg/agent/...' (runner_confirmation.go:157) \u2014 runs regardless of which project/module was edited, so it will misreport or no-op outside this repo.", "Circuit breaker is global and exact-arg only (runner_confirmation.go:219-256, acknowledged limitations): single shared breaker, fmt.Sprintf('%v') arg comparison, no time window, no per-tool threshold. 
Claude Code has per-tool, typed, time-windowed loop protection.", "Dynamic system-prompt rebuild happens inside the model delegator (DynamicLLMDelegator.GenerateContent, runner.go:118-125) keyed off GlobalMessageCount, rather than at the loop-turn boundary as Claude Code does (system prompt assembled once per turn before the model call).", "Confirmation 'explain' and 'edit' flows (runner_confirmation.go:259-320) spawn extra direct model.GenerateContent calls for rationales/arg-rewrites \u2014 there is no equivalent in Claude Code's permission model, which is rule-based + user prompt only.", "ToolCircuitBreaker.Reset is called at the top of every Execute (runner_exec.go:19) and breaker state is process-global, so concurrent runs (teammates) interfere \u2014 diverges from Claude Code's per-session isolation."], "externalDeps": ["google.golang.org/adk v1.2.1-0.20260519122726-f2aee5301649 \u2014 runner.Runner (loop entry), agent/llmagent (rootAgent + Flow loop owner), model (LLM/LLMRequest/LLMResponse contract), session (Event/InMemoryService/Session), tool (Tool/Context), agent (RunConfig/StreamingMode). internal/llminternal.Flow.Run is the opaque loop driver.", "google.golang.org/genai v1.57.0 \u2014 Content/Part/FunctionCall/FunctionResponse/FunctionDeclaration/Schema wire types used across runner, confirmation, and compaction.", "github.com/firebase/genkit/go v1.8.0 \u2014 genkit.Genkit registry + api.Plugin; googlegenai.GoogleAI and anthropic.Anthropic plugins used in initGenkit for Gemini/Claude. 
Stored on CustomRunner and GlobalAgentPool, but only load-bearing for the Genkit adapter path; direct HTTP adapters (openai.go/anthropic.go) bypass it.", "github.com/google/uuid \u2014 runID + session ID generation."], "filesAudited": ["/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_bridge.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_exec.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_edit.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_confirmation.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_confirmation_hooks.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/autonomous.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/protocol.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/cmd/agent-cli/main.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/adapter.go (interface contract verification)", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/go.mod (ADK/Genkit/genai versions)", "ADK source (module cache) runner/runner.go, session/session.go, agent/llmagent/llmagent.go, internal/llminternal/base_flow.go, tool/tool.go \u2014 to verify the real loop owner and event/tool shapes"], "qualityNotes": "Code is genuinely functional and reasonably well-factored for an ADK-based design: clean RunnerDeps injection bag, atomic run-event instrumentation with terminal-once guard, panic recovery with edit rollback, real hook pipeline (PreToolUse/PostToolUse/ToolError) with arg-rewrite and AdditionalContext injection, and a working permission/confirmation/auto-review/circuit-breaker stack. Honest self-documentation of limitations exists (e.g. ToolCircuitBreaker docstring at runner_confirmation.go:201-218).
HOWEVER the area is architecturally the OPPOSITE of Claude Code: it is a framework-hosted agent, not a native loop. The 'agent loop' capability that defines Claude Code is entirely missing from iroha and delegated to ADK. Key smells: (1) ProcessRequest rewrites req.Tools map to force dispatch through the wrapper (fragile ADK-internals coupling); (2) post-edit go-build is hardcoded to ./pkg/agent/...; (3) Global* singletons (GlobalSessionService, globalLLMModel, GlobalMessageCount, GlobalToolCircuitBreaker, Bridge, ToolBridge) make per-session/concurrent-run isolation impossible; (4) auto-commit is baked into the loop tail with no opt-out; (5) GlobalMessageCount is seeded to 10 with no comment. Test coverage in the area is heavy (runner_test.go, runner_ext_test.go, runner_edit_integration_test.go, runner_confirmation tests) but mostly exercises the wrapper/bridge/permission layers, not a loop (because there is no loop to test)."}, "A2-tools": {"area": "A2-tools (tool registry, tool handlers, sandbox, web, MCP, subagents, teams, todo, schedule, worktree, auto-review)", "capabilities": [{"detail": "ToolRegistry + generic register[TArgs,TResults]() in tools.go:24 wraps functiontool.New(Config{Name,Description}, handler). 40 tools registered across 14 register* funcs in GetSWETools() (tools.go:359). Table-driven, append-only, first-error-wins. Real, works.", "name": "Tool registration framework (table-driven, generic)", "status": "implemented"}, {"detail": "tools_file.go:25-71. 10MB cap, rejects dirs, supports 1-based start/end line slicing with 'N\\t' formatting (mimics Read tool cat -n). Sandbox-validated (validateSandboxPath). Matches Claude Code Read semantics closely.", "name": "file_read", "status": "implemented"}, {"detail": "tools_file.go:88-159. Exact-match first, then whitespace-tolerant line-based fallback (normalizeLine collapses runs). Enforces uniqueness unless replace_all. Generates unified diff. Dry-run support. snapshotFile() for rollback. 
No 'Read before edit' hard requirement like real CC.", "name": "file_edit (exact + whitespace-tolerant)", "status": "implemented"}, {"detail": "tools_file_batch.go:22-123. Two-phase (validate-all then apply-all) with rollbackPendingEdits() on any failure. Max 50 edits. Reuses whitespaceTolerantEdit fallback. Diff per edit.", "name": "file_edit_batch (atomic multi-edit)", "status": "implemented"}, {"detail": "tools_file.go:391-410. MkdirAll parents, snapshot+overwrite. No diff display, no line-numbering. Diverges from CC Write (which enforces Read-before-overwrite).", "name": "file_write", "status": "implemented"}, {"detail": "tools_shell.go:43-136. exec.CommandContext via 'sh -c', WrapSandboxCommand applied, StdoutPipe+StderrPipe merged, line streaming via ToolBridge.Send(ToolStatus{StreamLines}), 500-line stream cap, 30s timeout. Exit code reported. checkShellCommandSandbox enforces cwd containment.", "name": "shell_run (streaming, sandboxed)", "status": "implemented"}, {"detail": "tools.go:151-202 + tokenizeCommand/splitShellPipeline/tokenizeAllowedReadOnlyPipeline. Blocks relative '../' escape, out-of-cwd absolute paths (except safePrefixes from tokenizer.go), env-var expansion ($VAR/${VAR}). Allows find|grep|git|ls|rg ... | head readonly pipelines. Real but heuristic-only (tokenized, not a real shell parser).", "name": "Shell command sandbox (path/static analysis)", "status": "implemented"}, {"detail": "tools_shell.go:147-179. Delegates to GlobalBackgroundManager.RunContext/Check. checkShellCommandSandbox applied. Emits task_id; results drained via drain_notifications.", "name": "background_run / check_background", "status": "implemented"}, {"detail": "tools_web.go:31-114. SSRF guard (checkSSRF + ssrfSafeTransport DNS-rebinding-safe DialContext, privateNets incl. fc00::/7), 5MB cap, htmlToText conversion, rate-limit 10/min. http/https only.", "name": "web_fetch", "status": "implemented"}, {"detail": "tools_web.go:135-330. 
HTML scraping of html.duckduckgo.com (parseDDGResults/extractDDGResult decoding uddg redirect) OR SearXNG JSON backend from config.WebSearchSearXNGURL. 10/min rate limit. No real search-API integration (CC uses hosted search).", "name": "web_search (DuckDuckGo scrape / SearXNG)", "status": "partial"}, {"detail": "tools_file_search.go:104-152. regexp.Compile, filepath.Walk, skips grepExcludedDirs (.git/node_modules/etc), 1MB file cap, 50 match cap. NOT ripgrep-backed (pure Go walk). No -i/-g/file filters like CC Grep.", "name": "search_grep", "status": "implemented"}, {"detail": "tools_file_search.go:165-255. Custom matchGlob with ** support (recursive), 100-file cap, skips excluded dirs. Bubble-sort (O(n^2)) \u2014 diverges from CC Glob.", "name": "find_files (glob)", "status": "implemented"}, {"detail": "tools_file_search.go:24-85. filepath.Walk, depth cap 4, grepExcludedDirs skip, 200-entry cap. dirs get '/' suffix.", "name": "list_directory", "status": "implemented"}, {"detail": "tools_memory.go. CRUD over GlobalMemoryManager + memory_dream (4-phase DreamConsolidator). Persisted to disk. Roughly maps to CC memory/save_search semantics but types (user/feedback/project/reference) differ.", "name": "memory_save/list/search/update/delete/dream", "status": "implemented"}, {"detail": "tools_task.go + tools_todo.go over GlobalTaskManager (DAG with DFS cycle validation) and GlobalTodoManager. Mirrors CC TaskCreate/TaskUpdate/TaskList/TaskGet + TodoWrite (single in_progress rule encoded in description only).", "name": "task_create/update/list/get + todo", "status": "implemented"}, {"detail": "tools_schedule.go over GlobalCronScheduler. One-shot/recurring + durable persistence. Real local cron. Maps loosely to CC scheduled-task MCP, not native.", "name": "schedule_create/list/delete", "status": "implemented"}, {"detail": "tools_team.go. Spawn/list/message/inbox/broadcast + protocol_shutdown/plan_approval request/response + agent_claim_task/agent_set_state. 
Over GlobalTeamManager/GlobalProtocolManager/GlobalAutonomyManager. Parallel to CC TeamCreate/TaskUpdate/SendMessage but bespoke protocol set.", "name": "spawn_teammate + team comms + protocol + autonomy", "status": "implemented"}, {"detail": "tools_subagent.go:8-19. Thin wrapper calling GlobalSubagentManager.RunSubagent(ctx, args). Synchronous. No parallel/non-blocking option (CC Task supports background).", "name": "spawn_subagent", "status": "partial"}, {"detail": "tools_worktree.go over GlobalWorktreeManager (Create/List/Status/Enter/Closeout with keep|remove). Real git worktree-backed isolation.", "name": "worktree_create/list/status/enter/closeout", "status": "implemented"}, {"detail": "tools_mcp.go + mcp.go. GlobalMCPRouter.LoadAndStartPlugins + DiscoverTools returns []tool.Tool. DynamicMCPTool implements tool.Tool + Declaration()/ProcessRequest injecting genai.FunctionDeclaration with ParametersJsonSchema. Real MCP-protocol client integration.", "name": "MCP plugin discovery + dynamic tool registration", "status": "implemented"}, {"detail": "lsp_utils.go:105 + lsp_tools.go. LSPGotoDefinition/FindReferences/DocumentSymbols/Hover/Diagnostics via getLSPClient per-language (Go/TS/Python/Rust from config). json.RawMessage fallback parsing. Uses textDocument/diagnostic (pull, 3.17+). Rough analog of CC LSP MCP server but native.", "name": "LSP tools (5)", "status": "implemented"}, {"detail": "ci_watcher.go:91. agent_watch_ci starts background GitHub Actions monitor -> inbox notifications on failure.", "name": "CI watcher", "status": "implemented"}, {"detail": "auto_review.go. RiskTier enum + ClassifyTool/classifyShellCommand (trusted/low/medium/high) and ReviewCommand/ReviewFileOperation with LLM fallback. SetAutoReviewConfig(model.LLM). Dangerous-pattern hard-filter re-checks LLM approval. callLLMForReview via llm.CollectNonStreaming. 
Heuristic-only fallback when no model.", "name": "Auto-review (4-tier risk + LLM judge)", "status": "implemented"}, {"detail": "runner_edit.go snapshotFile/rollbackPendingEdits + per-run commitEditedFiles. On tool failure or ctx cancel, restores originals. CC has no equivalent (uses git).", "name": "Edit snapshot/rollback", "status": "implemented"}, {"detail": "tools.go:401-451. RebuildToolPool (re-discover, bump version) + CheckPluginsFileChanged (mtime of .iroha/plugins.json). Enables /mcp reload.", "name": "Tool pool hot-reload", "status": "implemented"}, {"detail": "Not in registry. CC has NotebookEdit. Absent.", "name": "Notebook tools (NotebookEdit)", "status": "missing"}, {"detail": "Grep has no -i/--include/--exclude/-A/-B/-C flags; no JSON/structured output; 50-match cap. CC Grep is ripgrep-backed with rich flags.", "name": "Grep tool flag parity (output_mode/-i/-g/context)", "status": "missing"}, {"detail": "CC Task supports run_in_background / TaskStop / non-blocking spawn. spawn_subagent here is strictly synchronous via RunSubagent.", "name": "Task (background agent) tool", "status": "missing"}, {"detail": "web_fetch truncates at 5MB and htmlToText is naive (no readability/JS rendering). No URL-context extraction.", "name": "Large output auto-compression / headroom", "status": "missing"}, {"detail": "register functions set description strings but there is no CC-style 'dict' arg schema with required fields. functiontool derives schema from json tags; no explicit required/enum validation at registration.", "name": "Tool description schema validation", "status": "missing"}], "couplingNotes": "This area is HEAVILY coupled to google.golang.org/adk and is the single hardest decoupling point for a native rewrite. Concrete load-bearing dependencies:\n\n1. tool.Tool interface (adk/tool/tool.go:42) \u2014 every registered tool must implement Name()/Description()/IsLongRunning(). GetSWETools returns []tool.Tool.
A native replacement needs an equivalent interface (Name/Description/IsLongRunning/Declaration/Run).\n\n2. tool.Context (adk/tool/tool.go:55) \u2014 NOT a context.Context alias. It embeds agent.CallbackContext and exposes FunctionCallID()/Actions()/*session.EventActions/SearchMemory() (returns *memory.SearchResponse)/ToolConfirmation()/*toolconfirmation.ToolConfirmation/RequestConfirmation(hint,payload). CRITICAL: iroha's handlers declare `ctx tool.Context` but ONLY use it as a bare context.Context via ctx.Value(WorkdirKey) (tools.go:70, pool.go:25). The rich ADK Context surface (confirmation, actions, memory search) is UNUSED by the handlers \u2014 confirmation is instead implemented ad-hoc via runner_confirmation*.go + ToolBridge + ReviewCommand. This means the handlers are 'decoupling-ready': replacing `tool.Context` with a plain `context.Context` (or a tiny native ToolCtx{context.Context; Workdir string}) requires changing only the handler signatures, not their bodies.\n\n3. functiontool.New + functiontool.Func[TArgs,TResults] (adk/tool/functiontool/function.go:71,78) \u2014 the generic register[TArgs,TResults] in tools.go:24 depends on functiontool.New(Config{Name,Description}, handler). This auto-derives the JSON schema from struct field tags (`json:\"x\" description:\"...\"`) and auto-marshals args/results to map[string]any. A native rewrite must replicate this schema-from-struct-tags reflection (iroha already relies on the `description:` struct tag everywhere \u2014 e.g. tools_file.go FileReadArgs). This is the largest mechanical port: write a generic `register[TArgs,TResults]` that reflect-walks TArgs to produce a genai.FunctionDeclaration-style schema and a JSON-(un)marshal dispatcher.\n\n4.
genai.FunctionDeclaration / genai.Tool / genai.Part / genai.Content (google.golang.org/genai v1.57.0) \u2014 used by DynamicMCPTool.Declaration/ProcessRequest (mcp.go:267-283), by runner_exec.go building *genai.Content user messages, and indirectly by functiontool. NOTE: genai is the Google GenAI SDK, not ADK itself \u2014 it is the wire format for tool declarations and messages. Decoupling from ADK does NOT remove the genai dependency unless the native loop also replaces genai with Anthropic-native message/tool-use types.\n\n5. model.LLM + model.LLMRequest (adk/model) + agent.Runner/agent.RunConfig/agent.StreamingModeSSE (adk/agent) \u2014 auto_review.go uses model.LLM/model.LLMRequest/llm.CollectNonStreaming (auto_review.go:12,166-168,278-298) and the runner dispatches via cr.adkRunner.Run(...) (runner_exec.go:139). Tool execution itself does NOT call model.LLM, but the auto-review subsystem does, and tools are ultimately driven by the ADK runner's event stream. Decoupling tools from ADK therefore also requires replacing the runner (A1/A3 area).\n\n6. Indirect via Genkit: tools themselves do NOT import firebase/genkit. The only Genkit coupling is in pkg/llm/adapter.go (NewAdapter(*genkit.Genkit,...)) which produces the model.LLM that SetAutoReviewConfig consumes. 
So Genkit reaches A2 only through the LLM handle handed to auto-review \u2014 replacing the LLM adapter removes it.\n\nNATIVE REPLACEMENT REQUIREMENTS (what a CC-style no-framework port needs):\n- A native `Tool` interface: { Name, Description, IsLongRunning, Declaration()*Schema, Run(ctx, args any)(map[string]any,error) }.\n- A native `ToolCtx` carrying workdir + function_call_id + a confirmation channel (replacing tool.Context's RequestConfirmation/ToolConfirmation), OR keep confirmation outside tools entirely (iroha already does this via ReviewCommand in runner_confirmation \u2014 the cleaner path).\n- A generic schema-from-struct-tags reflector to replace functiontool.New (iroha's struct tags already encode everything needed).\n- Replace genai.FunctionDeclaration with an Anthropic-tool-use schema type (or keep a thin genai-compatible shim if the wire layer stays genai).\n- auto_review.go must call the native LLM client, not model.LLM/llm.CollectNonStreaming.\n\nBOTTOM LINE: The tool HANDLERS are ~90% decoupling-ready (they only need context.Context + WorkdirKey). The coupling is concentrated in (a) the registration/reflection layer (functiontool) and (b) the types tool.Tool/tool.Context/genai.FunctionDeclaration/model.LLM. A native port is feasible and mostly mechanical for handlers, but requires building a small schema-reflection + Tool-interface + dispatch layer to replace functiontool + tool.Tool.", "divergences": ["file_write has NO Read-before-overwrite enforcement \u2014 real CC refuses to overwrite a file you haven't Read in this session; iroha just overwrites (tools_file.go:391).", "file_edit does NOT require a prior file_read; CC's Edit requires the file to have been Read first. iroha allows blind edits (tools_file.go:88).", "search_grep is a pure-Go filepath.Walk regex matcher, NOT ripgrep. No -i/--include/--exclude/-A/-B/-C/output_mode flags, hard 50-match cap, 1MB-per-file skip. 
Semantics and ergonomics differ materially from CC Grep (tools_file_search.go:104).", "find_files uses an O(n^2) bubble sort and a hand-rolled ** glob matcher, not doublestar/fsnotify; 100-result cap (tools_file_search.go:247).", "web_search scrapes DuckDuckGo HTML or hits a self-hosted SearXNG; CC uses a hosted search backend with structured results. Rate-limited to 10/min (tools_web.go:135).", "web_fetch truncates at 5MB and uses a naive htmlToText (no readability extraction, no JS rendering); CC WebFetch has richer extraction + URL-context modes.", "shell_run always uses 'sh -c' with a 30s timeout and 500-line stream cap; CC Bash supports configurable timeout up to 600000ms, run_in_background, and richer sandboxing (iroha's sandbox is static token analysis, not a true seccomp/seatbelt sandbox).", "spawn_subagent is SYNCHRONOUS only (RunSubagent blocks). CC Task supports background dispatch + TaskStop + multiple agents (tools_subagent.go:8).", "todo enforces 'exactly one in_progress' only via description text, not structurally; CC TodoWrite enforces it at the tool layer.", "snapshotFile/rollbackPendingEdits (runner_edit.go) provide a per-run undo that CC does NOT have \u2014 CC relies on git. This is an iroha-specific divergence.", "Confirmation model differs: iroha uses ReviewCommand (heuristic+LLM) + 4-tier RiskTier + ToolBridge status bridge, whereas real CC uses permission rules in settings.json + explicit per-tool allow/deny + can_use_tool hooks. 
ADK's native tool.Context.RequestConfirmation/ToolConfirmation is NOT used by the handlers.", "Auto-review LLM judge (callLLMForReview) re-checks LLM 'safe' verdicts against hardcoded dangerous-pattern lists to resist prompt injection \u2014 CC has no equivalent LLM-judge layer (it uses deterministic rules + hooks).", "LSP tools are first-class native tools (lsp_*) rather than an MCP server as in CC; pull-diagnostics-only (LSP 3.17+), no workspace diagnostics fallback.", "mcp_server_list is the only MCP-meta tool; CC exposes richer MCP resource/prompt tooling. Dynamic MCP tool discovery IS implemented (mcp.go DiscoverTools) but plugin lifecycle is bespoke (.iroha/plugins.json), not the standard MCP config.", "All struct-tag-based arg schemas have no 'required' field tracking (CC uses explicit required arrays in JSON schema)."], "externalDeps": ["google.golang.org/adk v1.2.1-... \u2014 tool.Tool, tool.Context, tool/functiontool (registration+schema reflection). Load-bearing across every tools_*.go.", "google.golang.org/genai v1.57.0 \u2014 genai.FunctionDeclaration/Tool/Content/Part/GenerateContentConfig used by DynamicMCPTool (mcp.go), runner_exec.go message building, and indirectly functiontool. NOT ADK but is the wire schema.", "github.com/firebase/genkit/go v1.8.0 \u2014 used ONLY in pkg/llm/adapter.go to build model.LLM; reaches A2 solely via SetAutoReviewConfig(model.LLM) consumed by auto_review.go.", "google.golang.org/adk/model \u2014 model.LLM + model.LLMRequest used by auto_review.go for the LLM safety judge.", "google.golang.org/adk/agent + adk/session \u2014 referenced by tool.Context (CallbackContext, EventActions) and by the runner (adkRunner.Run). 
Tools do not import these directly except in tests (tools_shell_test.go imports adk/agent, adk/memory, adk/session, adk/tool/toolconfirmation, genai).", "golang.org/x/net/html \u2014 HTML parsing for web_fetch/web_search (tools_web.go, tools_web_safety.go).", "iroha/pkg/config \u2014 WebSearchSearXNGURL + LSPServers config (tools_web.go:150, lsp_utils.go:108).", "iroha/pkg/llm \u2014 CollectNonStreaming helper used by auto_review.go (auto_review.go:298,443)."], "filesAudited": ["/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_file.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_file_batch.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_file_search.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_shell.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_web.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_web_safety.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_mcp.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_memory.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_schedule.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_subagent.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_task.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_team.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_todo.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_worktree.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/auto_review.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/lsp_tools.go", 
"/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/lsp_utils.go (registerLSPTools)", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/ci_watcher.go (registerCITools)", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/mcp.go (DynamicMCPTool)", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_exec.go (dispatch)", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_edit.go (snapshot/rollback)", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/pool.go (WorkdirKey)", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tokenizer.go (safePrefixes)", "/Users/akiwayne/go/pkg/mod/google.golang.org/adk@v1.2.1-0.20260519122726-f2aee5301649/tool/tool.go (tool.Tool/tool.Context)", "/Users/akiwayne/go/pkg/mod/google.golang.org/adk@v1.2.1-0.20260519122726-f2aee5301649/tool/functiontool/function.go (Func/New)"], "qualityNotes": "The tool layer is broad (40 tools) and mostly functionally complete, with genuinely thoughtful security work: SSRF protection includes DNS-rebinding-safe DialContext (tools_web_safety.go:117), symlink-resolving sandbox (validatePathForSandbox, tools.go:124), env-var-expansion blocking, and an LLM-judge with anti-injection re-checking (auto_review.go:229-272). 
However several rough edges: (1) sortFiles is O(n^2) bubble sort (tools_file_search.go:247); (2) shell sandbox is static tokenization, not a real sandbox (no seatbelt/seccomp) \u2014 WrapSandboxCommand exists but its strength wasn't verified here; (3) findLineMatches caps at 100 matches silently (tools_file.go:223); (4) GrepHandler ignores binary files only by size (1MB), not by content sniff \u2014 will feed binaries through regexp; (5) web_search DuckDuckGo scraping is brittle to DDG HTML changes; (6) snapshotFile reads the file again even though FileEditHandler already read it (double read); (7) no per-tool 'required args' validation \u2014 relies entirely on LLM correctness; (8) memory_dream and schedule durable persistence are real but their storage formats weren't audited here (in memory.go / schedule.go, A2-adjacent). Test coverage is strong for handlers (tools_*_test.go present for most). The codebase is internally consistent but the divergence from CC's exact tool semantics (Read-before-edit, Grep flags, Task backgrounding, NotebookEdit) is the main parity gap, not capability gaps per se."}, "A3-permission-hooks-sandbox": {"area": "A3 \u2014 Permission, Hooks & Sandbox (iroha: pkg/agent permission.go, hooks*.go, sandbox.go, auto_review*.go)", "capabilities": [{"detail": "permission.go:12-19. All 6 real Claude Code modes present: default/plan/auto/acceptEdits/dontAsk/bypassPermissions. ParsePermissionMode (permission.go:143-168) does aggressive fuzzy normalization (strips ()-_, spaces, 'mode' suffix) and accepts aliases like 'ci'->dontAsk, 'dangerous'->bypass, 'y'/'yes'->auto. Behavior matches Claude Code semantically.", "name": "Permission modes (6 modes incl. bypass/acceptEdits)", "status": "implemented"}, {"detail": "permission.go:71-139. ~30 built-in allow rules per tool name (file_read/list/grep/find/todo/task/schedule/team/protocol/worktree/mcp_server_list/web). Two hard deny rules (rm -rf /, sudo *). 
builtinRuleCount tracked so dontAsk mode skips auto-approving built-in mutation allow-rules (permission.go:295). AddRule/GetRules/SetMode/GetMode all thread-safe.", "name": "PermissionManager: rule engine (allow/deny/ask)", "status": "implemented"}, {"detail": "permission.go:210-428. Eval order: (0) BashSecurityValidator on shell_run/background_run, (1) deny rules, (2) mode dispatch (dontAsk/plan/bypass/acceptEdits/auto with 4-tier classifier ClassifyTool), (3) allow rules, (4) fall-through to ask. consecutiveDenials counter with NoteApproval/NoteDenial/Reset. Returns (decision, reason) tuple.", "name": "PermissionManager.Check decision pipeline", "status": "implemented"}, {"detail": "permission.go:28-69. 14 regex patterns: shell_metachar, sudo, rm_rf, cmd_substitution, ifs_injection, heredoc, process_substitution, named_pipe, terminal_escape, file_descriptor, unsafe_source, encoding_attack, proxy_injection, unsafe_find_pipe. Severe subset (sudo/rm_rf/unsafe_find_pipe/proxy_injection) -> immediate deny; others -> ask (or deny in plan/dontAsk mode).", "name": "BashSecurityValidator (regex allowlist/blocklist)", "status": "implemented"}, {"detail": "auto_review.go:24-113. trusted/low/medium/high tiers. Trusted set for read-only tools + known safe cmds; shell classified via classifyShellCommand; unknown tools -> high. Used by ModeAuto (permission.go:362-402) to auto-approve trusted/low and escalate medium/high.", "name": "Risk classifier (4-tier: trusted/low/medium/high)", "status": "implemented"}, {"detail": "auto_review.go:198-275 ReviewCommand + auto_review_apply.go heuristicReview + auto_review_diff.go regex checks. Hard rule filter runs BEFORE LLM; if heuristic says safe OR hard-unsafe, LLM is skipped. LLM approval is re-validated by a 'safety fuse' (auto_review.go:230-272) that overrides LLM 'safe' if local patterns disagree. 
Hybrid security model is sound.", "name": "Hybrid shell auto-review (heuristic + LLM safety judge)", "status": "implemented"}, {"detail": "auto_review.go:323-411 ReviewFileOperation + fileHeuristicReview. Blocks system dirs (/etc,/usr,...), sensitive patterns (.ssh,.aws,.env,credentials,*.pem,*private key*), secret indicators in content, unknown extensions -> LLM semantic review via callLLMForFileReview. Wired into acceptEdits mode (permission.go:338-359) and Auto mode.", "name": "File-mutation safety review (path + content + secret detection)", "status": "implemented"}, {"detail": "hooks_types.go:12-37. 12 events: SessionStart/End, UserPrompt, AgentResponse, PreToolUse, PostToolUse, ToolError, Compaction, SubagentStop, Notification, PreCompact, PostCompact. Matches Claude Code's event taxonomy closely (PreCompact/PostCompact + Compaction all present; Notification present).", "name": "Hook lifecycle events", "status": "implemented"}, {"detail": "hooks_types.go:39-46 + hooks.go:52-132. Reads ~/.iroha/hooks.json (user) + ./.iroha/hooks.json (project), with migration shim from legacy .go-claude/ dir. Tracks per-hook source (hookSourceUser/hookSourceProject). Timeout configurable per-file.", "name": "HookManager config loading (user + project layered)", "status": "implemented"}, {"detail": "hooks.go:20-74 RunHooks. Matcher filters by tool name. Project-sourced command hooks require IROHA_TRUST_PROJECT_HOOKS=1 (hooks_exec.go:78-98) \u2014 correct trust-boundary behavior. Async hooks fire-and-forget with panic recovery; sync hooks short-circuit on Blocked. Aggregates Messages/UpdatedInput/AdditionalContext across hooks.", "name": "Hook execution (3 types: command/http/llm-prompt) + matchers + async", "status": "implemented"}, {"detail": "hooks_exec.go:113-200 runHTTP + headers env expansion (AllowedEnvVars-restricted) + parseJSONResult. 
Non-2xx blocks; timeout honors def.OnTimeout='block'.", "name": "HTTP hook type", "status": "implemented"}, {"detail": "hooks_exec.go:203-298 runLLMPrompt. Interpolates $TOOL_NAME/$TOOL_INPUT/$PROMPT/etc into def.Prompt, calls globalLLMModel (model.LLM) GenerateContent, parses decision JSON. THIS IS AN IROHA EXTENSION \u2014 real Claude Code has no native llm-prompt hook type (hooks are subprocess/http only).", "name": "LLM-prompt hook type (custom, non-Claude-Code)", "status": "implemented"}, {"detail": "hooks_exec.go:301-469 runCommand + hooks_types.go:104-191 parseJSONResult. Whitelisted env (HOME/PATH/LANG/TERM/USER/TMPDIR/SHELL/PWD only \u2014 good secret hygiene, hooks_exec.go:345). JSON stdin payload. Supports Claude Code's hookSpecificOutput.permissionDecision/updatedInput/additionalContext AND exit-code protocol (0=ok,1=deny,2=message). JSON-first-then-exitcode ordering matches Claude Code.", "name": "Command hook: stdin JSON + stdout JSON + exit-code protocol", "status": "implemented"}, {"detail": "sandbox.go:1-168. GlobalSandboxEnabled flag. darwin -> sandbox-exec with generated Seatbelt profile (deny writes to /System,/Library,/usr,/bin,/sbin,/private/etc,~/.ssh,~/.aws,~/.kube,~/.gemini; allow workdir + tmp + caches). linux -> bwrap --ro-bind / --bind workdir. Graceful no-op fallback if binary missing. This is an Iroha-native addition; real Claude Code uses a different (seatbelt-exec on mac, landlock on linux via its own CLI binary) mechanism.", "name": "OS-level sandbox (macOS sandbox-exec + Linux bubblewrap)", "status": "implemented"}, {"detail": "tools.go:151+ checkShellCommandSandbox. Separate from OS sandbox \u2014 tokenizes command (handles read-only pipelines) and blocks relative '../' escape + absolute paths outside CWD (whitelisting safePrefixes). Runs inside ShellRunHandler BEFORE the OS sandbox wrap (tools_shell.go:44 vs :55). 
Defense-in-depth.", "name": "Path-escape sandbox (command tokenizer + CWD bounding)", "status": "implemented"}, {"detail": "runner_confirmation.go:17-98. adkRunnableTool embeds tool.Tool; ProcessRequest overwrites req.Tools entry so ADK dispatches through Run() which calls GlobalPermissionManager.Check then underlying tool. This is the ONLY point where permission checks meet tool execution \u2014 and it is structurally dependent on ADK's tool.Tool/tool.Context/model.LLMRequest/req.Tools map.", "name": "Permission gating integration via blockingConfirmationTool wrapper", "status": "partial"}, {"detail": "Real Claude Code has NO equivalent of GlobalAutoReviewConfig (an LLM safety judge that pre-approves shell/file ops). This is an Iroha-original feature layered on top of Claude Code's model. Mode-dependent (only invoked in ModeAuto / acceptEdits 'ask' path, runner_confirmation.go:130,179). Conceptually diverges from Claude Code's 'ask human' default.", "name": "LLM-based auto-review config wiring", "status": "implemented"}, {"detail": "No settings.local.json/enterprise managed-settings.json rule merging, no 'additionalDirectories' workspace expansion, no pattern-prefix precedence semantics beyond substring+glob. matchesPattern (permission.go:626-655) is a custom glob (not gitignore-style). Acceptable but not 1:1.", "name": "Real Claude Code permission JSON schema fidelity (.claude/settings.json 'permissions.allow/deny/ask')", "status": "partial"}], "couplingNotes": "COUPLING IS MODERATE AND CLUSTERED \u2014 permission.go, hooks.go, sandbox.go, auto_review_apply.go, auto_review_diff.go are FRAMEWORK-FREE (pure Go, only stdlib + iroha/pkg/llm). The ADK/Genkit coupling is concentrated in exactly THREE spots:\n\n(1) auto_review.go:12-13 imports `google.golang.org/adk/model` + `google.golang.org/genai`. autoReviewConfig.Model is typed `model.LLM` (auto_review.go:166-168). 
callLLMForReview (auto_review.go:278-319) and callLLMForFileReview (auto_review.go:413-463) build `*model.LLMRequest` with `[]*genai.Content`/`*genai.Part`/`*genai.GenerateContentConfig`, then call `llm.CollectNonStreaming(ctx, cfg.Model, req)` (pkg/llm/helpers.go:12). pkg/llm/helpers.go itself imports `google.golang.org/adk/model`.\n\n(2) hooks_exec.go:16-17 imports `google.golang.org/adk/model` + `google.golang.org/genai`. The llm-prompt hook (runLLMPrompt, hooks_exec.go:203-298) uses the package-global `globalLLMModel model.LLM` (declared runner.go:62) and calls `globalLLMModel.GenerateContent(ctx, req, false)` iterating `iter.Seq2[*model.LLMResponse, error]`, building `*model.LLMRequest`/`*genai.Content`/`*genai.Part`.\n\n(3) runner_confirmation.go:10-12 imports `google.golang.org/adk/model`, `google.golang.org/adk/tool`, `google.golang.org/genai`. The blockingConfirmationTool wrapper embeds `tool.Tool`, implements `ProcessRequest(ctx tool.Context, req *model.LLMRequest)` and `Run(ctx tool.Context, args any)`. It hijacks `req.Tools map[string]any` to force ADK to dispatch through the permission-checking Run(). This is the structural seam where permission gating meets the agent loop \u2014 and it is the MOST load-bearing ADK coupling in this area.\n\nA native rewrite needs to replace: (a) the `model.LLM` interface with a plain `type LLMClient interface { Generate(ctx, messages, system) (string, error) }`; (b) `*model.LLMRequest`/`*genai.Content`/`*genai.Part` with a native Message{Role,Parts} struct; (c) `llm.CollectNonStreaming` with a thin local collector; (d) the `tool.Tool`/`tool.Context`/`req.Tools` dispatch hijack with a native tool-registry that calls PermissionManager.Check BEFORE invoking the handler. Because the permission rule logic (permission.go), hook config/exec plumbing (hooks.go, hooks_exec.go runHTTP/runCommand/parseJSONResult, hooks_types.go), and sandbox (sandbox.go) are framework-free, they port almost verbatim. 
The llm-prompt hook + auto-review LLM calls need the new LLMClient signature swapped in (mechanical). The blockingConfirmationTool hijack is the only piece that must be re-architected: in a native loop, permission check is just a call before tool dispatch, not a wrapper that rewrites a tool map. Estimated effort for this area alone: LOW-MEDIUM (the security logic is already isolated; only the 3 ADK seams need rewiring).", "divergences": ["LLM-prompt hook type (HookTypePrompt='llm-prompt', hooks_types.go:45) does NOT exist in real Claude Code \u2014 Claude Code hooks are command (subprocess) and matching only. This is an Iroha-original extension that adds a built-in LLM safety-judge hook mechanism.", "Auto-review LLM safety judge (ReviewCommand/ReviewFileOperation/GlobalAutoReviewConfig) is an Iroha-original concept. Real Claude Code does NOT do LLM-based pre-approval of shell commands or file writes \u2014 it relies on permission rules + human confirmation. Iroha's ModeAuto uses ClassifyTool 4-tier + LLM review to auto-approve 'medium' ops, which is more permissive than real Claude Code.", "Sandbox implementation differs: Iroha uses macOS `sandbox-exec` + Linux `bwrap` directly in-process (sandbox.go). Real Claude Code ships its own sandboxing binary (seatbelt on mac via a dedicated helper, landlock+namespaces on linux) with more granular workspace allowlisting and network policy. Iroha's Seatbelt profile is static-string-built and allows network by default ('(allow default)'), weaker than Claude Code.", "Permission rule config format diverges: Iroha uses hardcoded built-in rules + AddRule API (permission.go:85-131, 201-208), NOT real Claude Code's .claude/settings.json 'permissions.allow/deny/ask' array with tool:path/content pattern syntax. 
Iroha's matchesPattern (permission.go:626) uses substring-when-no-wildcard which is looser than Claude Code's gitignore-style matching.", "Hook config path is .iroha/hooks.json (hooks.go:58-96) not .claude/settings.json hooks block. Has a legacy .go-claude/ migration shim. Hook JSON shape (HookConfig.Hooks map[string][]HookDef) is close but not identical to Claude Code's settings.json 'hooks' structure (Claude Code nests under PreToolUse/PostToolUse arrays of {matcher,hooks:[{type,command}]}).", "ConsecutiveDenials counter with 3-strike safety-fuse warning (runner_confirmation.go:76-80, permission.go:555-583) is an Iroha-original UX feature, not in real Claude Code.", "dontAsk mode in Iroha (permission.go:290-316) acts as 'deny-by-default unless explicit allow rule' \u2014 this maps to Claude Code's behavior but the CI-style naming and builtinRuleCount skip logic (permission.go:295) is Iroha-specific.", "checkShellCommandSandbox (tools.go:151) is a second, independent path-based sandbox that runs BEFORE the OS sandbox and duplicates some of heuristicReview's path-danger logic (auto_review_apply.go isPathDangerous). Two overlapping path-escape checkers is divergence-from-Claude-Code (which has one coherent sandbox)."], "externalDeps": ["google.golang.org/adk v1.2.1-0.20260519122726-f2aee5301649 \u2014 provides model.LLM interface, model.LLMRequest, model.LLMResponse (used in auto_review.go, hooks_exec.go, runner_confirmation.go). Load-bearing for the 3 LLM-calling seams and the tool.Context/tool.Tool dispatch wrapper.", "google.golang.org/genai v1.57.0 \u2014 provides genai.Content/genai.Part/genai.FunctionDeclaration/genai.GenerateContentConfig. Used to construct LLM requests in auto_review.go, hooks_exec.go, runner_confirmation.go. 
Would be replaced by a native Message type in a no-framework rewrite.", "github.com/firebase/genkit/go v1.8.0 \u2014 NOT directly imported by the A3 files, but the configured model.LLM for ProviderClaude/ProviderGemini is GenkitModelAdapter (pkg/llm/genkit_adapter.go) which bridges genkit -> ADK model.LLM. So auto-review + llm-prompt hooks transitively depend on Genkit when using Claude/Gemini providers (the model passed to SetAutoReviewConfig/globalLLMModel is a GenkitModelAdapter in the default path). Direct OpenAI/Anthropic adapters (pkg/llm/openai.go, anthropic.go) bypass Genkit.", "iroha/pkg/llm \u2014 CollectNonStreaming helper (helpers.go) is the thin wrapper auto_review.go depends on; it in turn imports adk/model. This is the single import edge from the security area into the LLM subsystem."], "filesAudited": ["/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/permission.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/hooks.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/hooks_exec.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/hooks_types.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/sandbox.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/auto_review.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/auto_review_apply.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/auto_review_diff.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_confirmation.go (coupling seam: blockingConfirmationTool wraps tool.Tool, permission gate)", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_shell.go (sandbox wrap site)", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools.go:149+ (checkShellCommandSandbox second sandbox layer)", 
"/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/helpers.go (CollectNonStreaming ADK coupling)", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/adapter.go + genkit_adapter.go (model.LLM provider chain)", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/go.mod (ADK v1.2.1-..., genkit v1.8.0, genai v1.57.0)"], "qualityNotes": "SECURITY LOGIC QUALITY IS HIGH. The hybrid security model (hard regex/heuristic rules as an absolute floor, LLM judge as advisory with a 'safety fuse' that overrides LLM approvals, auto_review.go:230-272) is well-designed and resists prompt-injection jailbreaks. The regex pattern coverage (14 patterns in BashSecurityValidator + 10 in auto_review_diff.go) is broad. Command-hook env whitelisting (hooks_exec.go:345) prevents secret leakage. Project command hooks gated behind IROHA_TRUST_PROJECT_HOOKS is correct trust-boundary hygiene.\n\nWEAKNESSES: (1) Two overlapping path-escape checkers (tools.go checkShellCommandSandbox + auto_review_apply.go isPathDangerous) with divergent whitelists \u2014 maintenance hazard and inconsistency risk. (2) Iroha's mac Seatbelt profile uses '(allow default)' then denies specific paths (sandbox.go:78) \u2014 this is an ALLOW-by-default policy, weaker than Claude Code's deny-by-default; network is implicitly allowed. (3) globalLLMModel and GlobalAutoReviewConfig and GlobalPermissionManager and GlobalHookManager are all package-level singletons (runner.go:62, auto_review.go:171, permission.go:141, hooks.go:29) \u2014 global mutable state makes testing and multi-agent isolation harder; a native rewrite should inject these. (4) matchesPattern substring fallback (permission.go:634) can over-match. (5) LLM JSON parsing in runLLMPrompt/hooks_exec.go relies on heuristics to strip markdown fences and extract first {..} block (hooks_exec.go:275-295) \u2014 brittle but defended against multi-JSON injection. 
Overall: the area is over-engineered relative to Claude Code (extra LLM-judge + llm-prompt hook layers) but the core permission/hook/sandbox primitives are solid and largely portable."}, "A4-context-memory-session": {"area": "A4-context-memory-session (compaction, memory store, session persistence, system-prompt assembly)", "capabilities": [{"detail": "compaction.go:49 CompactContents operates on []*genai.Content. Two phases: (1) micro-compaction archives any FunctionResponse.Response >1000 bytes to ~/.iroha/transcripts/.jsonl and replaces it in-place with a placeholder (compaction.go:115-145); (2) conversational summarization when len(contents)>12 \u2014 keeps round[0], summarizes middle (index 1..len-5) via LLM or truncation fallback, preserves last 4 rounds, re-inserts sticky blocks (compaction.go:148-258). Deep-copies all Parts/FunctionCall/FunctionResponse before mutating (compaction.go:55-99). Hooks fire at micro/before_summarization/after_summarization/circuit_breaker_tripped. Triggered in runner.go:131-136 inside DynamicLLMDelegator.GenerateContent when >12 rounds OR estimateContentsTokens>50000.", "name": "Micro-compaction of large tool outputs + transcript archiving", "status": "implemented"}, {"detail": "compaction_helpers.go:15 extractStickyBlocks collects any genai.Content whose Part.Text contains '[STICKY]'; capStickyContent (helpers:30) trims oldest until total sticky bytes <= 20% of a hardcoded 200000-byte context-window estimate. Sticky blocks are re-inserted after the summary. prompt.go marks the Persona and CLAUDE.md sections with [STICKY] so they survive summarization. 
NOTE: only text-bearing blocks can be sticky; FunctionCall/FunctionResponse parts are never preserved as sticky.", "name": "Sticky-latch preservation during summarization", "status": "implemented"}, {"detail": "compaction.go:17 global struct, 3 consecutive failures (empty summary or error) opens the breaker (open=true) and forces truncateOnlySummary for subsequent runs; auto-resets after 5 minutes. compaction_helpers.go:69 truncateOnlySummary builds an extractStructuredSummary block + a 4000-char transcript. Recovered via defer/recover around summarizeRounds (compaction.go:194-202).", "name": "Compaction circuit breaker + truncation-only fallback", "status": "implemented"}, {"detail": "compaction_helpers.go:212 summarizeRounds: builds a transcript from text/FunctionCall/FunctionResponse, caps at 8000 chars, issues a 30s-timeout model.LLMRequest via the passed-in model.LLM, streams GenerateContent and concatenates text parts. Falls back to extraction if LLM absent/empty. extractStructuredSummary (helpers:108) regex-extracts tool names, file paths, and 'decision' lines (prefixes like 'let's ', 'i'll ', 'decided to ') into a [SUMMARY] block.", "name": "LLM-based conversation summarization", "status": "partial"}, {"detail": "memory.go:35 MemoryManager holds map[name]*MemoryEntry with RWMutex. Two-layer load: ~/.iroha/memory (global) then /.iroha/memory (project overrides). Each entry is one .md with YAML frontmatter (memory_frontmatter.go parse/render). MaxMemoryEntries=100 cap. Save/Update/Delete/List/Search/Count/Reload all implemented. Singletons GlobalMemoryManager + GlobalDreamConsolidator (memory.go:42-45).", "name": "Memory store (file-based, YAML frontmatter, global+project layers)", "status": "implemented"}, {"detail": "memory_agents_sync.go syncToAgentsMD / syncFromAgentsMDLocked / makeAgentsBlock parse/write a '## Agent Dynamic Learnings' section in AGENTS.md, mirroring entries both directions. 
Round-trips Name/Type/Description/Content with line-based block parser. Hardcoded path 'AGENTS.md' (cwd-relative).", "name": "Bidirectional AGENTS.md <-> memory sync", "status": "implemented"}, {"detail": "memory.go:234 BuildSystemPromptSection groups entries by type (user/feedback/project/reference), fuzzy keyword-matches against the current user prompt (feedback type always injected), and emits a Markdown block with emoji headers. Called from prompt.go:135 (stable section) and runner.go:398. MarkStale invalidation is exposed on SystemPromptBuilder but memory section is rebuilt every turn unconditionally.", "name": "Memory injection into system prompt (trigger-aware)", "status": "implemented"}, {"detail": "memory_dream.go:169 Consolidate runs Orient/Gather/Consolidate/Prune: deletes empty entries, exact-content dedup within type groups, optional LLM semantic merge (ConsolidateSemantically:303 when >=3 entries of a type, JSON-array contract), then enforces MaxMemoryEntries cap (oldest first). ShouldConsolidate (dream:113) checks 7 gates incl. PID-based .dream_lock with stale-lock eviction. Triggered async at startup (runner.go:465) and IncrementSession bumps count on every MemoryManager init.", "name": "Dream consolidation (dedup/prune/cap + LLM semantic merge)", "status": "implemented"}, {"detail": "session_store.go:57 PersistentSessionService wraps a delegate session.Service (runner.go:416 wires session.InMemoryService()). SerializedSession (store:28) embeds []*session.Event plus state map, CWD, first prompt, permission mode, token/cost estimates, compaction archive path. Create/Get/List/Delete/AppendEvent delegate then persist; SaveSession serializes via json.MarshalIndent; LoadSessions re-hydrates the delegate; ListSavedSessions + ForkSession for TUI picker and branching. 
interface asserted at session_store_helpers.go:133.", "name": "Persistent session service (JSON-per-session, wraps ADK session.Service)", "status": "implemented"}, {"detail": "migrate_legacy.go migrateGoClaudeIfNeeded one-shot copy of ~/.go-claude/memory and ./.go-claude/memory into .iroha equivalents, gated by ~/.iroha/.migrated sentinel, renames old dir to .bak. Called inside MemoryManager.loadLocked (memory.go:73).", "name": "Legacy .go-claude -> .iroha migration", "status": "implemented"}, {"detail": "prompt.go:94 BuildWithPrompt assembles identity tag, [STICKY] persona, memories, layered CLAUDE.md (with @-import expansion + path sandboxing, prompt.go:501-687), AGENTS.md (cwd-up-to-project-root), skills (folder SKILL.md + flat .md + manifest always-on + trigger-matched), then '=== DYNAMIC_BOUNDARY ===' caching boundary, then time/workdir/safety/tasks/teammates/inbox/worktrees/reminder. maybeCached emits '' when a section's SHA-256 is unchanged since last call.", "name": "System prompt builder with prompt-caching boundary", "status": "implemented"}, {"detail": "session_store.go:168-194 and session_store_helpers.go:12 estimateTokens = textLen/4; estimateCost = tokens*2/1000000. Used for session picker metadata and as the compaction trigger (compactionTriggerTokens=50000, runner.go:79) via estimateContentsTokens (runner.go:83). No tokenizer library; not Anthropic/GPT tokenizer-accurate. Cost basis ($2/M) is a placeholder, not per-model pricing.", "name": "Token counting", "status": "partial"}, {"detail": "tokenizer.go is misnamed \u2014 it implements tokenizeCommand, a shell-command tokenizer for the sandbox that blocks pipes/subshells/redirections. There is NO LLM tokenizer (tiktoken/BPE/CountTokens) anywhere in pkg/agent. 
The file does not belong to this functional area; it is a shell-security helper.", "name": "tokenizer.go (NOT an LLM tokenizer)", "status": "stub"}, {"detail": "No microcompact-undo, no /compact slash command wiring to trigger manual compaction, no diff/restore of archived tool output back into context, no token-accurate budgeting (only bytes/4). Compaction archive is append-only JSONL with no rotation or read-back path. Sticky cap uses a hardcoded 200000-byte window estimate rather than the real model context window.", "name": "Compaction archive read-back / restore / tool-result fetch", "status": "missing"}], "couplingNotes": "This area is MODERATELY-TO-HEAVILY coupled to Google ADK and transitively to Firebase Genkit. The load-bearing ADK primitives are (1) google.golang.org/genai \u2014 genai.Content and genai.Part are the canonical message model threaded through compaction.go, compaction_helpers.go (21 references), session_store.go, and memory_dream.go; CompactContents signature is `func CompactContents(contents []*genai.Content, sessionID string, llm ...model.LLM) []*genai.Content` (compaction.go:49). The deep-copy loop (compaction.go:55-99) is hand-written against genai.Part/FunctionCall/FunctionResponse fields. (2) google.golang.org/adk/model \u2014 model.LLM is the summarizer interface (summarizeRounds helpers:212, ConsolidateSemantically dream:303) and model.LLMRequest/LLMResponse are the request/response wrappers. The summarizers are invoked by passing the live delegator's current model (runner.go:134 passes `m`). (3) google.golang.org/adk/session \u2014 session.Service, session.Session, session.Event, session.InMemoryService are the entire persistence substrate; PersistentSessionService is literally a session.Service wrapper (session_store_helpers.go:133 interface assertion), SerializedSession embeds []*session.Event and reads sess.State().All()/sess.Events().All(). 
(4) google.golang.org/adk/agent/llmagent + adk/runner \u2014 runner.go:404-430 constructs the agent and runner; CustomRunner.Execute drives adkRunner. Genkit (github.com/firebase/genkit/go v1.8.0) is NOT imported by any file in THIS area directly \u2014 it enters via pkg/llm.NewAdapter (runner.go:511 initGenkit) which produces the model.LLM. So Genkit coupling is one hop away, but model.LLM (ADK) is the contract this area speaks.\n\nDECOUPLING FEASIBILITY: High effort but tractable. The pure-Go pieces (MemoryManager, memory_frontmatter, memory_helpers, memory_agents_sync, migrate_legacy, SystemPromptBuilder, frontmatter/dream gates) are already framework-free \u2014 they only use os/strings/regexp and could survive a native rewrite unchanged. The ADK-coupled surface to replace is narrow and well-defined: (a) replace []*genai.Content with a native Message struct {Role string; Parts []Part} where Part is {Text, ToolCall, ToolResult} \u2014 this is a mechanical refactor of compaction.go + helpers (the deep-copy, sticky scan, structured extraction, transcript builder) plus session_store.go's event serialization; (b) replace session.Service/Session/Event with a native SessionStore interface (Create/Get/List/Delete/AppendEvent + a serializable Event with Content/Author/Timestamp/Usage) \u2014 PersistentSessionService already isolates the JSON layer so the delegate swap is small; (c) replace model.LLM / model.LLMRequest / model.LLMResponse with a native LLMClient interface {Generate(ctx, []Message, opts) -> stream of (Message, error)} used by summarizeRounds and ConsolidateSemantically. None of these require Genkit. The DynamicLLMDelegator (runner.go:65-143) is the seam where compaction plugs in today; a native agent loop would call the same CompactContents(nativeMsgs, sessionID, nativeLLM) before each provider call. 
The single biggest blocker to a 1:1 Claude Code native loop is that Claude Code uses Anthropic's content-block model (text/tool_use/tool_result) with real token counting via the Anthropic tokenizer \u2014 iroha's genai.Content + bytes/4 heuristic diverges from that and would need a native message type + a real tokenizer (tiktoken-go or Anthropic's counting endpoint) for faithful budgeting and auto-compact thresholds.", "divergences": ["Message model is genai.Content/genai.Part (Google GenAI SDK) not Anthropic content blocks (text/tool_use/tool_result). Tool calls are FunctionCall/FunctionResponse, not Anthropic's tool_use/tool_result block types. A 1:1 port to Anthropic-native shape requires remapping all Part handling.", "No real tokenizer. Token counts are bytes/4 everywhere (session_store.go:193, runner.go:106, session_store_helpers.go:14). Claude Code uses Anthropic's actual token counting for context-window budgeting and the 92%/95% auto-compact thresholds. iroha's 50000-token trigger (runner.go:79) and 200000-byte sticky window (compaction.go:35) are arbitrary byte proxies.", "Compaction is round-count-based (>12 rounds) OR byte-token-based (>50k), triggered inside the model delegator. Claude Code's compaction is token-threshold-based on the real context window with a specific summarization prompt and a restore-on-edit mechanism; iroha has no restore path (archives are append-only and never read back).", "Sticky mechanism is a bespoke '[STICKY]' text marker in content blocks (compaction.go:26) capped at 20% of a hardcoded byte estimate. 
Claude Code has no public equivalent; it relies on prompt-caching breakpoints and file/snapshot references rather than in-band markers.", "System prompt is re-emitted in full every turn (DynamicLLMDelegator.GenerateContent runner.go:118-124 calls SystemPromptBuilder.BuildWithPrompt each call) and only uses a string-hash 'cached:' comment marker (prompt.go:87) as a pseudo-cache hint \u2014 it does NOT use Anthropic's actual prompt-caching cache_control breakpoints. Claude Code relies on provider-side cache_control with explicit breakpoints.", "Memory model (user/feedback/project/reference .md files with YAML frontmatter + AGENTS.md mirror) is iroha-specific, not Claude Code's CLAUDE.md-only convention. The Dream consolidator (dedup + LLM semantic merge + PID lock + 7 gates) has no Claude Code equivalent; Claude Code does not auto-merge memories.", "Token/cost accounting is a rough $2/M placeholder (session_store_helpers.go:22) independent of model; Claude Code computes per-model cost from real usage metadata.", "memory_dream.go:51 isProcessAlive uses syscall.Signal(0) \u2014 UNIX/macOS only; not portable to Windows (matches the darwin-only env but diverges from Claude Code's cross-platform support).", "prompt.go:307 sanitizeADKStatePlaceholders escapes {var} and {app:name}/{user:name} patterns to '{name /* literal */}' \u2014 an ADK-template-injection guard that only exists because ADK does Go-template substitution in instructions; a native loop would not need this and it is dead weight / a divergence from Claude Code's plain-text system prompt."], "externalDeps": ["google.golang.org/adk v1.2.1-0.20260519122726-f2aee5301649 (go.mod:14) \u2014 model.LLM/model.LLMRequest/model.LLMResponse (compaction, dream, session persistence), session.{Service,Session,Event,InMemoryService} (session_store + helpers), and transitively adk/agent/llmagent + adk/runner (runner.go) which owns the session and drives Execute.", "google.golang.org/genai v1.57.0 (go.mod:15) \u2014 
genai.Content and genai.Part are the message model used throughout compaction.go/compaction_helpers.go/session_store.go/memory_dream.go (21 direct refs in the two compaction files alone). This is the deepest coupling: it IS the conversation data type.", "github.com/firebase/genkit/go v1.8.0 (go.mod:9) \u2014 NOT imported by any file in this area directly; enters via pkg/llm.NewAdapter/initGenkit (runner.go:508,511) which produces the model.LLM passed into summarizeRounds/ConsolidateSemantically. Decoupling model.LLM to a native LLMClient removes the transitive Genkit dependency from this area.", "No tokenizer library (tiktoken/BPE) is present anywhere \u2014 token counting is the bytes/4 heuristic. Any 1:1 fidelity effort must add a real tokenizer."], "filesAudited": ["/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/compaction.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/compaction_helpers.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/memory.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/memory_helpers.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/memory_frontmatter.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/memory_agents_sync.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/memory_dream.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/session_store.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/session_store_helpers.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/prompt.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tokenizer.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/migrate_legacy.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner.go (lines 40-160, 385-540 for compaction seam + agent/runner/session 
wiring)", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/go.mod (ADK/Genkit versions)"], "qualityNotes": "Code quality is generally solid and well-logged (structured LogInfo/LogWarn/LogError/LogAudit throughout). Memory subsystem (memory.go, memory_frontmatter.go, memory_agents_sync.go, memory_helpers.go, migrate_legacy.go) is framework-free, tested, and cleanly separated \u2014 the easiest part to preserve verbatim in a native rewrite. Compaction is functional but has rough edges: the sticky cap uses a magic 200000-byte constant rather than the real context window; the deep-copy is hand-rolled and will silently drop any Part field ADK adds later (only Text/InlineData/FunctionCall/FunctionResponse copied); summarizeRounds swallows LLM errors by `break`-ing and falling through to extraction without incrementing the circuit breaker (compaction_helpers.go:286), so transient LLM failures do not trip the breaker \u2014 only empty/zero output does. memory_dream.go ConsolidateSemantically deletes originals before validating LLM JSON fully (dream:350-353 deletes list, then saves items); if mm.Save fails partway, memories are lost \u2014 not transactional. session_store SaveSession reads GlobalPermissionManager and os.Getwd() at save time, coupling persistence to global state. tokenizer.go is misnamed and misplaced (shell tokenizer in the context-memory area) and should be relocated. sanitizeADKStatePlaceholders (prompt.go:307) is an ADK-specific wart that would vanish in a native loop. 
Tests exist for compaction (compaction_test.go, compaction_helpers_test.go, compaction_ext_test.go), memory (memory_test.go, memory_ext_test.go), and session_store (session_store_test.go)."}, "A5-mcp-subagent-team-skills": {"area": "A5-mcp-subagent-team-skills", "capabilities": [{"detail": "pkg/agent/mcp.go: MCPToolRouter singleton with LoadAndStartPlugins (reads .iroha/plugins.json, migrates from .go-claude, scans skill dirs for per-skill plugins.json, merges PluginManager servers+hooks), DiscoverTools (calls tools/list per client, wraps each as DynamicMCPTool named mcp__<server>__<tool>), ListServers, CloseAll. Supports stdio (MCPClient) + HTTP (HTTPTransport via NewMCPTransport). Real JSON-RPC 2.0 over child process stdin/stdout with initialize handshake + notifications/initialized. 10s per-call timeout. NOTE: LoadAndStartPlugins always uses NewMCPClient (stdio) directly \u2014 it does NOT route through NewMCPTransport, so URL-based HTTP servers in plugins.json are NOT actually started as HTTP; the transport factory exists but is not wired into plugin loading.", "name": "MCP server discovery + lifecycle", "status": "implemented"}, {"detail": "pkg/agent/mcp_client.go: hand-rolled JSON-RPC 2.0 client over exec.Cmd pipes, pending-request map keyed by int64 id, readLoop goroutine, SendNotification, Call with 10s timeout. Protocol version pinned to 2024-11-05 (an older revision). No resource/prompt subscriptions, no sampling, no cancellation, no logging notifications handled.", "name": "MCP stdio JSON-RPC client", "status": "implemented"}, {"detail": "pkg/agent/mcp_transport_http.go: HTTPTransport implements Streamable HTTP \u2014 POST with Accept: text/event-stream, captures Mcp-Session-Id header, DELETE on Close, parseSSEResponse extracts first 'data:' line. Only reads the FIRST SSE event (no multi-event/progress streaming). StdioTransport wraps MCPClient. 
MCPTransport interface defined but, as noted above, not used by the router.", "name": "MCP HTTP streamable transport", "status": "partial"}, {"detail": "pkg/agent/mcp_oauth.go: OAuthConfig/Token structs, PKCE S256 verifier+challenge generation, manual-copy StartOAuthFlow (prints URL, reads code via fmt.Scanln), RefreshToken, StoreToken/LoadToken to ~/.iroha/tokens/.json (0600), IROHA_MCP_TOKEN env bypass. OOB redirect (urn:ietf:wg:oauth:2.0:oob). Token storage exists but is NOT plumbed into MCPClient/HTTPTransport \u2014 no code calls LoadToken to attach a Bearer header, and StartOAuthFlow is never invoked from the router. OAuth is a standalone utility, not integrated into the MCP connect path.", "name": "MCP OAuth2 + PKCE", "status": "partial"}, {"detail": "pkg/agent/subagent.go SubagentManager.RunSubagent: 6 typed agents (explore/planner/reviewer/researcher/executor/work). Executor+work get a git worktree (GlobalWorktreeManager) cleaned up via defer Closeout; read-only types run in parent CWD. Toolsets curated by GetToolsForType (pool.go) with allowedToolsByType allowlist. Default model overridden to a cheap/fast per-provider model unless spec.ModelName set. Synchronous: blocks iterating subRunner.Run events, writes JSONL log to .iroha/subagents/logs, then git status --porcelain to derive FilesCreated/FilesEdited. DIVERGES from Claude Code: subagent has its OWN in-memory session (not parent session), no stream/interleaving with parent context, no tool-result relay, model is forced cheap (haiku/flash/4o-mini) rather than honoring parent model.", "name": "Subagent synchronous execution", "status": "implemented"}, {"detail": "pool.go GetToolsForType + TypePromptPrefix: typePromptTemplates and allowedToolsByType maps. explore/planner/reviewer/researcher restricted to read-only tool names (file_read/list_directory/search_grep/find_files). executor/unknown get all tools. 
Curated by exact tool-name string match, not capability tags.", "name": "Subagent typed tool curation + prompt prefixes", "status": "implemented"}, {"detail": "team.go: TeamManager singleton, .team/config.json persistence, roster CRUD (RegisterTeammate/GetTeammate/ListTeammates), plus loadYAMLAgents which scans .iroha/agents/ and .claude/agents/ for YAML-frontmatter .yaml/.yml/.md agent definition files (parseAgentDefinitionFile). Matches Claude Code's .claude/agents convention.", "name": "Team manager + YAML agent discovery", "status": "implemented"}, {"detail": "team_message.go: AppendToInbox / ReadAndClearInbox / PeekInbox against .team/inbox/.jsonl, Broadcast to all teammates. team_process.go StartTeammateLoop polls inbox every 2s, calls ProcessMessage callback, replies to sender's inbox, updates status idle/working. This is a polling inbox model, NOT the automatic-delivery + idle-notification model of Claude Code teams (real CC delivers messages to the running agent turn and emits idle notifications).", "name": "Team inbox messaging", "status": "implemented"}, {"detail": "team_process.go: EnableProcessIsolation sets isolationMode + binaryPath + NewIPCBridge over unix sockets; StartTeammateProcess spawns child via Watchdog (3 crashes / 60s budget), Recover() restores checkpoint, handleIPCMessage routes message/task_complete/heartbeat/shutdown, heartbeatChecker flags stale after 45s, RunTeammateMode is the child-side entrypoint (--teammate/--socket flags). Substantial but only 'message'/'task_assign'/'task_complete' message types \u2014 no structured protocol-response/plan-approval/shutdown_request JSON message types that real Claude Code teams use.", "name": "Team process isolation + IPC + watchdog", "status": "partial"}, {"detail": "skills.go SkillManager: discovers ~/.iroha/skills/ + .iroha/skills/ (project overrides global by ID), skill.json manifest (id/name/description/triggers/tags/instructions_file/type). 
3 types: model_invoked (keyword substring match), user_invoked (/skill slash), always (system prompt). LoadInstructions reads SKILL.md with path-escape guard (prefix check on absBase). MatchTriggers is naive case-insensitive substring, not Claude Code's model-driven progressive disclosure (real CC uses the model to decide skill loading and SKILL.md body is injected on demand). Skill body is loaded but injection into the running prompt loop is handled elsewhere (prompt.go), not verified here to follow CC's on-demand progressive disclosure.", "name": "Skill discovery + matching", "status": "partial"}, {"detail": "plugin.go PluginManager: discovers ~/.iroha/plugins/*/plugin.json + project, ValidateManifest (id regex, no __, semver), MergeMCPServers (namespaced pluginID__name), MergeHooks. MigratePluginsConfig for legacy flat config. Pure manifest layer; no plugin sandboxing, signature verification, dependency resolution, or marketplace.", "name": "Plugin manifest discovery", "status": "implemented"}, {"detail": "task.go: .tasks/.json persistence, SaveTask does bidirectional ReconcileEdges (auto-creates placeholders for missing refs, rebuilds Blocks/blockedBy from active edges) + DFS 3-color CheckCycles with rollback on cycle. ListTasks excludes deleted, sorted by ID. ResolveTasksDir prefers local .tasks with write-test, falls back to ~/.iroha/tasks (with .go-claude migration). Matches Claude Code TaskCreate/TaskUpdate semantics closely (subject/status/blockedBy/blocks/owner). Owner field is 'agent'|'user' but no per-agent ownership enforcement like CC's owner assignment.", "name": "Task DAG manager", "status": "implemented"}, {"detail": "todo_manager.go GlobalTodoManager: Update validates max 12 items, status enum, single in_progress; GetItems/NoteRoundWithoutUpdate/RoundsSinceUpdate/ResetRounds (round-staleness tracking for reminders); Render with ANSI colored checkbox + completed count. In-memory only (no persistence), unlike CC's per-task-list persistence. 
Maps to CC TaskCreate but lacks the metadata/owner/blockedBy richness of task.go.", "name": "TodoWrite session plan", "status": "implemented"}, {"detail": "cron.go GlobalCronScheduler: hand-rolled cron (cronMatches + computeJitter), 5-field validation, recurring vs one-shot, durable (.iroha/scheduled_tasks.json) vs session-only, file-lock CronLock so only one process fires, checkLoop ticks every 5s deduped by minute, 7-day auto-expiry, DetectMissedTasks (catch-up capped at 24h), DrainNotifications, jitter for :00/:30 crons. Jitter is applied by shifting the check time, not the fire time. DIVERGES from CC: prompts never auto-execute as a turn \u2014 they only queue as ScheduledNotification for the UI/runner to drain; CC scheduled tasks fire as enqueued prompts while REPL idle.", "name": "Cron scheduler", "status": "implemented"}, {"detail": "background.go GlobalBackgroundManager: Run/RunContext spawns sh -c in goroutine via WrapSandboxCommand, 300s timeout+kill, output to .runtime-tasks/.log (capped 50KB), preview, persist .json per task, loadPersistedTasks on startup, Check (single or all), ListTasks sorted desc, DrainNotifications, DetectStalled. NotifQueue is in-memory (lost on crash unless reloaded from persisted status). Maps to CC run_in_background but notification delivery to the active turn is poll-based, not the re-invocation CC uses.", "name": "Background task lanes", "status": "implemented"}, {"detail": "worktree.go GlobalWorktreeManager: git worktree add -b wt/ into .worktrees/, index.json registry + events.jsonl lifecycle log, Create/Closeout(keep|remove)/Enter/List, branch -D on remove, cascades task status to in_progress/completed when TaskID bound. 
EnterWorktree-style interactive session switching (CC's EnterWorktree/ExitWorktree tool) is NOT implemented \u2014 only Enter (timestamp update).", "name": "Git worktree manager", "status": "implemented"}, {"detail": "Real Claude Code exposes TeamCreate/TeamDelete/EnterWorktree/ExitWorktree/TaskGet/TaskList/TaskUpdate/CronCreate/CronList/CronDelete as first-class tools. Here they exist only as internal managers; only fragments are surfaced as tools (tools_team.go, tools_worktree.go, tools_schedule.go, tools_task.go exist but the manager APIs substantially exceed what is exposed).", "name": "Team tool surface (TeamCreate/TeamDelete/EnterWorktree)", "status": "missing"}], "couplingNotes": "This area splits cleanly into two coupling tiers:\\n\\n(A) FULLY DECOUPLED \u2014 no ADK/Genkit dependency: task.go, todo_manager.go, cron.go, background.go, worktree.go, skills.go, plugin.go, team.go, team_message.go, team_types.go, team_process.go (except it references Watchdog/IPCBridge which are also pure-Go), mcp_oauth.go, mcp_transport_http.go, and the entire stdio MCPClient in mcp_client.go. These are plain Go (os, exec, net/http, encoding/json, sync) and already mirror a native architecture. They can be lifted out with zero ADK work.\\n\\n(B) ADK-COUPLED via the tool/agent/runner/session surface \u2014 concentrated in exactly 3 files: mcp.go, subagent.go, pool.go. The load-bearing ADK/Genkit primitives are:\\n - mcp.go: imports google.golang.org/adk/tool, google.golang.org/adk/model, google.golang.org/genai. DynamicMCPTool implements the adkRunnableTool interface (Name/Description/IsLongRunning + Declaration()*genai.FunctionDeclaration + Run(tool.Context, any)(map[string]any,error) + ProcessRequest(tool.Context, *model.LLMRequest)). This is the SOLE coupling point for MCP tool exposure \u2014 the MCP transport/client layer itself is framework-free; only the 'wrap discovered MCP tool as a runnable ADK tool' adapter is ADK-specific.\\n - subagent.go + pool.go: heavy coupling. 
They call llm.NewAdapter (returns model.LLM \u2014 pkg/llm/adapter.go signature takes *genkit.Genkit), llmagent.New(llmagent.Config{Name/Instruction/Model/Tools}), session.InMemoryService(), runner.New(runner.Config{AppName/Agent/SessionService/AutoCreateSession}), then subRunner.Run(ctx, userID, sessionID, *genai.Content, agent.RunConfig{StreamingMode}). Tools are wrapped in blockingConfirmationTool (which embeds tool.Tool and re-implements the same adkRunnableTool interface + ProcessRequest to overwrite req.Tools map). The runnerHooks{} struct is passed to NewAdapter as AdapterHooks.\\n\\nNative replacement requirement: introduce a single small Tool interface (Name()/Description()/Declaration()->schema/Run(ctx,args)->(map,err)) to replace the adkRunnableTool interface used in mcp.go:228, runner_confirmation.go:21, pool.go, subagent.go \u2014 DynamicMCPTool becomes framework-agnostic. Then replace the subagent/team execution path (llmagent.New + runner.New + session.InMemoryService + Run over events) with a native agent loop (provider-agnostic message list + tool-call dispatch) \u2014 subagent.go:155-203 and pool.go:131-203 are the only two call sites that construct an ADK runner for a sub-agent. The Genkit dependency enters ONLY through llm.NewAdapter's *genkit.Genkit param (used solely for the Claude-via-Genkit and Gemini paths; the OpenAI/Anthropic-direct paths pass g==nil and already bypass Genkit), so decoupling llm.Adapter from model.LLM is the shared prerequisite across areas A3/A4 and this one.\\n\\nNet: ~85% of this area's lines are already framework-free. The decoupling work is narrowly scoped to (1) the DynamicMCPTool wrapper (mcp.go:228-283) and (2) the two sub-runner construction blocks in subagent.go and pool.go. 
No Genkit APIs are used directly inside this area's files except via the llm package.", "divergences": ["MCP HTTP transport + OAuth token storage exist as standalone utilities but are NOT wired into the plugin router: LoadAndStartPlugins (mcp.go:87) always constructs NewMCPClient (stdio), ignoring config.URL, and never calls LoadToken/StoreToken \u2014 so HTTP and OAuth-protected MCP servers effectively cannot connect. Real Claude Code supports streamable-HTTP MCP servers and OAuth from .mcp.json.", "MCP protocol version is pinned to 2024-11-05 (mcp_client.go:106, mcp_transport_http.go:81); real CC uses the 2025-06-18 revision with newer capabilities (elicitation, structured tool output, resource links).", "Subagents default to a CHEAP model (haiku/flash/4o-mini) per-provider (subagent.go:134-144) unless overridden; real Claude Code spawns subagents with the parent's model (or an explicitly chosen one), not a forced downgrade.", "Subagents run with an isolated in-memory session and DO NOT interleave with the parent's session/context \u2014 there is no parent->child context handoff, no automatic return of the full tool-call transcript, only the accumulated text Summary + git-derived file lists. CC subagents return a structured handoff and their tool calls are visible to the parent.", "Team inbox is a polled JSONL mailbox (2s ticker, team_process.go:36); CC's native teams deliver messages into the running agent turn and emit idle notifications \u2014 not a poll-and-clear loop. 
No structured protocol JSON message types (protocol-response, plan-approval, shutdown-request) are implemented.", "Skills use naive case-insensitive substring trigger matching (skills.go:160) and load the SKILL.md body eagerly via LoadInstructions; CC uses model-driven progressive disclosure where the model decides when to expand a skill body, and triggers are far richer than substring.", "Scheduled cron tasks never auto-fire as an agent turn \u2014 they only append to an in-memory notifQueue drained by the host (cron.go:336). CC scheduled tasks fire as enqueued prompts while the REPL is idle.", "Background tasks notify via an in-memory queue (background.go:220) rather than re-invoking the agent turn on completion as CC does.", "Worktree manager has no EnterWorktree/ExitWorktree interactive session-switching tool (only Enter = timestamp bump); CC has first-class worktree session entry/exit.", "Owner assignment on TaskRecord is a free string ('agent'/'user', task.go:111) with no enforcement of per-agent ownership or claim semantics that CC's TaskUpdate owner field provides.", "MCP tool result is parsed as map[string]any and returned directly (mcp.go:259); CC normalizes MCP tool results (content blocks, is_error, structured output) into its native tool-result format \u2014 here any non-object JSON result would error.", "stdio MCP stderr is silently discarded (mcp_client.go:92-95 'Discard/log') with no capture, making server debugging impossible."], "externalDeps": ["google.golang.org/adk/tool (tool.Tool, tool.Context) \u2014 load-bearing in mcp.go, subagent.go, pool.go, runner_confirmation.go as the tool interface", "google.golang.org/adk/model (model.LLMRequest) \u2014 used in DynamicMCPTool.ProcessRequest and blockingConfirmationTool.ProcessRequest to register function declarations into req.Config.Tools / req.Tools map", "google.golang.org/adk/agent + google.golang.org/adk/agent/llmagent \u2014 llmagent.New + agent.RunConfig{StreamingMode} construct every sub-agent 
runner (subagent.go:155, pool.go:136)", "google.golang.org/adk/runner \u2014 runner.New + Runner.Run event iterator is the execution loop for subagents and team teammates (subagent.go:166-188, pool.go:147-187)", "google.golang.org/adk/session \u2014 session.InMemoryService() used per-subagent (no persistence) (subagent.go:165, pool.go:146)", "google.golang.org/genai \u2014 genai.Content / genai.Part / genai.FunctionDeclaration / genai.Tool / genai.GenerateContentConfig are the message+schema vocabulary throughout (mcp.go, subagent.go, pool.go, runner_confirmation.go)", "github.com/firebase/genkit/go/genkit \u2014 *genkit.Genkit threaded through AgentPool.GenkitRegistry into llm.NewAdapter; only consumed inside the llm package (Claude-via-Genkit + Gemini paths), never used directly in this area's logic", "gopkg.in/yaml.v3 \u2014 YAML frontmatter parsing for .claude/agents/* and .iroha/agents/* agent definitions (team.go)", "github.com/google/uuid \u2014 task/background/cron IDs (task is int-id; uuid used in background.go:98, cron.go:118)", "Standard library only for the decoupled managers: net/http, os/exec, encoding/json, sync, crypto/rand, crypto/sha256 (OAuth PKCE), path/filepath, bufio (stdio + SSE parsing)"], "filesAudited": ["pkg/agent/mcp.go", "pkg/agent/mcp_client.go", "pkg/agent/mcp_oauth.go", "pkg/agent/mcp_transport_http.go", "pkg/agent/subagent.go", "pkg/agent/pool.go", "pkg/agent/team.go", "pkg/agent/team_message.go", "pkg/agent/team_process.go", "pkg/agent/team_types.go", "pkg/agent/skills.go", "pkg/agent/plugin.go", "pkg/agent/task.go", "pkg/agent/todo_manager.go", "pkg/agent/cron.go", "pkg/agent/background.go", "pkg/agent/worktree.go", "pkg/agent/runner_confirmation.go (adkRunnableTool interface + blockingConfirmationTool, followed)", "pkg/agent/tools.go (ToolRegistry/functiontool surface, followed)", "pkg/agent/runner.go:370-440 (root runner construction, followed for parity)", "pkg/llm/adapter.go:54 (NewAdapter signature, followed)"], 
"qualityNotes": "Code quality is generally solid: thread-safe (sync.RWMutex everywhere), durable persistence with migration from legacy .go-claude paths, and good separation (transport/client/router layers in MCP; managers are singletons). task.go's ReconcileEdges + CheckCycles with rollback is genuinely well-engineered. Weak spots: (1) substantial dead/separated code \u2014 NewMCPTransport/HTTPTransport/OAuth are implemented but not wired into the router, so URL/OAuth MCP servers silently fall back to stdio and fail; (2) no integration tests exercise real MCP servers, HTTP transport, or process-isolated teammates end-to-end against a live binary (test files exist but are mostly unit-level); (3) error handling swallows failures with `continue` in LoadAndStartPlugins (mcp.go:90,124,144) making misconfigurations invisible; (4) MCPClient has no reconnect, no request cancellation, hard-coded 10s timeout; (5) team_process.go's IPC + Watchdog path is complex and lightly tested. For a 1:1 refactor: the decoupled managers (task/todo/cron/background/worktree/skills/plugin/team-inbox) are essentially already native Go and need little change; effort concentrates on the 3 ADK-coupled files and on wiring the currently-orphaned HTTP/OAuth transport into the router."}, "A6-tui-llm-config": {"area": "A6-tui-llm-config", "capabilities": [{"detail": "app.go is a hand-rolled Bubble-Tea-style loop (NOT Bubble Tea itself). RunApp() (app.go:662) wires: raw key reader goroutine (raw_input.go ReadRawKeys), agent.Bridge.PromptChan + agent.ToolBridge.StatusChan bridge goroutines, a 100ms spinner ticker, all fanned into one `eventChan`. HandleEvent dispatches typed messages (StreamTextMsg/ToolStatusMsg/ConfirmationRequiredMsg/AgentErrorMsg/AgentDoneMsg/StartupPromptMsg) and Key. 
This is the load-bearing loop and it is fully implemented, no stubs.", "name": "Custom retained-mode App event loop (non-Bubble-Tea)", "status": "implemented"}, {"detail": "app.go NewApp wires 6 components (chat/input/confirm/status/slash/screens) via callback fields. activeComponents() (app.go:238) dispatches input in priority order. notifyStateChange() (app.go:357) propagates the 7 TuiState transitions (statePrompt..stateSessionSelect, model.go).", "name": "Component model (Component interface + BaseComponent)", "status": "implemented"}, {"detail": "InputComponent handles runes, cursor, backspace, alt-enter newline, tab, history nav (HistoryManager in input.go). KeyEsc closes slash menu. Multi-line wrap via WrapInput(). Submit path: OnSubmit -> App.executePrompt (app.go:419) -> runner.Execute.", "name": "Input editing, multiline (Alt-Enter), history", "status": "implemented"}, {"detail": "raw_input.go parseBytes() decodes Ctrl-C/D/Y, backspace, tab, enter, arrow keys, Shift+Tab, PgUp/PgDn (\\x1b[5~/6~), and SGR mouse wheel (parseSGRMouse). IROHA_ENABLE_MOUSE toggles \\x1b[?1000h. Ctrl+Y is declared in KeyType but the 'copy last response' handler is NOT wired anywhere in app.go handleKey (missing feature vs help text claim in view.go:808).", "name": "Raw terminal input parsing (ANSI/SGR mouse/UTF-8)", "status": "implemented"}, {"detail": "renderer.go Draw() does synchronized output (\\x1b[?2026h), cursor-up diff, clear-to-EOL, trailing-line cleanup, and hardware cursor positioning for IME alignment. Reset() clears state on exit.", "name": "Flicker-free differential renderer", "status": "implemented"}, {"detail": "view.go RenderMarkdownWithWidth caches glamour.TermRenderer by width (rendererCache). App.renderStreamedMarkdown (app.go:250) additionally memoizes the rendered string per (text,width) so it only re-parses when streamedText changes during streaming ticks. 
Compact style derived from DarkStyleConfig to avoid line-padding blocks.", "name": "Glamour markdown rendering with width-keyed cache + stream memoization", "status": "implemented"}, {"detail": "HistoryStore (history.go) supports scrollOffset, renderedCache keyed by entry index, Compact() (replaces older entries with a RoleSystem summary, keeps recent verbatim), Search(), PageUp/Down. RenderWithTail composes transient stream/tool/confirm tail into the timeline.", "name": "History viewport with caching, scroll, and local compaction", "status": "implemented"}, {"detail": "component_confirm.go implements Y/N/Always/Edit/? card with its own editBuffer (separate from InputComponent buffer, mediated by FocusModel in focus.go). Responds go to agent.Bridge.ResponseChan (app.go:377). Edit mode extracts command/content/path from activeToolArgs.", "name": "Human-in-the-loop confirmation card with inline edit mode", "status": "implemented"}, {"detail": "handleRawSlashCommand (app.go:793) dispatches ~22 commands including /permission, /mode, /rules, /hooks reload, /memory reload, /compact, /context (token estimate dashboard), /prompt, /sections, /sessions, /resume, /team, /worktree, /bg, /skill[s], /mcp reload, /stats. SlashMenuComponent does prefix filtering. /trace is a stub reply ('live timeline rendering is not wired', app.go:1094).", "name": "Slash command system (~22 commands) + autocomplete menu", "status": "implemented"}, {"detail": "component_screens.go renderPermissionScreen/renderSessionScreen. Sessions come from agent.GlobalSessionService.ListSavedSessions (app.go:521). loadHistoryFromSession (app.go:543) replays session events into a fresh HistoryStore by reconstructing user/agent turns.", "name": "Permission mode + session picker screens", "status": "implemented"}, {"detail": "component_status.go shows mode, token count (k-notation), cost ($), running tool activity+duration, thinking state. 
SetTokenUsage fed from finalizeTurn() (app.go:496) via runner.GetTokenUsage() + config.EstimateCost.", "name": "Status bar (mode/tokens/cost/spinner/status-tag)", "status": "implemented"}, {"detail": "StreamTextMsg handler (app.go:138) only scans the new chunk for statusTagRe (^[status:...]) with a 50-byte tail-window fallback for cross-chunk tags, avoiding O(n) regex on full accumulated text each tick.", "name": "Streaming accumulation + incremental status-tag scan", "status": "implemented"}, {"detail": "AnthropicAdapter (anthropic.go) and OpenAICompatibleAdapter (openai.go) both implement model.LLM.GenerateContent returning iter.Seq2[*model.LLMResponse,error], parse SSE streams, map genai.Content<->provider messages, track cumulative tokens, support nag-reminder injection + SetSystemPrompt. These are real, working, non-Genkit adapters.", "name": "Direct HTTP adapters (Anthropic + OpenAI-compatible)", "status": "implemented"}, {"detail": "genkit_adapter.go GenkitModelAdapter.GenerateContent maps ADK LLMRequest -> ai.Message list + ai.GenerateOption, calls genkit.GenerateStream / genkit.Generate, and re-yields as model.LLMResponse. Tool wrappers use a no-op executor (return nil,nil) because ADK runner handles execution. Required only for ProviderGemini (Claude now falls back to direct AnthropicAdapter when genkit is nil).", "name": "Genkit model adapter", "status": "implemented"}, {"detail": "retry.go: ConsumeRetry session budget (default 10, IROHA_MAX_RETRIES/CLAUDE_CODE_MAX_RETRIES), RetryDelay exponential backoff capped 60s with Retry-After header parsing, IsRetryableHTTPStatus (408/429/5xx), IsRetryableTemporaryError string classifier, RetryNotice() emits a user-visible model.LLMResponse chunk. 
Both adapters integrate budget + RetryNotice.", "name": "Retry with budget, backoff, Retry-After, classification", "status": "implemented"}, {"detail": "max_tokens (Anthropic 'max_tokens' / OpenAI 'length') yields a truncation warning chunk (anthropic.go:465, openai.go:491). helpers.go CollectNonStreaming provides a non-streaming convenience collector.", "name": "Output-truncation surfacing (s11 error recovery)", "status": "implemented"}, {"detail": "config.go LoadConfig reads ~/.iroha.json with auto-migration from legacy ~/.go-claude.json, auto-detects provider from model name prefix. ProviderDefaults table covers glm/openai/claude/deepseek/kimi/siliconflow. SaveConfig writes 0600. RunConfigWizard is a 5-step interactive setup. EstimateCost uses ModelPricingMap with 85/15 input/output split. No ADK/Genkit dependency here.", "name": "Config load/save + provider defaults + wizard + pricing", "status": "implemented"}, {"detail": "interfaces.go AgentRunner.Execute signature takes onEvent func(*session.Event). app.go OnEvent reads ev.LLMResponse.Content.Parts. loadHistoryFromSession uses session.GetRequest and iterates resp.Session.Events().All(). This is the ONLY direct ADK coupling in TUI and it is narrowly scoped to event/session shape.", "name": "AgentRunner/BridgeResponder interfaces (test seam)", "status": "partial"}, {"detail": "Declared in KeyType (raw_input.go:38) and advertised in RenderHelpDashboard (view.go:808 'Copy last AI response to system clipboard') but NO handler exists in app.go handleKey(). Dead/advertised-only.", "name": "Ctrl+Y copy-last-response", "status": "missing"}, {"detail": "Declared as a slash command with an honest stub reply ('live timeline rendering is not wired into the TUI yet', app.go:1094). 
No actual trace UI.", "name": "/trace live timeline", "status": "stub"}, {"detail": "No --model flag switching UI path; SwitchModel exists on the runner (runner.go:504) but is not reachable from any TUI slash command (only /permission changes mode). Model switching is startup-time only.", "name": "Model hot-swap command (/model)", "status": "missing"}], "couplingNotes": "DECISION: This area CAN be decoupled from ADK, and the coupling is far narrower than it looks. TUI is ~95% framework-free; LLM is the load-bearing ADK dependency; config is 100% framework-free.\\n\\nTUI coupling (4 touchpoints only, all in app.go + interfaces.go):\\n1. `import google.golang.org/adk/session` (app.go:17, interfaces.go:6). Used as the type of `OnEvent func(*session.Event)` and in `loadHistoryFromSession` via `session.GetRequest` + `resp.Session.Events().All()` (app.go:547-559) and `ev.LLMResponse.Content.Parts` (app.go:695-701, 588-602). Native replacement: define a local `type AgentEvent struct { Text string; ToolCall *ToolCallInfo; IsFinal bool }` and have the runner translate ADK events into it before calling OnEvent. Session replay becomes a local (sessionID -> []Turn) loader. ~3 hours of work, mechanical.\\n\\nLLM coupling (load-bearing, harder):\\n- The package signature is `model.LLM` (google.golang.org/adk/model), whose contract is `GenerateContent(ctx, *model.LLMRequest, bool) iter.Seq2[*model.LLMResponse, error]`. ALL three adapters (anthropic.go:153, openai.go:134, genkit_adapter.go:66) implement this exact signature. The data types are google.golang.org/genai: `*genai.Content`, `*genai.Part`, `*genai.FunctionCall`, `*genai.FunctionResponse`, `req.Config.Tools[].FunctionDeclarations`, `req.Config.SystemInstruction`. 
These genai types are the wire format the runner, tools.go, and compaction code all speak.\\n- Native replacement requires defining local equivalents (LLMRequest{SystemPrompt; Contents []Content{Role; Parts []Part{Text, ToolCall, ToolResult}}; Tools []ToolSchema; Temperature; MaxTokens} and LLMResponse{Parts; Partial; TurnComplete; Usage}) and a local `Model interface { GenerateContent(ctx,*Request,bool) iter.Seq2[*Response,error]; Name() string }`. The direct HTTP adapters (anthropic.go, openai.go) already do all the real work and would translate cleanly \u2014 they only use genai as an in-memory struct shape. ~1 day to define the types + rewrite 3 adapters' signatures + update runner.go/delegator/tools to the new types.\\n- Genkit (firebase/genkit) is the heavier dependency: genkit_adapter.go imports `github.com/firebase/genkit/go/ai` and `/genkit`, and runner.go imports `genkit.Init`, `plugins/anthropic`, `plugins/googlegenai`. genkit_adapter.go uses `ai.NewSystemMessage`, `ai.NewMessage`, `ai.RoleUser/Model/System/Tool`, `ai.NewTool`, `ai.GenerateOption`, `genkit.GenerateStream`, `genkit.Generate`. It is ONLY reached for ProviderGemini (and Claude-with-genkit, which is optional). Dropping Genkit entirely is viable IF Gemini support is dropped or reimplemented via the google generative-ai Go SDK directly; the direct AnthropicAdapter already covers Claude. Without Genkit, ProviderGemini returns an error (adapter.go:79).\\n\\nCONFIG coupling: zero. config.go uses only stdlib (encoding/json, os, path/filepath, bufio, strings). Framework-free already.\\n\\nBOTTOM LINE: ADK/Genkit are used as (a) an event/session envelope shape and (b) a streaming model interface contract \u2014 neither is doing essential algorithmic work that the direct HTTP adapters don't already do. A native rewrite = define local event/request/response/tool types, port the 2 direct adapters to those types, port session replay to a local struct, and decide Gemini's fate. 
Estimated effort for this area alone: ~2-3 days. No behavioral reimplementation needed; it's a type-migration.", "divergences": ["Not Bubble Tea: iroha implements its own retained-mode event loop + differential renderer instead of Bubble Tea's Model/Update/View. This diverges from how most Go Claude Code replicas are built and re-implements viewport/scroll/cursor logic that Bubble Tea gives for free.", "Ctrl+Y 'copy last response' is advertised in /help (view.go:808) and parsed in raw_input.go but has no handler \u2014 real Claude Code and any honest UI would wire or remove it.", "/trace is a stub reply admitting it is not wired, while real Claude Code surfaces a live tool-call timeline.", "Local-only history compaction (/compact) summarises by role-counting + 240-char excerpts (history.go:161) rather than calling the LLM to summarise \u2014 diverges from Claude Code's model-driven compaction.", "Context estimate (/context, app.go:1142) is a static heuristic (chars/4, hooks*80 tokens, servers*120 tokens) not a real tokenizer; Claude Code reports real token counts.", "The LLM adapters hardcode MaxTokens:8192 for Anthropic (anthropic.go:247) and ignore req.Config.MaxOutputTokens for the direct Anthropic path \u2014 real Claude Code uses the configured max_tokens.", "Provider/model switching is startup-only; no live /model slash command, unlike Claude Code's /model.", "Session replay (loadHistoryFromSession) reconstructs turns by walking ADK session events and concatenating text parts \u2014 tool calls/results in history are not faithfully reconstructed into the timeline, so resumed sessions lose tool-card fidelity.", "Retry budget is global per-process (retryBudget package var) and not reset per session in the TUI flow, so a long-running session silently exhausts its retry budget across many turns.", "Status-tag injection (`[status:...]` regex, model.go:48) is an iroha-specific convention to surface LLM self-reported status into the status bar \u2014 not a Claude 
Code concept."], "externalDeps": ["github.com/charmbracelet/lipgloss \u2014 TUI styling (styles.go, view.go, all components)", "github.com/charmbracelet/glamour + glamour/ansi + glamour/styles \u2014 markdown rendering (view.go)", "github.com/charmbracelet/x/ansi \u2014 ANSI strip/width/cut helpers (view.go, wrap.go)", "github.com/muesli/termenv \u2014 color profile detection (renderer.go)", "golang.org/x/term \u2014 raw mode terminal control (raw_input.go, app.go UpdateWidth)", "github.com/google/uuid \u2014 session ID generation (app.go handleNewSession)", "google.golang.org/adk/session \u2014 session.Event, session.GetRequest, Session.Events().All() \u2014 ONLY in app.go + interfaces.go, used for event delivery and history replay", "google.golang.org/adk/model \u2014 model.LLM interface, model.LLMRequest, model.LLMResponse \u2014 the streaming contract for ALL 3 llm adapters + helpers.go + retry.go RetryNotice return type", "google.golang.org/genai \u2014 genai.Content, genai.Part, genai.FunctionCall, genai.FunctionResponse, GenerationConfig, FunctionDeclarations \u2014 the canonical message/tool wire types used across anthropic.go, openai.go, genkit_adapter.go, retry.go", "github.com/firebase/genkit (+ /ai, /core/api, /plugins/anthropic, /plugins/googlegenai) \u2014 Genkit registry + plugins; only load-bearing for ProviderGemini and optional for Claude. 
Imported by genkit_adapter.go and runner.go (initGenkit)."], "filesAudited": ["/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/app.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/model.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/view.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/component.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/component_chat.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/component_input.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/component_confirm.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/component_status.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/component_screens.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/component_slash_menu.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/input.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/history.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/renderer.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/raw_input.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/styles.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/interfaces.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/focus.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/wrap.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/adapter.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/anthropic.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/openai.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/genkit_adapter.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/retry.go", 
"/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/helpers.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/debuglog.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/config/config.go", "/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner.go (cross-package, for coupling verification)"], "qualityNotes": "The code is clean, idiomatic Go with thoughtful performance work: renderer caching by width (view.go rendererCache), per-text stream render memoization (app.go renderStreamedMarkdown), incremental status-tag scanning that only regexes the new chunk (app.go:141-153), and history render caching keyed by entry index (history.go). The component model is genuinely decoupled via callback fields (no App back-references). Tests are extensive (~25 _test.go files across tui/llm/config including table-driven tests and a 54k coverage_boost_test.go). Real risks: (1) the direct Anthropic adapter hardcodes MaxTokens=8192 and ignores req.Config.MaxOutputTokens (anthropic.go:247), so the truncation handler at line 465 will fire at 8192 regardless of config; (2) genkit_adapter tool wrappers are no-op executors (genkit_adapter.go:201 return nil,nil) relying on ADK runner to execute \u2014 fine under ADK but a trap if decoupling leaves dangling no-op tools; (3) retryBudget is a package-level global with no per-session reset hooked into the TUI new-session flow; (4) two parallel render paths exist \u2014 ChatComponent.Render (component_chat.go:146) appears legacy/unused since App.Render only calls RenderTail, creating dead code."}}, "verify": {"memory-claudemd": [{"claim": "Managed-policy CLAUDE.md precedence: managed (highest) \u2192 CLI args \u2192 local \u2192 project \u2192 user (lowest); the managed CLAUDE.md (file or the managed-only `claudeMd` settings key) cannot be excluded by claudeMdExcludes, and the Windows legacy path C:\\ProgramData\\ClaudeCode\\managed-settings.json was removed in v2.1.75 
(now C:\\Program Files\\ClaudeCode\\).", "verdict": "confirmed", "evidence": "All four sub-claims are confirmed verbatim by primary sources.\n\n(1) Precedence ordering \u2014 docs.claude.com/docs/en/settings, section \"How scopes interact\": \"1. Managed (highest) - can't be overridden by anything / 2. Command line arguments - temporary session overrides / 3. Local - overrides project and user settings / 4. Project - overrides user settings / 5. User (lowest) - applies when nothing e", "sourceUrl": "https://code.claude.com/docs/en/settings", "correctedClaim": ""}], "streaming-protocol": [{"claim": "The headless final event is type==\"result\" with subtype \"result\" (or \"success\"/\"error\" variants) \u2014 NOT \"message_stop\". message_stop is the Messages-API SSE terminal event inside a stream_event, distinct from the ResultMessage that ends stream-json. Known bug #1920: missing result event hangs consumers.", "verdict": "confirmed", "evidence": "Three authoritative sources confirm the core claim. (1) Headless docs (https://code.claude.com/docs/en/headless) document `--output-format stream-json` and the headless/SDK spec (quoted in issue #1920): \"Each conversation begins with an initial `init` system message, followed by a list of user and assistant messages, followed by a final `result` system message with stats.\" The terminal line is `{\"", "sourceUrl": "https://code.claude.com/docs/en/agent-sdk/streaming-output", "correctedClaim": "The headless (stream-json / Agent SDK) conversation is terminated by a top-level event of type==\"result\" with subtype \"success\" (or an error variant such as \"error\") \u2014 NOT \"message_stop\". `message_stop` is a Messages-API SSE event that marks the end of a single message; in stream-json it arrives inside a StreamEvent (top-level type: \"stream_event\") and precedes the AssistantMessage and ultimately the final ResultMessage, which is what actually ends the stream. 
Known bug anthropics/claude-code#1920: Claude Code intermittently fails to emit the final {\"type\":\"result\",...} event in stream-json mode, which hangs SDK consumers indefinitely."}], "system-prompt-assembly": [{"claim": "CLAUDE.md IS NOT IN THE SYSTEM PROMPT: official docs state CLAUDE.md/CLAUDE.local.md content is injected into the conversation as a USER message (project context), not into the system prompt; it therefore does NOT affect system-prompt cache entries. The exception is excludeDynamicSections (TS) / exclude_dynamic_sections (Python), added claude-agent-sdk v0.2.98 / v0.1.58, which moves the env-info block from the system prompt into the first user message.", "verdict": "confirmed", "evidence": "The official Claude Code Agent SDK docs (code.claude.com/docs/en/agent-sdk/modifying-system-prompts) state verbatim: \"CLAUDE.md takes a different path: the SDK reads it and injects its content into the conversation as project context, not into the system prompt.\" The docs reinforce this in two more places: \"CLAUDE.md files give Claude persistent project context and instructions. The SDK injects th", "sourceUrl": "https://code.claude.com/docs/en/agent-sdk/modifying-system-prompts", "correctedClaim": ""}], "agent-loop": [{"claim": "Token-budget auto-continue: COMPLETION_THRESHOLD=0.9 (stop at >=90% used) and DIMINISHING_THRESHOLD=500 tokens \u2014 early stop requires >=3 continuations AND both current+previous deltas <500. Subagents ALWAYS stop (budget is top-level only). The nudge is an isMeta user message. 
Source: claude-code-from-source.com ch05 + inematds/claudecode-manual 04-query-engine.md.", "verdict": "confirmed", "evidence": "Confirmed against three independent primary sources that all trace back to the same upstream file (openclaudecode/src/query/tokenBudget.ts).\n\n(1) openonion/claude-code TS rewrite (https://github.com/openonion/claude-code/blob/main/src/query/tokenBudget.ts): `const COMPLETION_THRESHOLD = 0.9` and `const DIMINISHING_THRESHOLD = 500`. The continue branch: `if (!isDiminishing && turnTokens < budget * ", "sourceUrl": "https://github.com/openonion/claude-code/blob/main/src/query/tokenBudget.ts", "correctedClaim": ""}], "context-compaction": [{"claim": "API microcompact uses clear_tool_uses_20250919 with DEFAULT_MAX_INPUT_TOKENS=180,000 trigger and DEFAULT_TARGET_INPUT_TOKENS=40,000 (clear_at_least = 140,000); clear_thinking_20251015 with keep:'all' is emitted whenever hasThinking && !isRedactThinkingActive.", "verdict": "confirmed", "evidence": "The deobfuscated Claude Code source `services/compact/apiMicrocompact.ts` (mirrored at github.com/leaf-kit/claude-analysis and claude-code-os.vercel.app) confirms every figure. Constants: `const DEFAULT_MAX_INPUT_TOKENS = 180_000 // Typical warning threshold` and `const DEFAULT_TARGET_INPUT_TOKENS = 40_000 // Keep last 40k tokens like client-side`. The clear_tool_uses_20250919 strategy (emitted wh", "sourceUrl": "https://github.com/leaf-kit/claude-analysis/blob/main/src/services/compact/apiMicrocompact.ts", "correctedClaim": "Claim confirmed. 
One caveat the claim omits (without contradicting it): the clear_tool_uses_20250919 strategy is emitted only when process.env.USER_TYPE === 'ant' AND env flags USE_API_CLEAR_TOOL_RESULTS or USE_API_CLEAR_TOOL_USES are truthy; the clear_thinking_20251015 strategy is emitted for all users whenever hasThinking && !isRedactThinkingActive (switching to keep:{type:'thinking_turns',value:1} when clearAllThinking is set)."}], "tool-exec-engine": [{"claim": "Permission rule evaluation order is deny -> ask -> allow (first match wins, specificity does not change order); rules format 'Tool' or 'Tool(specifier)' with Bash wildcards where a space before * enforces a word boundary; oversized tool results persist to ~/.claude/tool-results/{hash}.txt and MCP default persist threshold is 25000 chars (hard ceiling 500000 via _meta anthropic/maxResultSizeChars)", "verdict": "refuted", "evidence": "Most sub-claims are confirmed verbatim by https://code.claude.com/docs/en/permissions: \"Rules are evaluated in order: deny, then ask, then allow. The first match in that order determines the outcome, and rule specificity does not change the order. A matching ask rule prompts even when a more specific allow rule also matches the same call.\" And: \"Permission rules follow the format `Tool` or `Tool(s", "sourceUrl": "https://code.claude.com/docs/en/permissions", "correctedClaim": "Permission rule evaluation order is deny -> ask -> allow (first match wins, rule specificity does not change the order); rules use the format 'Tool' or 'Tool(specifier)'; Bash specifiers support glob wildcards where a space before a trailing * (e.g. Bash(ls *)) enforces a word boundary, while Bash(ls*) does not; the _meta[\"anthropic/maxResultSizeChars\"] override has a hard ceiling of 500,000 characters. HOWEVER, the documented default MCP output cap is 25,000 TOKENS (via MAX_MCP_OUTPUT_TOKENS), not 25,000 chars \u2014 the docs do not publish a default char-based persist-to-disk threshold. 
Oversized results ARE persisted to disk and replaced with a file reference, but the official docs do not document the exact path ~/.claude/tool-results/{hash}.txt; that path/hash-scheme is implementation detail not stated in authoritative docs."}], "session-transcript": [{"claim": "Every transcript line carries a parentUuid (not just uuid), forming a DAG/linked-list; compact_boundary records set parentUuid:null and carry logicalParentUuid referencing the now-erased pre-compaction last message, immediately followed by a user message with isCompactSummary:true whose content starts with \"This session is being continued from a previous conversation that ran out of context.\"", "verdict": "confirmed", "evidence": "Primary source (blog.fsck.com technical guide, 2026-02-22) confirms every sub-assertion verbatim. (1) Linked-list: \"The `parentUuid` field chains records into a linked list \u2014 each record points to the one before it.\" (2) compact_boundary record: when context approaches ~167K tokens, Claude Code writes a record with `\"subtype\": \"compact_boundary\"`, `\"logicalParentUUID\": \"last-msg-before-compaction-", "sourceUrl": "https://blog.fsck.com/agent-blog/2026/02/22/claude-code-session-continuation/", "correctedClaim": ""}], "mcp": [{"claim": "MCP_TOOL_TIMEOUT default is ~28 hours; MAX_MCP_OUTPUT_TOKENS default is 25000 with a 10000-token warning threshold; per-server 'timeout' values below 1000 ms are ignored (fall through to MCP_TOOL_TIMEOUT) since v2.1.162 (before that they were floored to 1 second)", "verdict": "uncertain", "evidence": "All three behavioral facts are confirmed by the PRIMARY source (official Claude Code env-vars doc, https://code.claude.com/docs/en/env-vars), which states verbatim:\n\n(1) MCP_TOOL_TIMEOUT: \"Timeout in milliseconds for MCP tool execution (default: 100000000, about 28 hours). A per-server `timeout` field in `.mcp.json` overrides this for that server. 
For the env variable, values below 1000 are floore", "sourceUrl": "https://code.claude.com/docs/en/env-vars", "correctedClaim": "CONFIRMED: MCP_TOOL_TIMEOUT default is 100000000 ms (~28 hours); MAX_MCP_OUTPUT_TOKENS default is 25000 with a warning threshold at 10000 tokens; for the per-server `timeout` field in .mcp.json, values below 1000 ms are ignored (fall back to MCP_TOOL_TIMEOUT), while for the MCP_TOOL_TIMEOUT env var itself, values below 1000 ms are floored to 1 second. The official docs (code.claude.com/docs/en/env-vars) and changelog confirm both the behavioral change and that sub-1000 ms per-server values were previously floored to a 1-second watchdog. UNVERIFIED: the specific version \"v2.1.162\" \u2014 the official changelog does not let that version be cleanly pinned to this entry; treat the version number as approximate."}], "skills": [{"claim": "Plugin skills are namespaced 'plugin-name:skill-name' and cannot conflict with enterprise/personal/project levels; the plugin root SKILL.md is the ONLY case where the frontmatter 'name' field sets the command name (otherwise directory name / filename governs).", "verdict": "confirmed", "evidence": "The official Claude Code Skills docs (https://code.claude.com/docs/en/skills) state verbatim: \"Plugin skills use a plugin-name:skill-name namespace, so they cannot conflict with other levels.\"\n\nOn command-name derivation, the docs say: \"The frontmatter name field sets the display label shown in skill listings and, except for a plugin-root SKILL.md, does not change what you type after /.\" The accom", "sourceUrl": "https://code.claude.com/docs/en/skills", "correctedClaim": ""}], "permissions": [{"claim": "Rule syntax gotcha: Bash(ls *) requires the space and enforces a word-boundary (matches 'ls -la' not 'lsof'); Bash(ls*) without space matches both; trailing :* (Bash(ls:*)) is equivalent to trailing ' *' but is ONLY recognized at end of pattern; Read/Edit pattern anchors differ \u2014 //path=filesystem root, 
~/path=home, /path=project root (NOT absolute!), path/./path=relative to cwd.", "verdict": "confirmed", "evidence": "Official Claude Code docs (code.claude.com/docs/en/permissions, retrieved 2026-06-14, v2.1.x) confirm every assertion verbatim:\n\n(1) Bash word boundary: \"The space before * matters: Bash(ls *) matches ls -la but not lsof, while Bash(ls*) matches both.\" And: \"When * appears at the end with a space before it (like Bash(ls *)), it enforces a word boundary, requiring the prefix to be followed by a spa", "sourceUrl": "https://code.claude.com/docs/en/permissions", "correctedClaim": ""}], "hooks": [{"claim": "PreToolUse uses hookSpecificOutput.permissionDecision (allow/deny/ask/defer) + permissionDecisionReason + updatedInput (NOT top-level decision/reason which is DEPRECATED for this event; legacy approve/block map to allow/deny). Other events (PostToolUse, Stop, UserPromptSubmit, PreCompact, ConfigChange) use TOP-LEVEL decision:'block' + reason. PermissionRequest uses hookSpecificOutput.decision.behavior (allow/deny). PreToolUse hooks fire BEFORE permission-mode checks and can deny even in bypassPermissions mode.", "verdict": "confirmed", "evidence": "The official Hooks reference (https://code.claude.com/docs/en/hooks) confirms every component:\n\n(1) PreToolUse structure & deprecated top-level fields (line 1455, 1485): \"Unlike other hooks that use a top-level `decision` field, PreToolUse returns its decision inside a `hookSpecificOutput` object... 
four outcomes (allow, deny, ask, or defer) plus the ability to modify tool input before execution.\"", "sourceUrl": "https://code.claude.com/docs/en/hooks", "correctedClaim": "(Optional precision, not a correction: the top-level decision:'block' events are exactly UserPromptSubmit, UserPromptExpansion, PostToolUse, PostToolUseFailure, PostToolBatch, Stop, SubagentStop, ConfigChange, and PreCompact \u2014 i.e., the claim's list (PostToolUse, Stop, UserPromptSubmit, PreCompact, ConfigChange) is correct but not exhaustive. updatedInput for PreToolUse sits directly under hookSpecificOutput; for PermissionRequest it is inside the decision object.)"}], "slash-commands-plan": [{"claim": "The 5 ExitPlanMode approval options presented to the user are exactly: 'Approve and start in auto mode', 'Approve and accept edits', 'Approve and review each edit manually', 'Keep planning with feedback', 'Refine with Ultraplan'; each approve option switches the permission mode accordingly.", "verdict": "confirmed", "evidence": "The official Claude Code docs page \"Choose a permission mode\" (https://code.claude.com/docs/en/permission-modes) renders the ExitPlanMode prompt verbatim as an unordered list with these exact children, in order: \"Approve and start in auto mode\", \"Approve and accept edits\", \"Approve and review each edit manually\", \"Keep planning with feedback\", and \"Refine with [Ultraplan] for browser-based review\"", "sourceUrl": "https://code.claude.com/docs/en/permission-modes", "correctedClaim": "When Claude exits plan mode, the approval prompt presents exactly these 5 options, in this order: 'Approve and start in auto mode', 'Approve and accept edits', 'Approve and review each edit manually', 'Keep planning with feedback', and 'Refine with Ultraplan for browser-based review' (the full label; 'Ultraplan' links to /en/ultraplan). 'Keep planning with feedback' and the 'Refine...' option are not approvals (they keep you in plan mode). 
The three approve options switch the session to the permission mode each describes (auto, acceptEdits, default), as the docs state: 'Approving a plan exits plan mode and switches the session to the permission mode each approve option describes.'"}], "subagents-task": [{"claim": "The Agent tool prompt-only return contract: parent receives ONLY the subagent's final message verbatim as the tool_result (no intermediate tool calls/reasoning); built-in Explore and Plan are one-shot and return NO agentId so they cannot be resumed via SendMessage.", "verdict": "confirmed", "evidence": "Both halves are directly confirmed by official Claude Code docs.\n\nPART 1 (verbatim final-message return, no intermediate tool calls): The SDK docs (code.claude.com/docs/en/agent-sdk/subagents) state verbatim: \"The parent receives the subagent's final message verbatim as the Agent tool result, but may summarize it in its own response.\" The parallel docs page (code.claude.com/docs/en/sub-agents) and", "sourceUrl": "https://code.claude.com/docs/en/agent-sdk/subagents", "correctedClaim": ""}]}} \ No newline at end of file diff --git a/docs/claude-code-architecture/audit/A1-agent-loop-runner.md b/docs/claude-code-architecture/audit/A1-agent-loop-runner.md new file mode 100644 index 0000000..387a414 --- /dev/null +++ b/docs/claude-code-architecture/audit/A1-agent-loop-runner.md @@ -0,0 +1,78 @@ +# Audit: A1-agent-loop-runner + +## Files audited + +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_bridge.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_exec.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_edit.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_confirmation.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_confirmation_hooks.go +- 
/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/autonomous.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/protocol.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/cmd/agent-cli/main.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/adapter.go (interface contract verification) +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/go.mod (ADK/Genkit/genai versions) +- ADK source (module cache) runner/runner.go, session/session.go, agent/llmagent/llmagent.go, internal/llminternal/base_flow.go, tool/tool.go — to verify the real loop owner and event/tool shapes + +## Capabilities +- **[implemented] Runner construction & dependency wiring (NewCustomRunner / CustomRunner struct)** — NewCustomRunner() wires up Genkit registry, an llm.Adapter (model.LLM), GetSWETools(), a DynamicLLMDelegator wrapping the adapter, an llmagent.New agent, a persistent session service, and finally runner.New(...) -> adkRunner. The struct stores adkRunner, llmModel, delegator, provider/model/api fields, GenkitRegistry, and a RunnerDeps bag of all global managers. This is a constructor, not a loop. +- **[missing] Agent loop driver (model-call -> tool-call -> model-call iteration)** — THE CRITICAL GAP. runner_exec.go: Execute() does NOT implement a model->tool-call->model iteration. It (1) resets circuit breaker, (2) drains bg/cron notifs and prepends them, (3) runs HookUserPrompt (can block/inject), (4) builds a *genai.Content user msg, then calls cr.adkRunner.Run(ctx, userID, sessionID, msg, RunConfig{StreamingMode:SSE}) and just ranges over the returned iter.Seq2[*session.Event,error], forwarding each ev to onEvent. The ACTUAL loop (for { runOneStep }) is ADK's internal llminternal.Flow.Run (adk base_flow.go:101). Termination, function-call dispatch, max-iterations, before/after model+tool callbacks all live in ADK, opaque to iroha. 
+- **[implemented] Per-run event lifecycle & instrumentation (runID, run.accepted/started/cancelled/failed/completed)** — Execute() emits run.accepted/started/cancelled/failed/completed via Logger.LogRunEvent with a uuid runID, atomic sequence, and a terminal-once guard. ctx.Done() triggers run.cancel_requested + Bridge.Cancel(). Panic in the goroutine is recovered, rolls back pending edits, emits run.failed. +- **[implemented] Pre-LLM prompt enrichment (bg/cron notifications, hook messages)** — runner_exec.go prepends background-notification and cron-notification XML blocks in front of the user prompt each turn, draining BackgroundManager.DrainNotifications() and CronScheduler.DrainNotifications(). +- **[implemented] Post-run Git auto-commit (aider-style)** — After the event stream completes: fires HookAgentResponse, computes editedPaths (filtered against initially-dirty git paths), commits the edit snapshots, and if files were edited generates a semantic commit message via a SECOND direct cr.llmModel.GenerateContent call then GitCommitPaths with '[iroha] ' prefix. Finally runs HookSessionEnd. +- **[implemented] Tool wrapping / dispatch interception (blockingConfirmationTool)** — blockingConfirmationTool embeds tool.Tool and implements ProcessRequest (rewrites req.Tools[name] to itself so ADK dispatches through it), Run (permission check -> auto-review -> human y/n/always/explain/edit/bypass via Bridge channels), and Declaration. This is the permission+confirmation layer. +- **[implemented] Permission gating + interactive confirmation (y/n/always/explain/edit/bypass)** — GlobalPermissionManager.Check returns allow/deny/ask. allow->runWithHooks silently; deny->error with safety-fuse warning after 3 consecutive denials; ask-> ReviewCommand/ReviewFileOperation, auto-approve only in ModeAuto, else block on Bridge.PromptChan<-promptMsg and <-Bridge.ResponseChan. 
Supports 'explain' (calls globalLLMModel for a 1-2 sentence rationale), 'edit:' (rewrites command/content/path arg then auto-approves), 'always' (adds session allow rule), 'bypass' (returns synthetic success). +- **[implemented] Hook pipeline integration around every tool call** — runWithHooks: Stage A PreToolUse (block / rewrite args via UpdatedInput json round-trip / inject messages), Stage B runnable.Run + ToolCircuitBreaker.Track (3 consecutive identical-arg failures -> hard block), Stage C PostToolUse (inject messages, AdditionalContext). After file_edit/write/batch runs `go build ./pkg/agent/...` and injects compile errors as additional_context. Cancels respect Bridge.CancelChanRead. +- **[implemented] Dynamic model delegator (prompt rebuild, auto-compact, context-length recovery, retry)** — DynamicLLMDelegator wraps model.LLM, rebuilds system prompt each turn via SystemPromptUpdater, runs CompactContents when len(Contents)>12 or estimate>50k tokens, and on first-error context-length-exceeded force-compacts+retries once. For DirectHTTPAdapter models, adds retryable-temporary-error retry with budget, delay, and user-visible RetryNotice. +- **[implemented] Runtime model switching (SwitchModel)** — SwitchModel swaps the delegator's adapter and updates GlobalAgentPool fields + AutoReviewConfig at runtime without rebuilding the runner. Thread-safe via RWMutex on both delegator and pool. +- **[implemented] Foreground<->background bridges (ConfirmationBridge, ToolStatusBridge)** — ConfirmationBridge (singleton Bridge) with PromptChan/ResponseChan/CancelChan + Reset/Cancel; ToolStatusBridge (singleton ToolBridge) with a 100-buffered StatusChan and a goroutine drain that preserves order. ToolStatus carries Name/Args/Running/Success/Error/Duration/StreamLines. 
+- **[implemented] Atomic edit snapshot/rollback (pendingEditSnapshots)** — pendingEditSnapshots map[path]->originalContent; rollbackPendingEdits restores (removes if empty), commitPendingEdits clears after a successful turn, pendingEditPaths lists. findGoModuleRoot walks up to go.mod. Used by Execute on panic/cancel for rollback and on success for commit. +- **[partial] Autonomous task polling (AutonomousManager)** — AutonomousManager with StateWork/StateIdle, AutoClaimTasks (pending+unblocked+keyword match -> sets in_progress+owner), StartAutoPolling/StopAutoPolling ticker loop that claims while IDLE. Only relevant for teammate/multi-agent mode; NOT part of the single-user agent loop. GlobalMessageCount and GetIdentityTagBlock also live here. +- **[implemented] Inter-agent protocol handshake (ProtocolManager)** — ProtocolManager persists ProtocolRequest (shutdown/plan_approval) JSON files under .team/requests/, with CreateRequest/GetRequest/RespondToRequest. This is teammate-to-teammate durable handshake storage, decoupled from the runner loop and from ADK entirely. +- **[implemented] CLI entrypoint (cmd/agent-cli/main.go)** — Flags: provider/model/apikey/baseurl/api-format/teammate+socket/config-wizard/resume/last/session/fork/yes/plan/default/permission-mode. Resolves priority override hierarchy (flag > config > default > env), runs config wizard if key missing, constructs NewCustomRunner, resolves session id (new/resume/last/fork), parses initial PermissionMode, then hands off to tui.RunApp(runner, sessionID, startInSessionPicker, initialMode, startupPrompt). Teammate mode short-circuits to agent.RunTeammateMode over a unix socket. + +## External deps +- google.golang.org/adk v1.2.1-0.20260519122726-f2aee5301649 — runner.Runner (loop entry), agent/llmagent (rootAgent + Flow loop owner), model (LLM/LLMRequest/LLMResponse contract), session (Event/InMemoryService/Session), tool (Tool/Context), agent (RunConfig/StreamingMode). 
internal/llminternal.Flow.Run is the opaque loop driver. +- google.golang.org/genai v1.57.0 — Content/Part/FunctionCall/FunctionResponse/FunctionDeclaration/Schema wire types used across runner, confirmation, and compaction. +- github.com/firebase/genkit/go v1.8.0 — genkit.Genkit registry + api.Plugin; googlegenai.GoogleAI and anthropic.Anthropic plugins used in initGenkit for Gemini/Claude. The registry is stored on CustomRunner but is only load-bearing for the Genkit adapter path; direct HTTP adapters (openai.go/anthropic.go) bypass it. +- github.com/google/uuid — runID + session ID generation. + +## Coupling notes + +This area is DEEPLY coupled to Google ADK and cannot be decoupled incrementally — the agent loop itself is outsourced to ADK, so a native (Claude-Code-style) refactor means replacing the loop driver, not just swapping types. + +LOAD-BEARING ADK types in this area: +- runner.Runner (google.golang.org/adk/runner) — adkRunner field on CustomRunner (runner.go:337). Its Run(ctx,userID,sessionID,*genai.Content,agent.RunConfig,...RunOption) iter.Seq2[*session.Event,error] is the entire execution entry point (runner_exec.go:139). Replacing this means writing the native loop ourselves. +- llmagent.New / llmagent.Config (google.golang.org/adk/agent/llmagent) — the rootAgent (runner.go:404). The actual model<->tool iteration lives in ADK's internal llminternal.Flow.Run (adk internal/llminternal/base_flow.go:101, the `for { runOneStep }` loop). iroha has NO equivalent; ADK owns: termination detection (IsFinalResponse / no FunctionCall / no Partial), function-call dispatch, before/after model+tool callbacks, max-iterations. A native replacement must reimplement this Flow. +- model.LLM / model.LLMRequest / model.LLMResponse (google.golang.org/adk/model) — the contract the llm.Adapter implements and the type DynamicLLMDelegator wraps (runner.go:62,109). GenerateContent returns iter.Seq2[*model.LLMResponse,error]. This is the model-call surface a native loop needs to drive. 
+- session.Event / session.InMemoryService / session.Session (google.golang.org/adk/session) — events streamed to the TUI (runner_exec.go:144), and GlobalSessionService wraps session.InMemoryService (runner.go:416-417). session.Event embeds model.LLMResponse + Actions + LongRunningToolIDs and has IsFinalResponse(). A native design would define its own streaming event type. +- tool.Tool / tool.Context (google.golang.org/adk/tool) — blockingConfirmationTool embeds tool.Tool (runner_confirmation.go:28), implements ProcessRequest(ctx tool.Context, *model.LLMRequest) and Run(ctx tool.Context, args any)(map[string]any,error) and Declaration()*genai.FunctionDeclaration. The requestProcessor interface (runner_confirmation.go:16) mirrors ADK's internal toolinternal.RequestProcessor and the req.Tools map[string]any rewrite trick (runner_confirmation.go:42-47) is a hack to force ADK to dispatch through the wrapper. A native tool registry removes this indirection entirely. +- agent.RunConfig / agent.StreamingModeSSE (google.golang.org/adk/agent) — passed to adkRunner.Run (runner_exec.go:139-141). +- genai.Content / genai.Part / genai.FunctionDeclaration / genai.Schema (google.golang.org/genai v1.57.0) — the message/tool-declaration wire format used everywhere (runner_exec.go:132, runner_confirmation.go:371-404, compaction estimate). This is Google's genai SDK, shared with ADK. + +LOAD-BEARING Genkit types: +- genkit.Genkit registry + api.Plugin + googlegenai.GoogleAI + anthropic.Anthropic (firebase/genkit/go) — initGenkit (runner.go:350-364) builds a registry for Gemini/Claude providers; nil for OpenAI-compatible. The GenkitRegistry is stored on CustomRunner and GlobalAgentPool and threaded into llm.NewAdapter. Only the GenkitModelAdapter path actually uses it; the direct-HTTP adapters (OpenAI/Anthropic/GLM/DeepSeek/Kimi/SiliconFlow) ignore it. + +WHAT A NATIVE LOOP REQUIRES (decoupling work): +1. 
A new AgentLoop type owning: build request (system prompt + session contents + tool declarations) -> call model.GenerateContent -> inspect response Parts for FunctionCall -> dispatch to the tool registry (running permission + hooks + circuit-breaker inline) -> append FunctionResponse -> repeat until a response with no FunctionCall (or max-iterations / cancel). This is exactly what ADK Flow.Run owns today and iroha has zero of. +2. Replace session.Event with a native streaming event union (text delta / tool_call_start / tool_result / final / error). +3. Replace tool.Tool/tool.Context with a native Tool interface (Name/Declaration/Run(ctx, args)) and a registry; drop the ProcessRequest/req.Tools-map hack. +4. Replace llmagent+runner with a single Session+Loop struct. PersistentSessionService already wraps session.InMemoryService, so the storage layer is partially ours but still speaks session.Event/session.Session. +5. The genai wire types (Content/Part/FunctionCall/Schema) are the largest cross-cutting dependency — either keep genai as the canonical message format (lowest-effort path) or define native equivalents and translate at the adapter boundary. +Genkit can be dropped almost entirely since most providers already use direct HTTP adapters; only Gemini and the Anthropic-via-Genkit path need it, and Anthropic already has a direct adapter. + +## Divergences from Claude Code +- NO native agent loop: iroha's Execute() is a thin event-forwarder around ADK's runner.Run/Flow.Run. Real Claude Code owns its own loop (model turn -> tool-use detection -> execution -> feedback) in-process with explicit max-turns, sidechain/secondary-turn forking, and interrupt handling. iroha cannot implement these without forking or replacing ADK's Flow. +- Auto-commit on every turn: Execute() stages+commits the turn's edited paths and LLM-generates a commit message with a '[iroha] ' prefix (runner_exec.go:189-242). Real Claude Code never auto-commits; commits are an explicit user action. 
This is a material behavioral divergence baked into the loop tail. +- Identity is a fixed persona: GetIdentityTagBlock() hardcodes an 'iroha' cybernetic-anime-girl SWE assistant persona addressing the user as 'Developer' (autonomous.go:138-146), and GlobalMessageCount starts at 10 (autonomous.go:135). Claude Code has no fixed persona and no synthetic message-count seeding. +- No native streaming event taxonomy: iroha consumes opaque session.Event (which embeds model.LLMResponse). Claude Code defines its own granular assistant-message/tool-use/content-block streaming model. Mapping ADK events to a Claude-Code-equivalent UI requires interpretation not present here. +- Post-edit go-build self-heal is hardcoded to './pkg/agent/...' (runner_confirmation.go:157) — runs regardless of which project/module was edited, so it will misreport or no-op outside this repo. +- Circuit breaker is global and exact-arg only (runner_confirmation.go:219-256, acknowledged limitations): single shared breaker, fmt.Sprintf('%v') arg comparison, no time window, no per-tool threshold. Claude Code has per-tool, typed, time-windowed loop protection. +- Dynamic system-prompt rebuild happens inside the model delegator (DynamicLLMDelegator.GenerateContent, runner.go:118-125) keyed off GlobalMessageCount, rather than at the loop-turn boundary as Claude Code does (system prompt assembled once per turn before the model call). +- Confirmation 'explain' and 'edit' flows (runner_confirmation.go:259-320) spawn extra direct model.GenerateContent calls for rationales/arg-rewrites — there is no equivalent in Claude Code's permission model, which is rule-based + user prompt only. +- ToolCircuitBreaker.Reset is called at the top of every Execute (runner_exec.go:19) and breaker state is process-global, so concurrent runs (teammates) interfere — diverges from Claude Code's per-session isolation. 
+ +## Quality notes + +Code is genuinely functional and reasonably well-factored for an ADK-based design: clean RunnerDeps injection bag, atomic run-event instrumentation with terminal-once guard, panic recovery with edit rollback, real hook pipeline (PreToolUse/PostToolUse/ToolError) with arg-rewrite and AdditionalContext injection, and a working permission/confirmation/auto-review/circuit-breaker stack. Honest self-documentation of limitations exists (e.g. ToolCircuitBreaker docstring at runner_confirmation.go:201-218). HOWEVER the area is architecturally the OPPOSITE of Claude Code: it is a framework-hosted agent, not a native loop. The 'agent loop' capability that defines Claude Code is entirely missing from iroha and delegated to ADK. Key smells: (1) ProcessRequest rewrites req.Tools map to force dispatch through the wrapper (fragile ADK-internals coupling); (2) post-edit go-build is hardcoded to ./pkg/agent/...; (3) Global* singletons (GlobalSessionService, globalLLMModel, GlobalMessageCount, GlobalToolCircuitBreaker, Bridge, ToolBridge) make per-session/concurrent-run isolation impossible; (4) auto-commit is baked into the loop tail with no opt-out; (5) GlobalMessageCount is seeded to 10 with no comment. Test coverage in the area is heavy (runner_test.go, runner_ext_test.go, runner_edit_integration_test.go, runner_confirmation tests) but mostly exercises the wrapper/bridge/permission layers, not a loop (because there is no loop to test). 
diff --git a/docs/claude-code-architecture/audit/A2-tools.md b/docs/claude-code-architecture/audit/A2-tools.md new file mode 100644 index 0000000..dd6486e --- /dev/null +++ b/docs/claude-code-architecture/audit/A2-tools.md @@ -0,0 +1,118 @@ +# Audit: A2-tools + +## Files audited + +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_file.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_file_batch.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_file_search.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_shell.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_web.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_web_safety.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_mcp.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_memory.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_schedule.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_subagent.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_task.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_team.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_todo.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_worktree.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/auto_review.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/lsp_tools.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/lsp_utils.go (registerLSPTools) +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/ci_watcher.go (registerCITools) +- 
/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/mcp.go (DynamicMCPTool) +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_exec.go (dispatch) +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_edit.go (snapshot/rollback) +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/pool.go (WorkdirKey) +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tokenizer.go (safePrefixes) +- /Users/akiwayne/go/pkg/mod/google.golang.org/adk@v1.2.1-0.20260519122726-f2aee5301649/tool/tool.go (tool.Tool/tool.Context) +- /Users/akiwayne/go/pkg/mod/google.golang.org/adk@v1.2.1-0.20260519122726-f2aee5301649/tool/functiontool/function.go (Func/New) + +## Capabilities +- **[implemented] Tool registration framework (table-driven, generic)** — ToolRegistry + generic register[TArgs,TResults]() in tools.go:24 wraps functiontool.New(Config{Name,Description}, handler). 40 tools registered across 14 register* funcs in GetSWETools() (tools.go:359). Table-driven, append-only, first-error-wins. Real, works. +- **[implemented] file_read** — tools_file.go:25-71. 10MB cap, rejects dirs, supports 1-based start/end line slicing with 'N\t' formatting (mimics Read tool cat -n). Sandbox-validated (validateSandboxPath). Matches Claude Code Read semantics closely. +- **[implemented] file_edit (exact + whitespace-tolerant)** — tools_file.go:88-159. Exact-match first, then whitespace-tolerant line-based fallback (normalizeLine collapses runs). Enforces uniqueness unless replace_all. Generates unified diff. Dry-run support. snapshotFile() for rollback. No 'Read before edit' hard requirement like real CC. +- **[implemented] file_edit_batch (atomic multi-edit)** — tools_file_batch.go:22-123. Two-phase (validate-all then apply-all) with rollbackPendingEdits() on any failure. Max 50 edits. Reuses whitespaceTolerantEdit fallback. Diff per edit. +- **[implemented] file_write** — tools_file.go:391-410. 
MkdirAll parents, snapshot+overwrite. No diff display, no line-numbering. Diverges from CC Write (which enforces Read-before-overwrite). +- **[implemented] shell_run (streaming, sandboxed)** — tools_shell.go:43-136. exec.CommandContext via 'sh -c', WrapSandboxCommand applied, StdoutPipe+StderrPipe merged, line streaming via ToolBridge.Send(ToolStatus{StreamLines}), 500-line stream cap, 30s timeout. Exit code reported. checkShellCommandSandbox enforces cwd containment. +- **[implemented] Shell command sandbox (path/static analysis)** — tools.go:151-202 + tokenizeCommand/splitShellPipeline/tokenizeAllowedReadOnlyPipeline. Blocks relative '../' escape, out-of-cwd absolute paths (except safePrefixes from tokenizer.go), env-var expansion ($VAR/${VAR}). Allows find|grep|git|ls|rg ... | head readonly pipelines. Real but heuristic-only (tokenized, not a real shell parser). +- **[implemented] background_run / check_background** — tools_shell.go:147-179. Delegates to GlobalBackgroundManager.RunContext/Check. checkShellCommandSandbox applied. Emits task_id; results drained via drain_notifications. +- **[implemented] web_fetch** — tools_web.go:31-114. SSRF guard (checkSSRF + ssrfSafeTransport DNS-rebinding-safe DialContext, privateNets incl. fc00::/7), 5MB cap, htmlToText conversion, rate-limit 10/min. http/https only. +- **[partial] web_search (DuckDuckGo scrape / SearXNG)** — tools_web.go:135-330. HTML scraping of html.duckduckgo.com (parseDDGResults/extractDDGResult decoding uddg redirect) OR SearXNG JSON backend from config.WebSearchSearXNGURL. 10/min rate limit. No real search-API integration (CC uses hosted search). +- **[implemented] search_grep** — tools_file_search.go:104-152. regexp.Compile, filepath.Walk, skips grepExcludedDirs (.git/node_modules/etc), 1MB file cap, 50 match cap. NOT ripgrep-backed (pure Go walk). No -i/-g/file filters like CC Grep. +- **[implemented] find_files (glob)** — tools_file_search.go:165-255. 
Custom matchGlob with ** support (recursive), 100-file cap, skips excluded dirs. Bubble-sort (O(n^2)) — diverges from CC Glob. +- **[implemented] list_directory** — tools_file_search.go:24-85. filepath.Walk, depth cap 4, grepExcludedDirs skip, 200-entry cap. dirs get '/' suffix. +- **[implemented] memory_save/list/search/update/delete/dream** — tools_memory.go. CRUD over GlobalMemoryManager + memory_dream (4-phase DreamConsolidator). Persisted to disk. Roughly maps to CC memory/save_search semantics but types (user/feedback/project/reference) differ. +- **[implemented] task_create/update/list/get + todo** — tools_task.go + tools_todo.go over GlobalTaskManager (DAG with DFS cycle validation) and GlobalTodoManager. Mirrors CC TaskCreate/TaskUpdate/TaskList/TaskGet + TodoWrite (single in_progress rule encoded in description only). +- **[implemented] schedule_create/list/delete** — tools_schedule.go over GlobalCronScheduler. One-shot/recurring + durable persistence. Real local cron. Maps loosely to CC scheduled-task MCP, not native. +- **[implemented] spawn_teammate + team comms + protocol + autonomy** — tools_team.go. Spawn/list/message/inbox/broadcast + protocol_shutdown/plan_approval request/response + agent_claim_task/agent_set_state. Over GlobalTeamManager/GlobalProtocolManager/GlobalAutonomyManager. Parallel to CC TeamCreate/TaskUpdate/SendMessage but bespoke protocol set. +- **[partial] spawn_subagent** — tools_subagent.go:8-19. Thin wrapper calling GlobalSubagentManager.RunSubagent(ctx, args). Synchronous. No parallel/non-blocking option (CC Task supports background). +- **[implemented] worktree_create/list/status/enter/closeout** — tools_worktree.go over GlobalWorktreeManager (Create/List/Status/Enter/Closeout with keep|remove). Real git worktree-backed isolation. +- **[implemented] MCP plugin discovery + dynamic tool registration** — tools_mcp.go + mcp.go. GlobalMCPRouter.LoadAndStartPlugins + DiscoverTools returns []tool.Tool. 
DynamicMCPTool implements tool.Tool + Declaration()/ProcessRequest injecting genai.FunctionDeclaration with ParametersJsonSchema. Real MCP-protocol client integration. +- **[implemented] LSP tools (5)** — lsp_utils.go:105 + lsp_tools.go. LSPGotoDefinition/FindReferences/DocumentSymbols/Hover/Diagnostics via getLSPClient per-language (Go/TS/Python/Rust from config). json.RawMessage fallback parsing. Uses textDocument/diagnostic (pull, 3.17+). Rough analog of CC LSP MCP server but native. +- **[implemented] CI watcher** — ci_watcher.go:91. agent_watch_ci starts background GitHub Actions monitor -> inbox notifications on failure. +- **[implemented] Auto-review (4-tier risk + LLM judge)** — auto_review.go. RiskTier enum + ClassifyTool/classifyShellCommand (trusted/low/medium/high) and ReviewCommand/ReviewFileOperation with LLM fallback. SetAutoReviewConfig(model.LLM). Dangerous-pattern hard-filter re-checks LLM approval. callLLMForReview via llm.CollectNonStreaming. Heuristic-only fallback when no model. +- **[implemented] Edit snapshot/rollback** — runner_edit.go snapshotFile/rollbackPendingEdits + per-run commitEditedFiles. On tool failure or ctx cancel, restores originals. CC has no equivalent (uses git). +- **[implemented] Tool pool hot-reload** — tools.go:401-451. RebuildToolPool (re-discover, bump version) + CheckPluginsFileChanged (mtime of .iroha/plugins.json). Enables /mcp reload. +- **[missing] Notebook tools (NotebookEdit)** — Not in registry. CC has NotebookEdit. Absent. +- **[missing] Grep tool flag parity (output_mode/-i/-g/context)** — search_grep has no -i/--include/--exclude/-A/-B/-C flags; no JSON/structured output; 50-match cap. CC Grep is ripgrep-backed with rich flags. +- **[missing] Task (background agent) tool** — CC Task supports run_in_background / TaskStop / non-blocking spawn. spawn_subagent here is strictly synchronous via RunSubagent.
+- **[missing] Large output auto-compression / headroom** — web_fetch truncates at 5MB and htmlToText is naive (no readability/JS rendering). No URL-context extraction. +- **[missing] Tool description schema validation** — register functions set description strings but there is no CC-style 'dict' arg schema with required fields. functiontool derives schema from json tags; no explicit required/enum validation at registration. + +## External deps +- google.golang.org/adk v1.2.1-... — tool.Tool, tool.Context, tool/functiontool (registration+schema reflection). Load-bearing across every tools_*.go. +- google.golang.org/genai v1.57.0 — genai.FunctionDeclaration/Tool/Content/Part/GenerateContentConfig used by DynamicMCPTool (mcp.go), runner_exec.go message building, and indirectly functiontool. NOT ADK but is the wire schema. +- github.com/firebase/genkit/go v1.8.0 — used ONLY in pkg/llm/adapter.go to build model.LLM; reaches A2 solely via SetAutoReviewConfig(model.LLM) consumed by auto_review.go. +- google.golang.org/adk/model — model.LLM + model.LLMRequest used by auto_review.go for the LLM safety judge. +- google.golang.org/adk/agent + adk/session — referenced by tool.Context (CallbackContext, EventActions) and by the runner (adkRunner.Run). Tools do not import these directly except in tests (tools_shell_test.go imports adk/agent, adk/memory, adk/session, adk/tool/toolconfirmation, genai). +- golang.org/x/net/html — HTML parsing for web_fetch/web_search (tools_web.go, tools_web_safety.go). +- iroha/pkg/config — WebSearchSearXNGURL + LSPServers config (tools_web.go:150, lsp_utils.go:108). +- iroha/pkg/llm — CollectNonStreaming helper used by auto_review.go (auto_review.go:298,443). + +## Coupling notes + +This area is HEAVILY coupled to google.golang.org/adk and is the single hardest decoupling point for a native rewrite. Concrete load-bearing dependencies: + +1. 
tool.Tool interface (adk/tool/tool.go:42) — every registered tool must implement Name()/Description()/IsLongRunning(). GetSWETools returns []tool.Tool. A native replacement needs an equivalent interface (Name/Description/IsLongRunning/Declaration/Run). + +2. tool.Context (adk/tool/tool.go:55) — NOT a context.Context alias. It embeds agent.CallbackContext and exposes FunctionCallID(), Actions() (returns *session.EventActions), SearchMemory() (returns *memory.SearchResponse), ToolConfirmation() (returns *toolconfirmation.ToolConfirmation), and RequestConfirmation(hint,payload). CRITICAL: iroha's handlers declare `ctx tool.Context` but ONLY use it as a bare context.Context via ctx.Value(WorkdirKey) (tools.go:70, pool.go:25). The rich ADK Context surface (confirmation, actions, memory search) is UNUSED by the handlers — confirmation is instead implemented ad-hoc via runner_confirmation*.go + ToolBridge + ReviewCommand. This means the handlers are 'decoupling-ready': replacing `tool.Context` with a plain `context.Context` (or a tiny native ToolCtx{context.Context; Workdir string}) requires changing only the handler signatures, not their bodies. + +3. functiontool.New + functiontool.Func[TArgs,TResults] (adk/tool/functiontool/function.go:71,78) — the generic register[TArgs,TResults] in tools.go:24 depends on functiontool.New(Config{Name,Description}, handler). This auto-derives the JSON schema from struct field tags (`json:"x" description:"..."`) and auto-marshals args/results to map[string]any. A native rewrite must replicate this schema-from-struct-tags reflection (iroha already relies on the `description:` struct tag everywhere — e.g. tools_file.go FileReadArgs). This is the largest mechanical port: write a generic `register[TArgs,TResults]` that reflect-walks TArgs to produce a genai.FunctionDeclaration-style schema and a JSON-(un)marshal dispatcher. + +4.
genai.FunctionDeclaration / genai.Tool / genai.Part / genai.Content (google.golang.org/genai v1.57.0) — used by DynamicMCPTool.Declaration/ProcessRequest (mcp.go:267-283), by runner_exec.go building *genai.Content user messages, and indirectly by functiontool. NOTE: genai is the Google GenAI SDK, not ADK itself — it is the wire format for tool declarations and messages. Decoupling from ADK does NOT remove the genai dependency unless the native loop also replaces genai with Anthropic-native message/tool-use types. + +5. model.LLM + model.LLMRequest (adk/model) + runner.Runner (adk/runner) + agent.RunConfig/agent.StreamingModeSSE (adk/agent) — auto_review.go uses model.LLM/model.LLMRequest/llm.CollectNonStreaming (auto_review.go:12,166-168,278-298) and the runner dispatches via cr.adkRunner.Run(...) (runner_exec.go:139). Tool execution itself does NOT call model.LLM, but the auto-review subsystem does, and tools are ultimately driven by the ADK runner's event stream. Decoupling tools from ADK therefore also requires replacing the runner (A1/A3 area). + +6. Indirect via Genkit: tools themselves do NOT import firebase/genkit. The only Genkit coupling is in pkg/llm/adapter.go (NewAdapter(*genkit.Genkit,...)) which produces the model.LLM that SetAutoReviewConfig consumes. So Genkit reaches A2 only through the LLM handle handed to auto-review — replacing the LLM adapter removes it. + +NATIVE REPLACEMENT REQUIREMENTS (what a CC-style no-framework port needs): +- A native `Tool` interface: { Name, Description, IsLongRunning, Declaration()*Schema, Run(ctx, args any)(map[string]any,error) }. +- A native `ToolCtx` carrying workdir + function_call_id + a confirmation channel (replacing tool.Context's RequestConfirmation/ToolConfirmation), OR keep confirmation outside tools entirely (iroha already does this via ReviewCommand in runner_confirmation — the cleaner path).
+- A generic schema-from-struct-tags reflector to replace functiontool.New (iroha's struct tags already encode everything needed). +- Replace genai.FunctionDeclaration with an Anthropic-tool-use schema type (or keep a thin genai-compatible shim if the wire layer stays genai). +- auto_review.go must call the native LLM client, not model.LLM/llm.CollectNonStreaming. + +BOTTOM LINE: The tool HANDLERS are ~90% decoupling-ready (they only need context.Context + WorkdirKey). The coupling is concentrated in (a) the registration/reflection layer (functiontool) and (b) the types tool.Tool/tool.Context/genai.FunctionDeclaration/model.LLM. A native port is feasible and mostly mechanical for handlers, but requires building a small schema-reflection + Tool-interface + dispatch layer to replace functiontool + tool.Tool. + +## Divergences from Claude Code +- file_write has NO Read-before-overwrite enforcement — real CC refuses to overwrite a file you haven't Read in this session; iroha just overwrites (tools_file.go:391). +- file_edit does NOT require a prior file_read; CC's Edit requires the file to have been Read first. iroha allows blind edits (tools_file.go:88). +- search_grep is a pure-Go filepath.Walk regex matcher, NOT ripgrep. No -i/--include/--exclude/-A/-B/-C/output_mode flags, hard 50-match cap, 1MB-per-file skip. Semantics and ergonomics differ materially from CC Grep (tools_file_search.go:104). +- find_files uses an O(n^2) bubble sort and a hand-rolled ** glob matcher, not doublestar/fsnotify; 100-result cap (tools_file_search.go:247). +- web_search scrapes DuckDuckGo HTML or hits a self-hosted SearXNG; CC uses a hosted search backend with structured results. Rate-limited to 10/min (tools_web.go:135). +- web_fetch truncates at 5MB and uses a naive htmlToText (no readability extraction, no JS rendering); CC WebFetch has richer extraction + URL-context modes. 
+- shell_run always uses 'sh -c' with a 30s timeout and 500-line stream cap; CC Bash supports configurable timeout up to 600000ms, run_in_background, and richer sandboxing (iroha's sandbox is static token analysis, not a true seccomp/seatbelt sandbox). +- spawn_subagent is SYNCHRONOUS only (RunSubagent blocks). CC Task supports background dispatch + TaskStop + multiple agents (tools_subagent.go:8). +- todo enforces 'exactly one in_progress' only via description text, not structurally; CC TodoWrite enforces it at the tool layer. +- snapshotFile/rollbackPendingEdits (runner_edit.go) provide a per-run undo that CC does NOT have — CC relies on git. This is an iroha-specific divergence. +- Confirmation model differs: iroha uses ReviewCommand (heuristic+LLM) + 4-tier RiskTier + ToolBridge status bridge, whereas real CC uses permission rules in settings.json + explicit per-tool allow/deny + can_use_tool hooks. ADK's native tool.Context.RequestConfirmation/ToolConfirmation is NOT used by the handlers. +- Auto-review LLM judge (callLLMForReview) re-checks LLM 'safe' verdicts against hardcoded dangerous-pattern lists to resist prompt injection — CC has no equivalent LLM-judge layer (it uses deterministic rules + hooks). +- LSP tools are first-class native tools (lsp_*) rather than an MCP server as in CC; pull-diagnostics-only (LSP 3.17+), no workspace diagnostics fallback. +- mcp_server_list is the only MCP-meta tool; CC exposes richer MCP resource/prompt tooling. Dynamic MCP tool discovery IS implemented (mcp.go DiscoverTools) but plugin lifecycle is bespoke (.iroha/plugins.json), not the standard MCP config. +- All struct-tag-based arg schemas have no 'required' field tracking (CC uses explicit required arrays in JSON schema). 
+ +## Quality notes + +The tool layer is broad (40 tools) and mostly functionally complete, with genuinely thoughtful security work: SSRF protection includes DNS-rebinding-safe DialContext (tools_web_safety.go:117), symlink-resolving sandbox (validatePathForSandbox, tools.go:124), env-var-expansion blocking, and an LLM-judge with anti-injection re-checking (auto_review.go:229-272). However several rough edges: (1) sortFiles is O(n^2) bubble sort (tools_file_search.go:247); (2) shell sandbox is static tokenization, not a real sandbox (no seatbelt/seccomp) — WrapSandboxCommand exists but its strength wasn't verified here; (3) findLineMatches caps at 100 matches silently (tools_file.go:223); (4) GrepHandler ignores binary files only by size (1MB), not by content sniff — will feed binaries through regexp; (5) web_search DuckDuckGo scraping is brittle to DDG HTML changes; (6) snapshotFile reads the file again even though FileEditHandler already read it (double read); (7) no per-tool 'required args' validation — relies entirely on LLM correctness; (8) memory_dream and schedule durable persistence are real but their storage formats weren't audited here (in memory.go / schedule.go, A2-adjacent). Test coverage is strong for handlers (tools_*_test.go present for most). The codebase is internally consistent but the divergence from CC's exact tool semantics (Read-before-edit, Grep flags, Task backgrounding, NotebookEdit) is the main parity gap, not capability gaps per se. 
diff --git a/docs/claude-code-architecture/audit/A3-permission-hooks-sandbox.md b/docs/claude-code-architecture/audit/A3-permission-hooks-sandbox.md new file mode 100644 index 0000000..90f1d3a --- /dev/null +++ b/docs/claude-code-architecture/audit/A3-permission-hooks-sandbox.md @@ -0,0 +1,62 @@ +# Audit: A3-permission-hooks-sandbox + +## Files audited + +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/permission.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/hooks.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/hooks_exec.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/hooks_types.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/sandbox.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/auto_review.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/auto_review_apply.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/auto_review_diff.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner_confirmation.go (coupling seam: blockingConfirmationTool wraps tool.Tool, permission gate) +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools_shell.go (sandbox wrap site) +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tools.go:149+ (checkShellCommandSandbox second sandbox layer) +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/helpers.go (CollectNonStreaming ADK coupling) +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/adapter.go + genkit_adapter.go (model.LLM provider chain) +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/go.mod (ADK v1.2.1-..., genkit v1.8.0, genai v1.57.0) + +## Capabilities +- **[implemented] Permission modes (6 modes incl. bypass/acceptEdits)** — permission.go:12-19. 
All 6 real Claude Code modes present: default/plan/auto/acceptEdits/dontAsk/bypassPermissions. ParsePermissionMode (permission.go:143-168) does aggressive fuzzy normalization (strips ()-_, spaces, 'mode' suffix) and accepts aliases like 'ci'->dontAsk, 'dangerous'->bypass, 'y'/'yes'->auto. Behavior matches Claude Code semantically. +- **[implemented] PermissionManager: rule engine (allow/deny/ask)** — permission.go:71-139. ~30 built-in allow rules per tool name (file_read/list/grep/find/todo/task/schedule/team/protocol/worktree/mcp_server_list/web). Two hard deny rules (rm -rf /, sudo *). builtinRuleCount tracked so dontAsk mode skips auto-approving built-in mutation allow-rules (permission.go:295). AddRule/GetRules/SetMode/GetMode all thread-safe. +- **[implemented] PermissionManager.Check decision pipeline** — permission.go:210-428. Eval order: (0) BashSecurityValidator on shell_run/background_run, (1) deny rules, (2) mode dispatch (dontAsk/plan/bypass/acceptEdits/auto with 4-tier classifier ClassifyTool), (3) allow rules, (4) fall-through to ask. consecutiveDenials counter with NoteApproval/NoteDenial/Reset. Returns (decision, reason) tuple. +- **[implemented] BashSecurityValidator (regex allowlist/blocklist)** — permission.go:28-69. 14 regex patterns: shell_metachar, sudo, rm_rf, cmd_substitution, ifs_injection, heredoc, process_substitution, named_pipe, terminal_escape, file_descriptor, unsafe_source, encoding_attack, proxy_injection, unsafe_find_pipe. Severe subset (sudo/rm_rf/unsafe_find_pipe/proxy_injection) -> immediate deny; others -> ask (or deny in plan/dontAsk mode). +- **[implemented] Risk classifier (4-tier: trusted/low/medium/high)** — auto_review.go:24-113. trusted/low/medium/high tiers. Trusted set for read-only tools + known safe cmds; shell classified via classifyShellCommand; unknown tools -> high. Used by ModeAuto (permission.go:362-402) to auto-approve trusted/low and escalate medium/high. 
+- **[implemented] Hybrid shell auto-review (heuristic + LLM safety judge)** — auto_review.go:198-275 ReviewCommand + auto_review_apply.go heuristicReview + auto_review_diff.go regex checks. Hard rule filter runs BEFORE LLM; if heuristic says safe OR hard-unsafe, LLM is skipped. LLM approval is re-validated by a 'safety fuse' (auto_review.go:230-272) that overrides LLM 'safe' if local patterns disagree. Hybrid security model is sound. +- **[implemented] File-mutation safety review (path + content + secret detection)** — auto_review.go:323-411 ReviewFileOperation + fileHeuristicReview. Blocks system dirs (/etc,/usr,...), sensitive patterns (.ssh,.aws,.env,credentials,*.pem,*private key*), secret indicators in content, unknown extensions -> LLM semantic review via callLLMForFileReview. Wired into acceptEdits mode (permission.go:338-359) and Auto mode. +- **[implemented] Hook lifecycle events** — hooks_types.go:12-37. 12 events: SessionStart/End, UserPrompt, AgentResponse, PreToolUse, PostToolUse, ToolError, Compaction, SubagentStop, Notification, PreCompact, PostCompact. Matches Claude Code's event taxonomy closely (PreCompact/PostCompact + Compaction all present; Notification present). +- **[implemented] HookManager config loading (user + project layered)** — hooks_types.go:39-46 + hooks.go:52-132. Reads ~/.iroha/hooks.json (user) + ./.iroha/hooks.json (project), with migration shim from legacy .go-claude/ dir. Tracks per-hook source (hookSourceUser/hookSourceProject). Timeout configurable per-file. +- **[implemented] Hook execution (3 types: command/http/llm-prompt) + matchers + async** — hooks.go:20-74 RunHooks. Matcher filters by tool name. Project-sourced command hooks require IROHA_TRUST_PROJECT_HOOKS=1 (hooks_exec.go:78-98) — correct trust-boundary behavior. Async hooks fire-and-forget with panic recovery; sync hooks short-circuit on Blocked. Aggregates Messages/UpdatedInput/AdditionalContext across hooks. 
+- **[implemented] HTTP hook type** — hooks_exec.go:113-200 runHTTP + headers env expansion (AllowedEnvVars-restricted) + parseJSONResult. Non-2xx blocks; timeout honors def.OnTimeout='block'. +- **[implemented] LLM-prompt hook type (custom, non-Claude-Code)** — hooks_exec.go:203-298 runLLMPrompt. Interpolates $TOOL_NAME/$TOOL_INPUT/$PROMPT/etc into def.Prompt, calls globalLLMModel (model.LLM) GenerateContent, parses decision JSON. THIS IS AN IROHA EXTENSION — real Claude Code has no native llm-prompt hook type (hooks are subprocess/http only). +- **[implemented] Command hook: stdin JSON + stdout JSON + exit-code protocol** — hooks_exec.go:301-469 runCommand + hooks_types.go:104-191 parseJSONResult. Whitelisted env (HOME/PATH/LANG/TERM/USER/TMPDIR/SHELL/PWD only — good secret hygiene, hooks_exec.go:345). JSON stdin payload. Supports Claude Code's hookSpecificOutput.permissionDecision/updatedInput/additionalContext AND exit-code protocol (0=ok,1=deny,2=message). JSON-first-then-exitcode ordering matches Claude Code. +- **[implemented] OS-level sandbox (macOS sandbox-exec + Linux bubblewrap)** — sandbox.go:1-168. GlobalSandboxEnabled flag. darwin -> sandbox-exec with generated Seatbelt profile (deny writes to /System,/Library,/usr,/bin,/sbin,/private/etc,~/.ssh,~/.aws,~/.kube,~/.gemini; allow workdir + tmp + caches). linux -> bwrap --ro-bind / --bind workdir. Graceful no-op fallback if binary missing. This is an Iroha-native addition; real Claude Code uses a different (seatbelt-exec on mac, landlock on linux via its own CLI binary) mechanism. +- **[implemented] Path-escape sandbox (command tokenizer + CWD bounding)** — tools.go:151+ checkShellCommandSandbox. Separate from OS sandbox — tokenizes command (handles read-only pipelines) and blocks relative '../' escape + absolute paths outside CWD (whitelisting safePrefixes). Runs inside ShellRunHandler BEFORE the OS sandbox wrap (tools_shell.go:44 vs :55). Defense-in-depth. 
+- **[partial] Permission gating integration via blockingConfirmationTool wrapper** — runner_confirmation.go:17-98. adkRunnableTool embeds tool.Tool; ProcessRequest overwrites req.Tools entry so ADK dispatches through Run() which calls GlobalPermissionManager.Check then underlying tool. This is the ONLY point where permission checks meet tool execution — and it is structurally dependent on ADK's tool.Tool/tool.Context/model.LLMRequest/req.Tools map. +- **[implemented] LLM-based auto-review config wiring** — Real Claude Code has NO equivalent of GlobalAutoReviewConfig (an LLM safety judge that pre-approves shell/file ops). This is an Iroha-original feature layered on top of Claude Code's model. Mode-dependent (only invoked in ModeAuto / acceptEdits 'ask' path, runner_confirmation.go:130,179). Conceptually diverges from Claude Code's 'ask human' default. +- **[partial] Real Claude Code permission JSON schema fidelity (.claude/settings.json 'permissions.allow/deny/ask')** — No settings.local.json/enterprise managed-settings.json rule merging, no 'additionalDirectories' workspace expansion, no pattern-prefix precedence semantics beyond substring+glob. matchesPattern (permission.go:626-655) is a custom glob (not gitignore-style). Acceptable but not 1:1. + +## External deps +- google.golang.org/adk v1.2.1-0.20260519122726-f2aee5301649 — provides model.LLM interface, model.LLMRequest, model.LLMResponse (used in auto_review.go, hooks_exec.go, runner_confirmation.go). Load-bearing for the 3 LLM-calling seams and the tool.Context/tool.Tool dispatch wrapper. +- google.golang.org/genai v1.57.0 — provides genai.Content/genai.Part/genai.FunctionDeclaration/genai.GenerateContentConfig. Used to construct LLM requests in auto_review.go, hooks_exec.go, runner_confirmation.go. Would be replaced by a native Message type in a no-framework rewrite. 
+- github.com/firebase/genkit/go v1.8.0 — NOT directly imported by the A3 files, but the configured model.LLM for ProviderClaude/ProviderGemini is GenkitModelAdapter (pkg/llm/genkit_adapter.go) which bridges genkit -> ADK model.LLM. So auto-review + llm-prompt hooks transitively depend on Genkit when using Claude/Gemini providers (the model passed to SetAutoReviewConfig/globalLLMModel is a GenkitModelAdapter in the default path). Direct OpenAI/Anthropic adapters (pkg/llm/openai.go, anthropic.go) bypass Genkit. +- iroha/pkg/llm — CollectNonStreaming helper (helpers.go) is the thin wrapper auto_review.go depends on; it in turn imports adk/model. This is the single import edge from the security area into the LLM subsystem. + +## Coupling notes + +COUPLING IS MODERATE AND CLUSTERED — permission.go, hooks.go, sandbox.go, auto_review_apply.go, auto_review_diff.go are FRAMEWORK-FREE (pure Go, only stdlib + iroha/pkg/llm). The ADK/Genkit coupling is concentrated in exactly THREE spots:\n\n(1) auto_review.go:12-13 imports `google.golang.org/adk/model` + `google.golang.org/genai`. autoReviewConfig.Model is typed `model.LLM` (auto_review.go:166-168). callLLMForReview (auto_review.go:278-319) and callLLMForFileReview (auto_review.go:413-463) build `*model.LLMRequest` with `[]*genai.Content`/`*genai.Part`/`*genai.GenerateContentConfig`, then call `llm.CollectNonStreaming(ctx, cfg.Model, req)` (pkg/llm/helpers.go:12). pkg/llm/helpers.go itself imports `google.golang.org/adk/model`.\n\n(2) hooks_exec.go:16-17 imports `google.golang.org/adk/model` + `google.golang.org/genai`. 
The llm-prompt hook (runLLMPrompt, hooks_exec.go:203-298) uses the package-global `globalLLMModel model.LLM` (declared runner.go:62) and calls `globalLLMModel.GenerateContent(ctx, req, false)` iterating `iter.Seq2[*model.LLMResponse, error]`, building `*model.LLMRequest`/`*genai.Content`/`*genai.Part`.\n\n(3) runner_confirmation.go:10-12 imports `google.golang.org/adk/model`, `google.golang.org/adk/tool`, `google.golang.org/genai`. The blockingConfirmationTool wrapper embeds `tool.Tool`, implements `ProcessRequest(ctx tool.Context, req *model.LLMRequest)` and `Run(ctx tool.Context, args any)`. It hijacks `req.Tools map[string]any` to force ADK to dispatch through the permission-checking Run(). This is the structural seam where permission gating meets the agent loop — and it is the MOST load-bearing ADK coupling in this area.\n\nA native rewrite needs to replace: (a) the `model.LLM` interface with a plain `type LLMClient interface { Generate(ctx, messages, system) (string, error) }`; (b) `*model.LLMRequest`/`*genai.Content`/`*genai.Part` with a native Message{Role,Parts} struct; (c) `llm.CollectNonStreaming` with a thin local collector; (d) the `tool.Tool`/`tool.Context`/`req.Tools` dispatch hijack with a native tool-registry that calls PermissionManager.Check BEFORE invoking the handler. Because the permission rule logic (permission.go), hook config/exec plumbing (hooks.go, hooks_exec.go runHTTP/runCommand/parseJSONResult, hooks_types.go), and sandbox (sandbox.go) are framework-free, they port almost verbatim. The llm-prompt hook + auto-review LLM calls need the new LLMClient signature swapped in (mechanical). The blockingConfirmationTool hijack is the only piece that must be re-architected: in a native loop, permission check is just a call before tool dispatch, not a wrapper that rewrites a tool map. Estimated effort for this area alone: LOW-MEDIUM (the security logic is already isolated; only the 3 ADK seams need rewiring). 
+ +## Divergences from Claude Code +- LLM-prompt hook type (HookTypePrompt='llm-prompt', hooks_types.go:45) does NOT exist in real Claude Code — Claude Code hooks are command (subprocess) and matching only. This is an Iroha-original extension that adds a built-in LLM safety-judge hook mechanism. +- Auto-review LLM safety judge (ReviewCommand/ReviewFileOperation/GlobalAutoReviewConfig) is an Iroha-original concept. Real Claude Code does NOT do LLM-based pre-approval of shell commands or file writes — it relies on permission rules + human confirmation. Iroha's ModeAuto uses ClassifyTool 4-tier + LLM review to auto-approve 'medium' ops, which is more permissive than real Claude Code. +- Sandbox implementation differs: Iroha uses macOS `sandbox-exec` + Linux `bwrap` directly in-process (sandbox.go). Real Claude Code ships its own sandboxing binary (seatbelt on mac via a dedicated helper, landlock+namespaces on linux) with more granular workspace allowlisting and network policy. Iroha's Seatbelt profile is static-string-built and allows network by default ('(allow default)'), weaker than Claude Code. +- Permission rule config format diverges: Iroha uses hardcoded built-in rules + AddRule API (permission.go:85-131, 201-208), NOT real Claude Code's .claude/settings.json 'permissions.allow/deny/ask' array with tool:path/content pattern syntax. Iroha's matchesPattern (permission.go:626) uses substring-when-no-wildcard which is looser than Claude Code's gitignore-style matching. +- Hook config path is .iroha/hooks.json (hooks.go:58-96) not .claude/settings.json hooks block. Has a legacy .go-claude/ migration shim. Hook JSON shape (HookConfig.Hooks map[string][]HookDef) is close but not identical to Claude Code's settings.json 'hooks' structure (Claude Code nests under PreToolUse/PostToolUse arrays of {matcher,hooks:[{type,command}]}). 
+- ConsecutiveDenials counter with 3-strike safety-fuse warning (runner_confirmation.go:76-80, permission.go:555-583) is an Iroha-original UX feature, not in real Claude Code. +- dontAsk mode in Iroha (permission.go:290-316) acts as 'deny-by-default unless explicit allow rule' — this maps to Claude Code's behavior but the CI-style naming and builtinRuleCount skip logic (permission.go:295) is Iroha-specific. +- checkShellCommandSandbox (tools.go:151) is a second, independent path-based sandbox that runs BEFORE the OS sandbox and duplicates some of heuristicReview's path-danger logic (auto_review_apply.go isPathDangerous). Having two overlapping path-escape checkers is a divergence from Claude Code (which has one coherent sandbox). + +## Quality notes + +SECURITY LOGIC QUALITY IS HIGH. The hybrid security model (hard regex/heuristic rules as an absolute floor, LLM judge as advisory with a 'safety fuse' that overrides LLM approvals, auto_review.go:230-272) is well-designed and resists prompt-injection jailbreaks. The regex pattern coverage (14 patterns in BashSecurityValidator + 10 in auto_review_diff.go) is broad. Command-hook env whitelisting (hooks_exec.go:345) prevents secret leakage. Project command hooks gated behind IROHA_TRUST_PROJECT_HOOKS is correct trust-boundary hygiene.\n\nWEAKNESSES: (1) Two overlapping path-escape checkers (tools.go checkShellCommandSandbox + auto_review_apply.go isPathDangerous) with divergent whitelists — maintenance hazard and inconsistency risk. (2) Iroha's mac Seatbelt profile uses '(allow default)' then denies specific paths (sandbox.go:78) — this is an ALLOW-by-default policy, weaker than Claude Code's deny-by-default; network is implicitly allowed.
(3) globalLLMModel and GlobalAutoReviewConfig and GlobalPermissionManager and GlobalHookManager are all package-level singletons (runner.go:62, auto_review.go:171, permission.go:141, hooks.go:29) — global mutable state makes testing and multi-agent isolation harder; a native rewrite should inject these. (4) matchesPattern substring fallback (permission.go:634) can over-match. (5) LLM JSON parsing in runLLMPrompt/hooks_exec.go relies on heuristics to strip markdown fences and extract first {..} block (hooks_exec.go:275-295) — brittle but defended against multi-JSON injection. Overall: the area is over-engineered relative to Claude Code (extra LLM-judge + llm-prompt hook layers) but the core permission/hook/sandbox primitives are solid and largely portable. diff --git a/docs/claude-code-architecture/audit/A4-context-memory-session.md b/docs/claude-code-architecture/audit/A4-context-memory-session.md new file mode 100644 index 0000000..56451af --- /dev/null +++ b/docs/claude-code-architecture/audit/A4-context-memory-session.md @@ -0,0 +1,61 @@ +# Audit: A4-context-memory-session + +## Files audited + +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/compaction.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/compaction_helpers.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/memory.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/memory_helpers.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/memory_frontmatter.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/memory_agents_sync.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/memory_dream.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/session_store.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/session_store_helpers.go +- 
/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/prompt.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/tokenizer.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/migrate_legacy.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner.go (lines 40-160, 385-540 for compaction seam + agent/runner/session wiring) +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/go.mod (ADK/Genkit versions) + +## Capabilities +- **[implemented] Micro-compaction of large tool outputs + transcript archiving** — compaction.go:49 CompactContents operates on []*genai.Content. Two phases: (1) micro-compaction archives any FunctionResponse.Response >1000 bytes to ~/.iroha/transcripts/.jsonl and replaces it in-place with a placeholder (compaction.go:115-145); (2) conversational summarization when len(contents)>12 — keeps round[0], summarizes middle (index 1..len-5) via LLM or truncation fallback, preserves last 4 rounds, re-inserts sticky blocks (compaction.go:148-258). Deep-copies all Parts/FunctionCall/FunctionResponse before mutating (compaction.go:55-99). Hooks fire at micro/before_summarization/after_summarization/circuit_breaker_tripped. Triggered in runner.go:131-136 inside DynamicLLMDelegator.GenerateContent when >12 rounds OR estimateContentsTokens>50000. +- **[implemented] Sticky-latch preservation during summarization** — compaction_helpers.go:15 extractStickyBlocks collects any genai.Content whose Part.Text contains '[STICKY]'; capStickyContent (helpers:30) trims oldest until total sticky bytes <= 20% of a hardcoded 200000-byte context-window estimate. Sticky blocks are re-inserted after the summary. prompt.go marks the Persona and CLAUDE.md sections with [STICKY] so they survive summarization. NOTE: only text-bearing blocks can be sticky; FunctionCall/FunctionResponse parts are never preserved as sticky. 
+- **[implemented] Compaction circuit breaker + truncation-only fallback** — compaction.go:17 global struct, 3 consecutive failures (empty summary or error) opens the breaker (open=true) and forces truncateOnlySummary for subsequent runs; auto-resets after 5 minutes. compaction_helpers.go:69 truncateOnlySummary builds an extractStructuredSummary block + a 4000-char transcript. Recovered via defer/recover around summarizeRounds (compaction.go:194-202). +- **[partial] LLM-based conversation summarization** — compaction_helpers.go:212 summarizeRounds: builds a transcript from text/FunctionCall/FunctionResponse, caps at 8000 chars, issues a 30s-timeout model.LLMRequest via the passed-in model.LLM, streams GenerateContent and concatenates text parts. Falls back to extraction if LLM absent/empty. extractStructuredSummary (helpers:108) regex-extracts tool names, file paths, and 'decision' lines (prefixes like 'let's ', 'i'll ', 'decided to ') into a [SUMMARY] block. +- **[implemented] Memory store (file-based, YAML frontmatter, global+project layers)** — memory.go:35 MemoryManager holds map[name]*MemoryEntry with RWMutex. Two-layer load: ~/.iroha/memory (global) then /.iroha/memory (project overrides). Each entry is one .md with YAML frontmatter (memory_frontmatter.go parse/render). MaxMemoryEntries=100 cap. Save/Update/Delete/List/Search/Count/Reload all implemented. Singletons GlobalMemoryManager + GlobalDreamConsolidator (memory.go:42-45). +- **[implemented] Bidirectional AGENTS.md <-> memory sync** — memory_agents_sync.go syncToAgentsMD / syncFromAgentsMDLocked / makeAgentsBlock parse/write a '## Agent Dynamic Learnings' section in AGENTS.md, mirroring entries both directions. Round-trips Name/Type/Description/Content with line-based block parser. Hardcoded path 'AGENTS.md' (cwd-relative). 
+- **[implemented] Memory injection into system prompt (trigger-aware)** — memory.go:234 BuildSystemPromptSection groups entries by type (user/feedback/project/reference), fuzzy keyword-matches against the current user prompt (feedback type always injected), and emits a Markdown block with emoji headers. Called from prompt.go:135 (stable section) and runner.go:398. MarkStale invalidation is exposed on SystemPromptBuilder but memory section is rebuilt every turn unconditionally. +- **[implemented] Dream consolidation (dedup/prune/cap + LLM semantic merge)** — memory_dream.go:169 Consolidate runs Orient/Gather/Consolidate/Prune: deletes empty entries, exact-content dedup within type groups, optional LLM semantic merge (ConsolidateSemantically:303 when >=3 entries of a type, JSON-array contract), then enforces MaxMemoryEntries cap (oldest first). ShouldConsolidate (dream:113) checks 7 gates incl. PID-based .dream_lock with stale-lock eviction. Triggered async at startup (runner.go:465) and IncrementSession bumps count on every MemoryManager init. +- **[implemented] Persistent session service (JSON-per-session, wraps ADK session.Service)** — session_store.go:57 PersistentSessionService wraps a delegate session.Service (runner.go:416 wires session.InMemoryService()). SerializedSession (store:28) embeds []*session.Event plus state map, CWD, first prompt, permission mode, token/cost estimates, compaction archive path. Create/Get/List/Delete/AppendEvent delegate then persist; SaveSession serializes via json.MarshalIndent; LoadSessions re-hydrates the delegate; ListSavedSessions + ForkSession for TUI picker and branching. interface asserted at session_store_helpers.go:133. +- **[implemented] Legacy .go-claude -> .iroha migration** — migrate_legacy.go migrateGoClaudeIfNeeded one-shot copy of ~/.go-claude/memory and ./.go-claude/memory into .iroha equivalents, gated by ~/.iroha/.migrated sentinel, renames old dir to .bak. Called inside MemoryManager.loadLocked (memory.go:73). 
+- **[implemented] System prompt builder with prompt-caching boundary** — prompt.go:94 BuildWithPrompt assembles identity tag, [STICKY] persona, memories, layered CLAUDE.md (with @-import expansion + path sandboxing, prompt.go:501-687), AGENTS.md (cwd-up-to-project-root), skills (folder SKILL.md + flat .md + manifest always-on + trigger-matched), then '=== DYNAMIC_BOUNDARY ===' caching boundary, then time/workdir/safety/tasks/teammates/inbox/worktrees/reminder. maybeCached emits a 'cached:' comment marker when a section's SHA-256 is unchanged since last call. +- **[partial] Token counting** — session_store.go:168-194 and session_store_helpers.go:12 estimateTokens = textLen/4; estimateCost = tokens*2/1000000. Used for session picker metadata and as the compaction trigger (compactionTriggerTokens=50000, runner.go:79) via estimateContentsTokens (runner.go:83). No tokenizer library; not Anthropic/GPT tokenizer-accurate. Cost basis ($2/M) is a placeholder, not per-model pricing. +- **[stub] tokenizer.go (NOT an LLM tokenizer)** — tokenizer.go is misnamed — it implements tokenizeCommand, a shell-command tokenizer for the sandbox that blocks pipes/subshells/redirections. There is NO LLM tokenizer (tiktoken/BPE/CountTokens) anywhere in pkg/agent. The file does not belong to this functional area; it is a shell-security helper. +- **[missing] Compaction archive read-back / restore / tool-result fetch** — No microcompact-undo, no /compact slash command wiring to trigger manual compaction, no diff/restore of archived tool output back into context, no token-accurate budgeting (only bytes/4). Compaction archive is append-only JSONL with no rotation or read-back path. Sticky cap uses a hardcoded 200000-byte window estimate rather than the real model context window.
+ +## External deps +- google.golang.org/adk v1.2.1-0.20260519122726-f2aee5301649 (go.mod:14) — model.LLM/model.LLMRequest/model.LLMResponse (compaction, dream, session persistence), session.{Service,Session,Event,InMemoryService} (session_store + helpers), and transitively adk/agent/llmagent + adk/runner (runner.go) which owns the session and drives Execute. +- google.golang.org/genai v1.57.0 (go.mod:15) — genai.Content and genai.Part are the message model used throughout compaction.go/compaction_helpers.go/session_store.go/memory_dream.go (21 direct refs in the two compaction files alone). This is the deepest coupling: it IS the conversation data type. +- github.com/firebase/genkit/go v1.8.0 (go.mod:9) — NOT imported by any file in this area directly; enters via pkg/llm.NewAdapter/initGenkit (runner.go:508,511) which produces the model.LLM passed into summarizeRounds/ConsolidateSemantically. Decoupling model.LLM to a native LLMClient removes the transitive Genkit dependency from this area. +- No tokenizer library (tiktoken/BPE) is present anywhere — token counting is the bytes/4 heuristic. Any 1:1 fidelity effort must add a real tokenizer. + +## Coupling notes + +This area is MODERATELY-TO-HEAVILY coupled to Google ADK and transitively to Firebase Genkit. The load-bearing ADK primitives are (1) google.golang.org/genai — genai.Content and genai.Part are the canonical message model threaded through compaction.go, compaction_helpers.go (21 references), session_store.go, and memory_dream.go; CompactContents signature is `func CompactContents(contents []*genai.Content, sessionID string, llm ...model.LLM) []*genai.Content` (compaction.go:49). The deep-copy loop (compaction.go:55-99) is hand-written against genai.Part/FunctionCall/FunctionResponse fields. (2) google.golang.org/adk/model — model.LLM is the summarizer interface (summarizeRounds helpers:212, ConsolidateSemantically dream:303) and model.LLMRequest/LLMResponse are the request/response wrappers.
The summarizers are invoked by passing the live delegator's current model (runner.go:134 passes `m`). (3) google.golang.org/adk/session — session.Service, session.Session, session.Event, session.InMemoryService are the entire persistence substrate; PersistentSessionService is literally a session.Service wrapper (session_store_helpers.go:133 interface assertion), SerializedSession embeds []*session.Event and reads sess.State().All()/sess.Events().All(). (4) google.golang.org/adk/agent/llmagent + adk/runner — runner.go:404-430 constructs the agent and runner; CustomRunner.Execute drives adkRunner. Genkit (github.com/firebase/genkit/go v1.8.0) is NOT imported by any file in THIS area directly — it enters via pkg/llm.NewAdapter (runner.go:511 initGenkit) which produces the model.LLM. So Genkit coupling is one hop away, but model.LLM (ADK) is the contract this area speaks. + +DECOUPLING FEASIBILITY: High effort but tractable. The pure-Go pieces (MemoryManager, memory_frontmatter, memory_helpers, memory_agents_sync, migrate_legacy, SystemPromptBuilder, frontmatter/dream gates) are already framework-free — they only use os/strings/regexp and could survive a native rewrite unchanged. 
The ADK-coupled surface to replace is narrow and well-defined: (a) replace []*genai.Content with a native Message struct {Role string; Parts []Part} where Part is {Text, ToolCall, ToolResult} — this is a mechanical refactor of compaction.go + helpers (the deep-copy, sticky scan, structured extraction, transcript builder) plus session_store.go's event serialization; (b) replace session.Service/Session/Event with a native SessionStore interface (Create/Get/List/Delete/AppendEvent + a serializable Event with Content/Author/Timestamp/Usage) — PersistentSessionService already isolates the JSON layer so the delegate swap is small; (c) replace model.LLM / model.LLMRequest / model.LLMResponse with a native LLMClient interface {Generate(ctx, []Message, opts) -> stream of (Message, error)} used by summarizeRounds and ConsolidateSemantically. None of these require Genkit. The DynamicLLMDelegator (runner.go:65-143) is the seam where compaction plugs in today; a native agent loop would call the same CompactContents(nativeMsgs, sessionID, nativeLLM) before each provider call. The single biggest blocker to a 1:1 Claude Code native loop is that Claude Code uses Anthropic's content-block model (text/tool_use/tool_result) with real token counting via the Anthropic tokenizer — iroha's genai.Content + bytes/4 heuristic diverges from that and would need a native message type + a real tokenizer (tiktoken-go or Anthropic's counting endpoint) for faithful budgeting and auto-compact thresholds. + +## Divergences from Claude Code +- Message model is genai.Content/genai.Part (Google GenAI SDK) not Anthropic content blocks (text/tool_use/tool_result). Tool calls are FunctionCall/FunctionResponse, not Anthropic's tool_use/tool_result block types. A 1:1 port to Anthropic-native shape requires remapping all Part handling. +- No real tokenizer. Token counts are bytes/4 everywhere (session_store.go:193, runner.go:106, session_store_helpers.go:14). 
Claude Code uses Anthropic's actual token counting for context-window budgeting and the 92%/95% auto-compact thresholds. iroha's 50000-token trigger (runner.go:79) and 200000-byte sticky window (compaction.go:35) are arbitrary byte proxies. +- Compaction is round-count-based (>12 rounds) OR byte-token-based (>50k), triggered inside the model delegator. Claude Code's compaction is token-threshold-based on the real context window with a specific summarization prompt and a restore-on-edit mechanism; iroha has no restore path (archives are append-only and never read back). +- Sticky mechanism is a bespoke '[STICKY]' text marker in content blocks (compaction.go:26) capped at 20% of a hardcoded byte estimate. Claude Code has no public equivalent; it relies on prompt-caching breakpoints and file/snapshot references rather than in-band markers. +- System prompt is re-emitted in full every turn (DynamicLLMDelegator.GenerateContent runner.go:118-124 calls SystemPromptBuilder.BuildWithPrompt each call) and only uses a string-hash 'cached:' comment marker (prompt.go:87) as a pseudo-cache hint — it does NOT use Anthropic's actual prompt-caching cache_control breakpoints. Claude Code relies on provider-side cache_control with explicit breakpoints. +- Memory model (user/feedback/project/reference .md files with YAML frontmatter + AGENTS.md mirror) is iroha-specific, not Claude Code's CLAUDE.md-only convention. The Dream consolidator (dedup + LLM semantic merge + PID lock + 7 gates) has no Claude Code equivalent; Claude Code does not auto-merge memories. +- Token/cost accounting is a rough $2/M placeholder (session_store_helpers.go:22) independent of model; Claude Code computes per-model cost from real usage metadata. +- memory_dream.go:51 isProcessAlive uses syscall.Signal(0) — UNIX/macOS only; not portable to Windows (matches the darwin-only env but diverges from Claude Code's cross-platform support). 
+- prompt.go:307 sanitizeADKStatePlaceholders escapes {var} and {app:name}/{user:name} patterns to '{name /* literal */}' — an ADK-template-injection guard that only exists because ADK does Go-template substitution in instructions; a native loop would not need this and it is dead weight / a divergence from Claude Code's plain-text system prompt. + +## Quality notes + +Code quality is generally solid and well-logged (structured LogInfo/LogWarn/LogError/LogAudit throughout). Memory subsystem (memory.go, memory_frontmatter.go, memory_agents_sync.go, memory_helpers.go, migrate_legacy.go) is framework-free, tested, and cleanly separated — the easiest part to preserve verbatim in a native rewrite. Compaction is functional but has rough edges: the sticky cap uses a magic 200000-byte constant rather than the real context window; the deep-copy is hand-rolled and will silently drop any Part field ADK adds later (only Text/InlineData/FunctionCall/FunctionResponse copied); summarizeRounds swallows LLM errors by `break`-ing and falling through to extraction without incrementing the circuit breaker (compaction_helpers.go:286), so transient LLM failures do not trip the breaker — only empty/zero output does. memory_dream.go ConsolidateSemantically deletes originals before validating LLM JSON fully (dream:350-353 deletes list, then saves items); if mm.Save fails partway, memories are lost — not transactional. session_store SaveSession reads GlobalPermissionManager and os.Getwd() at save time, coupling persistence to global state. tokenizer.go is misnamed and misplaced (shell tokenizer in the context-memory area) and should be relocated. sanitizeADKStatePlaceholders (prompt.go:307) is an ADK-specific wart that would vanish in a native loop. Tests exist for compaction (compaction_test.go, compaction_helpers_test.go, compaction_ext_test.go), memory (memory_test.go, memory_ext_test.go), and session_store (session_store_test.go). 
diff --git a/docs/claude-code-architecture/audit/A5-mcp-subagent-team-skills.md b/docs/claude-code-architecture/audit/A5-mcp-subagent-team-skills.md new file mode 100644 index 0000000..574e840 --- /dev/null +++ b/docs/claude-code-architecture/audit/A5-mcp-subagent-team-skills.md @@ -0,0 +1,78 @@ +# Audit: A5-mcp-subagent-team-skills + +## Files audited + +- pkg/agent/mcp.go +- pkg/agent/mcp_client.go +- pkg/agent/mcp_oauth.go +- pkg/agent/mcp_transport_http.go +- pkg/agent/subagent.go +- pkg/agent/pool.go +- pkg/agent/team.go +- pkg/agent/team_message.go +- pkg/agent/team_process.go +- pkg/agent/team_types.go +- pkg/agent/skills.go +- pkg/agent/plugin.go +- pkg/agent/task.go +- pkg/agent/todo_manager.go +- pkg/agent/cron.go +- pkg/agent/background.go +- pkg/agent/worktree.go +- pkg/agent/runner_confirmation.go (adkRunnableTool interface + blockingConfirmationTool, followed) +- pkg/agent/tools.go (ToolRegistry/functiontool surface, followed) +- pkg/agent/runner.go:370-440 (root runner construction, followed for parity) +- pkg/llm/adapter.go:54 (NewAdapter signature, followed) + +## Capabilities +- **[implemented] MCP server discovery + lifecycle** — pkg/agent/mcp.go: MCPToolRouter singleton with LoadAndStartPlugins (reads .iroha/plugins.json, migrates from .go-claude, scans skill dirs for per-skill plugins.json, merges PluginManager servers+hooks), DiscoverTools (calls tools/list per client, wraps each as DynamicMCPTool named `mcp__<server>__<tool>`), ListServers, CloseAll. Supports stdio (MCPClient) + HTTP (HTTPTransport via NewMCPTransport). Real JSON-RPC 2.0 over child process stdin/stdout with initialize handshake + notifications/initialized. 10s per-call timeout. NOTE: LoadAndStartPlugins always uses NewMCPClient (stdio) directly — it does NOT route through NewMCPTransport, so URL-based HTTP servers in plugins.json are NOT actually started as HTTP; the transport factory exists but is not wired into plugin loading.
+- **[implemented] MCP stdio JSON-RPC client** — pkg/agent/mcp_client.go: hand-rolled JSON-RPC 2.0 client over exec.Cmd pipes, pending-request map keyed by int64 id, readLoop goroutine, SendNotification, Call with 10s timeout. Protocol version pinned to 2024-11-05 (older). No resource/prompt subscriptions, no sampling, no cancellation, no logging notifications handled. +- **[partial] MCP HTTP streamable transport** — pkg/agent/mcp_transport_http.go: HTTPTransport implements Streamable HTTP — POST with Accept: text/event-stream, captures Mcp-Session-Id header, DELETE on Close, parseSSEResponse extracts first 'data:' line. Only reads the FIRST SSE event (no multi-event/progress streaming). StdioTransport wraps MCPClient. MCPTransport interface defined but, as noted above, not used by the router. +- **[partial] MCP OAuth2 + PKCE** — pkg/agent/mcp_oauth.go: OAuthConfig/Token structs, PKCE S256 verifier+challenge generation, manual-copy StartOAuthFlow (prints URL, reads code via fmt.Scanln), RefreshToken, StoreToken/LoadToken to ~/.iroha/tokens/.json (0600), IROHA_MCP_TOKEN env bypass. OOB redirect (urn:ietf:wg:oauth:2.0:oob). Token storage exists but is NOT plumbed into MCPClient/HTTPTransport — no code calls LoadToken to attach a Bearer header, and StartOAuthFlow is never invoked from the router. OAuth is a standalone utility, not integrated into the MCP connect path. +- **[implemented] Subagent synchronous execution** — pkg/agent/subagent.go SubagentManager.RunSubagent: 6 typed agents (explore/planner/reviewer/researcher/executor/work). Executor+work get a git worktree (GlobalWorktreeManager) cleaned up via defer Closeout; read-only types run in parent CWD. Toolsets curated by GetToolsForType (pool.go) with allowedToolsByType allowlist. Default model overridden to a cheap/fast per-provider model unless spec.ModelName set. 
Synchronous: blocks iterating subRunner.Run events, writes JSONL log to .iroha/subagents/logs, then git status --porcelain to derive FilesCreated/FilesEdited. DIVERGES from Claude Code: subagent has its OWN in-memory session (not parent session), no stream/interleaving with parent context, no tool-result relay, model is forced cheap (haiku/flash/4o-mini) rather than honoring parent model. +- **[implemented] Subagent typed tool curation + prompt prefixes** — pool.go GetToolsForType + TypePromptPrefix: typePromptTemplates and allowedToolsByType maps. explore/planner/reviewer/researcher restricted to read-only tool names (file_read/list_directory/search_grep/find_files). executor/unknown get all tools. Curated by exact tool-name string match, not capability tags. +- **[implemented] Team manager + YAML agent discovery** — team.go: TeamManager singleton, .team/config.json persistence, roster CRUD (RegisterTeammate/GetTeammate/ListTeammates), plus loadYAMLAgents which scans .iroha/agents/ and .claude/agents/ for YAML-frontmatter .yaml/.yml/.md agent definition files (parseAgentDefinitionFile). Matches Claude Code's .claude/agents convention. +- **[implemented] Team inbox messaging** — team_message.go: AppendToInbox / ReadAndClearInbox / PeekInbox against .team/inbox/.jsonl, Broadcast to all teammates. team_process.go StartTeammateLoop polls inbox every 2s, calls ProcessMessage callback, replies to sender's inbox, updates status idle/working. This is a polling inbox model, NOT the automatic-delivery + idle-notification model of Claude Code teams (real CC delivers messages to the running agent turn and emits idle notifications). 
+- **[partial] Team process isolation + IPC + watchdog** — team_process.go: EnableProcessIsolation sets isolationMode + binaryPath + NewIPCBridge over unix sockets; StartTeammateProcess spawns child via Watchdog (3 crashes / 60s budget), Recover() restores checkpoint, handleIPCMessage routes message/task_complete/heartbeat/shutdown, heartbeatChecker flags stale after 45s, RunTeammateMode is the child-side entrypoint (--teammate/--socket flags). Substantial but only 'message'/'task_assign'/'task_complete' message types — no structured protocol-response/plan-approval/shutdown_request JSON message types that real Claude Code teams use. +- **[partial] Skill discovery + matching** — skills.go SkillManager: discovers ~/.iroha/skills/ + .iroha/skills/ (project overrides global by ID), skill.json manifest (id/name/description/triggers/tags/instructions_file/type). 3 types: model_invoked (keyword substring match), user_invoked (/skill slash), always (system prompt). LoadInstructions reads SKILL.md with path-escape guard (prefix check on absBase). MatchTriggers is naive case-insensitive substring, not Claude Code's model-driven progressive disclosure (real CC uses the model to decide skill loading and SKILL.md body is injected on demand). Skill body is loaded but injection into the running prompt loop is handled elsewhere (prompt.go), not verified here to follow CC's on-demand progressive disclosure. +- **[implemented] Plugin manifest discovery** — plugin.go PluginManager: discovers ~/.iroha/plugins/*/plugin.json + project, ValidateManifest (id regex, no __, semver), MergeMCPServers (namespaced pluginID__name), MergeHooks. MigratePluginsConfig for legacy flat config. Pure manifest layer; no plugin sandboxing, signature verification, dependency resolution, or marketplace. 
+- **[implemented] Task DAG manager** — task.go: .tasks/.json persistence, SaveTask does bidirectional ReconcileEdges (auto-creates placeholders for missing refs, rebuilds Blocks/blockedBy from active edges) + DFS 3-color CheckCycles with rollback on cycle. ListTasks excludes deleted, sorted by ID. ResolveTasksDir prefers local .tasks with write-test, falls back to ~/.iroha/tasks (with .go-claude migration). Matches Claude Code TaskCreate/TaskUpdate semantics closely (subject/status/blockedBy/blocks/owner). Owner field is 'agent'|'user' but no per-agent ownership enforcement like CC's owner assignment. +- **[implemented] TodoWrite session plan** — todo_manager.go GlobalTodoManager: Update validates max 12 items, status enum, single in_progress; GetItems/NoteRoundWithoutUpdate/RoundsSinceUpdate/ResetRounds (round-staleness tracking for reminders); Render with ANSI colored checkbox + completed count. In-memory only (no persistence), unlike CC's per-task-list persistence. Maps to CC TaskCreate but lacks the metadata/owner/blockedBy richness of task.go. +- **[implemented] Cron scheduler** — cron.go GlobalCronScheduler: hand-rolled cron (cronMatches + computeJitter), 5-field validation, recurring vs one-shot, durable (.iroha/scheduled_tasks.json) vs session-only, file-lock CronLock so only one process fires, checkLoop ticks every 5s deduped by minute, 7-day auto-expiry, DetectMissedTasks (catch-up capped at 24h), DrainNotifications, jitter for :00/:30 crons. Jitter is applied by shifting the check time, not the fire time. DIVERGES from CC: prompts never auto-execute as a turn — they only queue as ScheduledNotification for the UI/runner to drain; CC scheduled tasks fire as enqueued prompts while REPL idle. 
+- **[implemented] Background task lanes** — background.go GlobalBackgroundManager: Run/RunContext spawns sh -c in goroutine via WrapSandboxCommand, 300s timeout+kill, output to .runtime-tasks/.log (capped 50KB), preview, persist .json per task, loadPersistedTasks on startup, Check (single or all), ListTasks sorted desc, DrainNotifications, DetectStalled. NotifQueue is in-memory (lost on crash unless reloaded from persisted status). Maps to CC run_in_background but notification delivery to the active turn is poll-based, not the re-invocation CC uses. +- **[implemented] Git worktree manager** — worktree.go GlobalWorktreeManager: git worktree add -b wt/ into .worktrees/, index.json registry + events.jsonl lifecycle log, Create/Closeout(keep|remove)/Enter/List, branch -D on remove, cascades task status to in_progress/completed when TaskID bound. EnterWorktree-style interactive session switching (CC's EnterWorktree/ExitWorktree tool) is NOT implemented — only Enter (timestamp update). +- **[missing] Team tool surface (TeamCreate/TeamDelete/EnterWorktree)** — Real Claude Code exposes TeamCreate/TeamDelete/EnterWorktree/ExitWorktree/TaskGet/TaskList/TaskUpdate/CronCreate/CronList/CronDelete as first-class tools. Here they exist only as internal managers; only fragments are surfaced as tools (tools_team.go, tools_worktree.go, tools_schedule.go, tools_task.go exist but the manager APIs substantially exceed what is exposed). 
+ +## External deps +- google.golang.org/adk/tool (tool.Tool, tool.Context) — load-bearing in mcp.go, subagent.go, pool.go, runner_confirmation.go as the tool interface +- google.golang.org/adk/model (model.LLMRequest) — used in DynamicMCPTool.ProcessRequest and blockingConfirmationTool.ProcessRequest to register function declarations into req.Config.Tools / req.Tools map +- google.golang.org/adk/agent + google.golang.org/adk/agent/llmagent — llmagent.New + agent.RunConfig{StreamingMode} construct every sub-agent runner (subagent.go:155, pool.go:136) +- google.golang.org/adk/runner — runner.New + Runner.Run event iterator is the execution loop for subagents and team teammates (subagent.go:166-188, pool.go:147-187) +- google.golang.org/adk/session — session.InMemoryService() used per-subagent (no persistence) (subagent.go:165, pool.go:146) +- google.golang.org/genai — genai.Content / genai.Part / genai.FunctionDeclaration / genai.Tool / genai.GenerateContentConfig are the message+schema vocabulary throughout (mcp.go, subagent.go, pool.go, runner_confirmation.go) +- github.com/firebase/genkit/go/genkit — *genkit.Genkit threaded through AgentPool.GenkitRegistry into llm.NewAdapter; only consumed inside the llm package (Claude-via-Genkit + Gemini paths), never used directly in this area's logic +- gopkg.in/yaml.v3 — YAML frontmatter parsing for .claude/agents/* and .iroha/agents/* agent definitions (team.go) +- github.com/google/uuid — background/cron IDs (tasks use integer IDs; uuid used in background.go:98, cron.go:118) +- Standard library only for the decoupled managers: net/http, os/exec, encoding/json, sync, crypto/rand, crypto/sha256 (OAuth PKCE), path/filepath, bufio (stdio + SSE parsing) + +## Coupling notes + +This area splits cleanly into two coupling tiers:\n\n(A) FULLY DECOUPLED — no ADK/Genkit dependency: task.go, todo_manager.go, cron.go, background.go, worktree.go, skills.go, plugin.go, team.go, team_message.go, team_types.go, team_process.go (except it 
references Watchdog/IPCBridge which are also pure-Go), mcp_oauth.go, mcp_transport_http.go, and the entire stdio MCPClient in mcp_client.go. These are plain Go (os, exec, net/http, encoding/json, sync) and already mirror a native architecture. They can be lifted out with zero ADK work.\n\n(B) ADK-COUPLED via the tool/agent/runner/session surface — concentrated in exactly 3 files: mcp.go, subagent.go, pool.go. The load-bearing ADK/Genkit primitives are:\n - mcp.go: imports google.golang.org/adk/tool, google.golang.org/adk/model, google.golang.org/genai. DynamicMCPTool implements the adkRunnableTool interface (Name/Description/IsLongRunning + Declaration()*genai.FunctionDeclaration + Run(tool.Context, any)(map[string]any,error) + ProcessRequest(tool.Context, *model.LLMRequest)). This is the SOLE coupling point for MCP tool exposure — the MCP transport/client layer itself is framework-free; only the 'wrap discovered MCP tool as a runnable ADK tool' adapter is ADK-specific.\n - subagent.go + pool.go: heavy coupling. They call llm.NewAdapter (returns model.LLM — pkg/llm/adapter.go signature takes *genkit.Genkit), llmagent.New(llmagent.Config{Name/Instruction/Model/Tools}), session.InMemoryService(), runner.New(runner.Config{AppName/Agent/SessionService/AutoCreateSession}), then subRunner.Run(ctx, userID, sessionID, *genai.Content, agent.RunConfig{StreamingMode}). Tools are wrapped in blockingConfirmationTool (which embeds tool.Tool and re-implements the same adkRunnableTool interface + ProcessRequest to overwrite req.Tools map). The runnerHooks{} struct is passed to NewAdapter as AdapterHooks.\n\nNative replacement requirement: introduce a single small Tool interface (Name()/Description()/Declaration()->schema/Run(ctx,args)->(map,err)) to replace the adkRunnableTool interface used in mcp.go:228, runner_confirmation.go:21, pool.go, subagent.go — DynamicMCPTool becomes framework-agnostic. 
Then replace the subagent/team execution path (llmagent.New + runner.New + session.InMemoryService + Run over events) with a native agent loop (provider-agnostic message list + tool-call dispatch) — subagent.go:155-203 and pool.go:131-203 are the only two call sites that construct an ADK runner for a sub-agent. The Genkit dependency enters ONLY through llm.NewAdapter's *genkit.Genkit param (used solely for the Claude-via-Genkit and Gemini paths; the OpenAI/Anthropic-direct paths pass g==nil and already bypass Genkit), so decoupling llm.Adapter from model.LLM is the shared prerequisite across areas A3/A4 and this one.\n\nNet: ~85% of this area's lines are already framework-free. The decoupling work is narrowly scoped to (1) the DynamicMCPTool wrapper (mcp.go:228-283) and (2) the two sub-runner construction blocks in subagent.go and pool.go. No Genkit APIs are used directly inside this area's files except via the llm package. + +## Divergences from Claude Code +- MCP HTTP transport + OAuth token storage exist as standalone utilities but are NOT wired into the plugin router: LoadAndStartPlugins (mcp.go:87) always constructs NewMCPClient (stdio), ignoring config.URL, and never calls LoadToken/StoreToken — so HTTP and OAuth-protected MCP servers effectively cannot connect. Real Claude Code supports streamable-HTTP MCP servers and OAuth from .mcp.json. +- MCP protocol version is pinned to 2024-11-05 (mcp_client.go:106, mcp_transport_http.go:81); real CC uses the 2025-06-18 revision with newer capabilities (elicitation, structured tool output, resource links). +- Subagents default to a CHEAP model (haiku/flash/4o-mini) per-provider (subagent.go:134-144) unless overridden; real Claude Code spawns subagents with the parent's model (or an explicitly chosen one), not a forced downgrade. 
+- Subagents run with an isolated in-memory session and DO NOT interleave with the parent's session/context — there is no parent->child context handoff, no automatic return of the full tool-call transcript, only the accumulated text Summary + git-derived file lists. CC subagents return a structured handoff and their tool calls are visible to the parent. +- Team inbox is a polled JSONL mailbox (2s ticker, team_process.go:36); CC's native teams deliver messages into the running agent turn and emit idle notifications — not a poll-and-clear loop. No structured protocol JSON message types (protocol-response, plan-approval, shutdown-request) are implemented. +- Skills use naive case-insensitive substring trigger matching (skills.go:160) and load the SKILL.md body eagerly via LoadInstructions; CC uses model-driven progressive disclosure where the model decides when to expand a skill body, and triggers are far richer than substring. +- Scheduled cron tasks never auto-fire as an agent turn — they only append to an in-memory notifQueue drained by the host (cron.go:336). CC scheduled tasks fire as enqueued prompts while the REPL is idle. +- Background tasks notify via an in-memory queue (background.go:220) rather than re-invoking the agent turn on completion as CC does. +- Worktree manager has no EnterWorktree/ExitWorktree interactive session-switching tool (only Enter = timestamp bump); CC has first-class worktree session entry/exit. +- Owner assignment on TaskRecord is a free string ('agent'/'user', task.go:111) with no enforcement of per-agent ownership or claim semantics that CC's TaskUpdate owner field provides. +- MCP tool result is parsed as map[string]any and returned directly (mcp.go:259); CC normalizes MCP tool results (content blocks, is_error, structured output) into its native tool-result format — here any non-object JSON result would error. 
+- stdio MCP stderr is silently discarded (mcp_client.go:92-95 'Discard/log') with no capture, making server debugging impossible. + +## Quality notes + +Code quality is generally solid: thread-safe (sync.RWMutex everywhere), durable persistence with migration from legacy .go-claude paths, and good separation (transport/client/router layers in MCP; managers are singletons). task.go's ReconcileEdges + CheckCycles with rollback is genuinely well-engineered. Weak spots: (1) substantial dead/separated code — NewMCPTransport/HTTPTransport/OAuth are implemented but not wired into the router, so URL/OAuth MCP servers silently fall back to stdio and fail; (2) no integration tests exercise real MCP servers, HTTP transport, or process-isolated teammates end-to-end against a live binary (test files exist but are mostly unit-level); (3) error handling swallows failures with `continue` in LoadAndStartPlugins (mcp.go:90,124,144) making misconfigurations invisible; (4) MCPClient has no reconnect, no request cancellation, hard-coded 10s timeout; (5) team_process.go's IPC + Watchdog path is complex and lightly tested. For a 1:1 refactor: the decoupled managers (task/todo/cron/background/worktree/skills/plugin/team-inbox) are essentially already native Go and need little change; effort concentrates on the 3 ADK-coupled files and on wiring the currently-orphaned HTTP/OAuth transport into the router. 
diff --git a/docs/claude-code-architecture/audit/A6-tui-llm-config.md b/docs/claude-code-architecture/audit/A6-tui-llm-config.md new file mode 100644 index 0000000..0f24735 --- /dev/null +++ b/docs/claude-code-architecture/audit/A6-tui-llm-config.md @@ -0,0 +1,86 @@ +# Audit: A6-tui-llm-config + +## Files audited + +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/app.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/model.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/view.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/component.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/component_chat.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/component_input.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/component_confirm.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/component_status.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/component_screens.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/component_slash_menu.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/input.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/history.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/renderer.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/raw_input.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/styles.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/interfaces.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/focus.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/tui/wrap.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/adapter.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/anthropic.go +- 
/Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/openai.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/genkit_adapter.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/retry.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/helpers.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/llm/debuglog.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/config/config.go +- /Users/akiwayne/Documents/Project2026/go-project/go-claude/pkg/agent/runner.go (cross-package, for coupling verification) + +## Capabilities +- **[implemented] Custom retained-mode App event loop (non-Bubble-Tea)** — app.go is a hand-rolled Bubble-Tea-style loop (NOT Bubble Tea itself). RunApp() (app.go:662) wires: raw key reader goroutine (raw_input.go ReadRawKeys), agent.Bridge.PromptChan + agent.ToolBridge.StatusChan bridge goroutines, a 100ms spinner ticker, all fanned into one `eventChan`. HandleEvent dispatches typed messages (StreamTextMsg/ToolStatusMsg/ConfirmationRequiredMsg/AgentErrorMsg/AgentDoneMsg/StartupPromptMsg) and Key. This is the load-bearing loop and it is fully implemented, no stubs. +- **[implemented] Component model (Component interface + BaseComponent)** — app.go NewApp wires 6 components (chat/input/confirm/status/slash/screens) via callback fields. activeComponents() (app.go:238) dispatches input in priority order. notifyStateChange() (app.go:357) propagates the 7 TuiState transitions (statePrompt..stateSessionSelect, model.go). +- **[implemented] Input editing, multiline (Alt-Enter), history** — InputComponent handles runes, cursor, backspace, alt-enter newline, tab, history nav (HistoryManager in input.go). KeyEsc closes slash menu. Multi-line wrap via WrapInput(). Submit path: OnSubmit -> App.executePrompt (app.go:419) -> runner.Execute. 
+- **[implemented] Raw terminal input parsing (ANSI/SGR mouse/UTF-8)** — raw_input.go parseBytes() decodes Ctrl-C/D/Y, backspace, tab, enter, arrow keys, Shift+Tab, PgUp/PgDn (\x1b[5~/6~), and SGR mouse wheel (parseSGRMouse). IROHA_ENABLE_MOUSE toggles \x1b[?1000h. Ctrl+Y is declared in KeyType but the 'copy last response' handler is NOT wired anywhere in app.go handleKey (missing feature vs help text claim in view.go:808). +- **[implemented] Flicker-free differential renderer** — renderer.go Draw() does synchronized output (\x1b[?2026h), cursor-up diff, clear-to-EOL, trailing-line cleanup, and hardware cursor positioning for IME alignment. Reset() clears state on exit. +- **[implemented] Glamour markdown rendering with width-keyed cache + stream memoization** — view.go RenderMarkdownWithWidth caches glamour.TermRenderer by width (rendererCache). App.renderStreamedMarkdown (app.go:250) additionally memoizes the rendered string per (text,width) so it only re-parses when streamedText changes during streaming ticks. Compact style derived from DarkStyleConfig to avoid line-padding blocks. +- **[implemented] History viewport with caching, scroll, and local compaction** — HistoryStore (history.go) supports scrollOffset, renderedCache keyed by entry index, Compact() (replaces older entries with a RoleSystem summary, keeps recent verbatim), Search(), PageUp/Down. RenderWithTail composes transient stream/tool/confirm tail into the timeline. +- **[implemented] Human-in-the-loop confirmation card with inline edit mode** — component_confirm.go implements Y/N/Always/Edit/? card with its own editBuffer (separate from InputComponent buffer, mediated by FocusModel in focus.go). Responses go to agent.Bridge.ResponseChan (app.go:377). Edit mode extracts command/content/path from activeToolArgs. 
+- **[implemented] Slash command system (~22 commands) + autocomplete menu** — handleRawSlashCommand (app.go:793) dispatches ~22 commands including /permission, /mode, /rules, /hooks reload, /memory reload, /compact, /context (token estimate dashboard), /prompt, /sections, /sessions, /resume, /team, /worktree, /bg, /skill[s], /mcp reload, /stats. SlashMenuComponent does prefix filtering. /trace is a stub reply ('live timeline rendering is not wired', app.go:1094). +- **[implemented] Permission mode + session picker screens** — component_screens.go renderPermissionScreen/renderSessionScreen. Sessions come from agent.GlobalSessionService.ListSavedSessions (app.go:521). loadHistoryFromSession (app.go:543) replays session events into a fresh HistoryStore by reconstructing user/agent turns. +- **[implemented] Status bar (mode/tokens/cost/spinner/status-tag)** — component_status.go shows mode, token count (k-notation), cost ($), running tool activity+duration, thinking state. SetTokenUsage fed from finalizeTurn() (app.go:496) via runner.GetTokenUsage() + config.EstimateCost. +- **[implemented] Streaming accumulation + incremental status-tag scan** — StreamTextMsg handler (app.go:138) only scans the new chunk for statusTagRe (^[status:...]) with a 50-byte tail-window fallback for cross-chunk tags, avoiding O(n) regex on full accumulated text each tick. +- **[implemented] Direct HTTP adapters (Anthropic + OpenAI-compatible)** — AnthropicAdapter (anthropic.go) and OpenAICompatibleAdapter (openai.go) both implement model.LLM.GenerateContent returning iter.Seq2[*model.LLMResponse,error], parse SSE streams, map genai.Content<->provider messages, track cumulative tokens, support nag-reminder injection + SetSystemPrompt. These are real, working, non-Genkit adapters. 
+- **[implemented] Genkit model adapter** — genkit_adapter.go GenkitModelAdapter.GenerateContent maps ADK LLMRequest -> ai.Message list + ai.GenerateOption, calls genkit.GenerateStream / genkit.Generate, and re-yields as model.LLMResponse. Tool wrappers use a no-op executor (return nil,nil) because ADK runner handles execution. Required only for ProviderGemini (Claude now falls back to direct AnthropicAdapter when genkit is nil). +- **[implemented] Retry with budget, backoff, Retry-After, classification** — retry.go: ConsumeRetry session budget (default 10, IROHA_MAX_RETRIES/CLAUDE_CODE_MAX_RETRIES), RetryDelay exponential backoff capped 60s with Retry-After header parsing, IsRetryableHTTPStatus (408/429/5xx), IsRetryableTemporaryError string classifier, RetryNotice() emits a user-visible model.LLMResponse chunk. Both adapters integrate budget + RetryNotice. +- **[implemented] Output-truncation surfacing (s11 error recovery)** — max_tokens (Anthropic 'max_tokens' / OpenAI 'length') yields a truncation warning chunk (anthropic.go:465, openai.go:491). helpers.go CollectNonStreaming provides a non-streaming convenience collector. +- **[implemented] Config load/save + provider defaults + wizard + pricing** — config.go LoadConfig reads ~/.iroha.json with auto-migration from legacy ~/.go-claude.json, auto-detects provider from model name prefix. ProviderDefaults table covers glm/openai/claude/deepseek/kimi/siliconflow. SaveConfig writes 0600. RunConfigWizard is a 5-step interactive setup. EstimateCost uses ModelPricingMap with 85/15 input/output split. No ADK/Genkit dependency here. +- **[partial] AgentRunner/BridgeResponder interfaces (test seam)** — interfaces.go AgentRunner.Execute signature takes onEvent func(*session.Event). app.go OnEvent reads ev.LLMResponse.Content.Parts. loadHistoryFromSession uses session.GetRequest and iterates resp.Session.Events().All(). This is the ONLY direct ADK coupling in TUI and it is narrowly scoped to event/session shape. 
+- **[missing] Ctrl+Y copy-last-response** — Declared in KeyType (raw_input.go:38) and advertised in RenderHelpDashboard (view.go:808 'Copy last AI response to system clipboard') but NO handler exists in app.go handleKey(). Dead/advertised-only. +- **[stub] /trace live timeline** — Declared as a slash command with an honest stub reply ('live timeline rendering is not wired into the TUI yet', app.go:1094). No actual trace UI. +- **[missing] Model hot-swap command (/model)** — No --model flag switching UI path; SwitchModel exists on the runner (runner.go:504) but is not reachable from any TUI slash command (only /permission changes mode). Model switching is startup-time only. + +## External deps +- github.com/charmbracelet/lipgloss — TUI styling (styles.go, view.go, all components) +- github.com/charmbracelet/glamour + glamour/ansi + glamour/styles — markdown rendering (view.go) +- github.com/charmbracelet/x/ansi — ANSI strip/width/cut helpers (view.go, wrap.go) +- github.com/muesli/termenv — color profile detection (renderer.go) +- golang.org/x/term — raw mode terminal control (raw_input.go, app.go UpdateWidth) +- github.com/google/uuid — session ID generation (app.go handleNewSession) +- google.golang.org/adk/session — session.Event, session.GetRequest, Session.Events().All() — ONLY in app.go + interfaces.go, used for event delivery and history replay +- google.golang.org/adk/model — model.LLM interface, model.LLMRequest, model.LLMResponse — the streaming contract for ALL 3 llm adapters + helpers.go + retry.go RetryNotice return type +- google.golang.org/genai — genai.Content, genai.Part, genai.FunctionCall, genai.FunctionResponse, GenerationConfig, FunctionDeclarations — the canonical message/tool wire types used across anthropic.go, openai.go, genkit_adapter.go, retry.go +- github.com/firebase/genkit (+ /ai, /core/api, /plugins/anthropic, /plugins/googlegenai) — Genkit registry + plugins; only load-bearing for ProviderGemini and optional for Claude. 
Imported by genkit_adapter.go and runner.go (initGenkit). + +## Coupling notes + +DECISION: This area CAN be decoupled from ADK, and the coupling is far narrower than it looks. TUI is ~95% framework-free; LLM is the load-bearing ADK dependency; config is 100% framework-free.\n\nTUI coupling (4 touchpoints only, all in app.go + interfaces.go):\n1. `import google.golang.org/adk/session` (app.go:17, interfaces.go:6). Used as the type of `OnEvent func(*session.Event)` and in `loadHistoryFromSession` via `session.GetRequest` + `resp.Session.Events().All()` (app.go:547-559) and `ev.LLMResponse.Content.Parts` (app.go:695-701, 588-602). Native replacement: define a local `type AgentEvent struct { Text string; ToolCall *ToolCallInfo; IsFinal bool }` and have the runner translate ADK events into it before calling OnEvent. Session replay becomes a local (sessionID -> []Turn) loader. ~3 hours of work, mechanical.\n\nLLM coupling (load-bearing, harder):\n- The package signature is `model.LLM` (google.golang.org/adk/model), whose contract is `GenerateContent(ctx, *model.LLMRequest, bool) iter.Seq2[*model.LLMResponse, error]`. ALL three adapters (anthropic.go:153, openai.go:134, genkit_adapter.go:66) implement this exact signature. The data types are google.golang.org/genai: `*genai.Content`, `*genai.Part`, `*genai.FunctionCall`, `*genai.FunctionResponse`, `req.Config.Tools[].FunctionDeclarations`, `req.Config.SystemInstruction`. These genai types are the wire format the runner, tools.go, and compaction code all speak.\n- Native replacement requires defining local equivalents (LLMRequest{SystemPrompt; Contents []Content{Role; Parts []Part{Text, ToolCall, ToolResult}}; Tools []ToolSchema; Temperature; MaxTokens} and LLMResponse{Parts; Partial; TurnComplete; Usage}) and a local `Model interface { GenerateContent(ctx,*Request,bool) iter.Seq2[*Response,error]; Name() string }`. 
The direct HTTP adapters (anthropic.go, openai.go) already do all the real work and would translate cleanly — they only use genai as an in-memory struct shape. ~1 day to define the types + rewrite 3 adapters' signatures + update runner.go/delegator/tools to the new types.\n- Genkit (firebase/genkit) is the heavier dependency: genkit_adapter.go imports `github.com/firebase/genkit/go/ai` and `/genkit`, and runner.go imports `genkit.Init`, `plugins/anthropic`, `plugins/googlegenai`. genkit_adapter.go uses `ai.NewSystemMessage`, `ai.NewMessage`, `ai.RoleUser/Model/System/Tool`, `ai.NewTool`, `ai.GenerateOption`, `genkit.GenerateStream`, `genkit.Generate`. It is ONLY reached for ProviderGemini (and Claude-with-genkit, which is optional). Dropping Genkit entirely is viable IF Gemini support is dropped or reimplemented via the google generative-ai Go SDK directly; the direct AnthropicAdapter already covers Claude. Without Genkit, ProviderGemini returns an error (adapter.go:79).\n\nCONFIG coupling: zero. config.go uses only stdlib (encoding/json, os, path/filepath, bufio, strings). Framework-free already.\n\nBOTTOM LINE: ADK/Genkit are used as (a) an event/session envelope shape and (b) a streaming model interface contract — neither is doing essential algorithmic work that the direct HTTP adapters don't already do. A native rewrite = define local event/request/response/tool types, port the 2 direct adapters to those types, port session replay to a local struct, and decide Gemini's fate. Estimated effort for this area alone: ~2-3 days. No behavioral reimplementation needed; it's a type-migration. + +## Divergences from Claude Code +- Not Bubble Tea: iroha implements its own retained-mode event loop + differential renderer instead of Bubble Tea's Model/Update/View. This diverges from how most Go Claude Code replicas are built and re-implements viewport/scroll/cursor logic that Bubble Tea gives for free. 
+- Ctrl+Y 'copy last response' is advertised in /help (view.go:808) and parsed in raw_input.go but has no handler — real Claude Code and any honest UI would wire or remove it. +- /trace is a stub reply admitting it is not wired, while real Claude Code surfaces a live tool-call timeline. +- Local-only history compaction (/compact) summarises by role-counting + 240-char excerpts (history.go:161) rather than calling the LLM to summarise — diverges from Claude Code's model-driven compaction. +- Context estimate (/context, app.go:1142) is a static heuristic (chars/4, hooks*80 tokens, servers*120 tokens) not a real tokenizer; Claude Code reports real token counts. +- The LLM adapters hardcode MaxTokens:8192 for Anthropic (anthropic.go:247) and ignore req.Config.MaxOutputTokens for the direct Anthropic path — real Claude Code uses the configured max_tokens. +- Provider/model switching is startup-only; no live /model slash command, unlike Claude Code's /model. +- Session replay (loadHistoryFromSession) reconstructs turns by walking ADK session events and concatenating text parts — tool calls/results in history are not faithfully reconstructed into the timeline, so resumed sessions lose tool-card fidelity. +- Retry budget is global per-process (retryBudget package var) and not reset per session in the TUI flow, so a long-running session silently exhausts its retry budget across many turns. +- Status-tag injection (`[status:...]` regex, model.go:48) is an iroha-specific convention to surface LLM self-reported status into the status bar — not a Claude Code concept. + +## Quality notes + +The code is clean, idiomatic Go with thoughtful performance work: renderer caching by width (view.go rendererCache), per-text stream render memoization (app.go renderStreamedMarkdown), incremental status-tag scanning that only regexes the new chunk (app.go:141-153), and history render caching keyed by entry index (history.go). 
The component model is genuinely decoupled via callback fields (no App back-references). Tests are extensive (~25 _test.go files across tui/llm/config including table-driven tests and a 54k coverage_boost_test.go). Real risks: (1) the direct Anthropic adapter hardcodes MaxTokens=8192 and ignores req.Config.MaxOutputTokens (anthropic.go:247), so the truncation handler at line 465 will fire at 8192 regardless of config; (2) genkit_adapter tool wrappers are no-op executors (genkit_adapter.go:201 return nil,nil) relying on ADK runner to execute — fine under ADK but a trap if decoupling leaves dangling no-op tools; (3) retryBudget is a package-level global with no per-session reset hooked into the TUI new-session flow; (4) two parallel render paths exist — ChatComponent.Render (component_chat.go:146) appears legacy/unused since App.Render only calls RenderTail, creating dead code. diff --git a/docs/claude-code-architecture/gap-analysis.md b/docs/claude-code-architecture/gap-analysis.md new file mode 100644 index 0000000..090917f --- /dev/null +++ b/docs/claude-code-architecture/gap-analysis.md @@ -0,0 +1,165 @@ +# Gap Analysis — iroha (go-claude) vs Claude Code + +Organized by the three architectural clusters. Status legend: ✅ present & faithful · 🟡 partial · 🔴 missing · ⚠️ divergent (present but wrong). + +The recurring structural blocker is called out once here and applies throughout: **the agent loop, tool registry, MCP-tool wrapper, subagent execution, and LLM streaming all speak Google ADK / `genai` types — Claude Code owns native equivalents.** See [refactor-plan.md](refactor-plan.md) for the decoupling strategy. 
+ +--- + +## Cluster A — Core Engine & Runtime +*(agent loop · tool set · tool-exec engine · streaming · session/transcript · compaction · system-prompt assembly)* + +### The loop itself +| Capability | CC | iroha | Status | +|---|---|---|---| +| Native model→tool→model iteration (`queryLoop`) | owned, ~1,730 lines, one path | **outsourced to ADK `Flow.Run`**; `Execute()` only forwards events | 🔴 **THE critical gap** | +| `max_turns` counts only tool-use turns | yes | n/a (no native loop) | 🔴 | +| Parallel read-only / sequential stateful tool dispatch | yes (StreamingToolExecutor) | ADK decides | ⚠️ (uncontrolled) | +| `yieldMissingToolResultBlocks` safety net (orphan tool_use) | yes | ADK internal | ⚠️ | +| Stop-hook forced continuation (`stopHookActive`) | yes | no | 🔴 | +| Token-budget auto-continue (`0.9` / `500` thresholds) | yes | no | 🔴 | + +### Tool set +| Tool | CC | iroha | Status | +|---|---|---|---| +| Read (cat -n, slices, 10MB) | yes | `file_read` | ✅ | +| Write (**Read-before-overwrite** enforced) | yes | `file_write` overwrites blindly | ⚠️ | +| Edit (**requires prior Read**, unique-match) | yes | `file_edit` allows blind edits | ⚠️ | +| MultiEdit | yes | `file_edit_batch` (atomic, rollback) | ✅ (parity+) | +| **NotebookEdit** | yes | — | 🔴 | +| Bash (timeout to 600000ms, `run_in_background`, real sandbox) | yes | `shell_run` (30s, 500-line cap, heuristic sandbox) | 🟡 | +| Glob (doublestar/fsnotify) | yes | hand-rolled `**`, **O(n²) bubble sort**, 100-cap | ⚠️ | +| Grep (**ripgrep-backed**, `-i/-g/-A/-B/-C/output_mode`) | yes | pure-Go regex walk, 50-match cap, no flags | ⚠️ | +| Task/Agent (`run_in_background`, `TaskStop`, structured handoff) | yes | `spawn_subagent` **synchronous only** | 🟡 | +| TodoWrite (structural single-in_progress) | yes | text-only enforcement | 🟡 | +| WebSearch / WebFetch (hosted backend, readability, URL-context) | yes | DDG-scrape / SearXNG, naive htmlToText, 5MB | 🟡 | + +iroha **extras** (not in CC): LSP tools (native, 
not MCP), CI watcher, worktree manager, memory dream consolidator, auto-review LLM judge, AGENTS.md↔memory sync. + +### Tool execution engine +- ✅ Permission gating + hook pipeline + snapshot/rollback — implemented, but via the **`blockingConfirmationTool` ADK-wrapper hack** (overwrites `req.Tools` map to force dispatch). ⚠️ Structural divergence; a native registry calls permission inline before dispatch. +- ⚠️ **No `required`-field schema validation** at registration (relies on LLM correctness); CC uses explicit JSON-schema `required` arrays. + +### Streaming protocol +- ✅ Direct Anthropic + OpenAI adapters parse SSE and emit `model.LLMResponse` chunks. +- 🔴 **No SDK message taxonomy** (`SystemMessage`/`AssistantMessage`/`UserMessage`/`StreamEvent`/`ResultMessage`) — iroha consumes opaque `session.Event`. Headless `stream-json` mode absent. +- ⚠️ Anthropic adapter **hardcodes `MaxTokens:8192`** and ignores configured max_output_tokens. + +### Session/transcript +- ✅ `PersistentSessionService` JSON-per-session, resume/last/fork, session picker. +- ⚠️ **Not the CC transcript format**: iroha serializes `[]*session.Event` + state map. CC is append-only JSONL with `uuid`+`parentUuid` DAG, `compact_boundary` records, `isCompactSummary` user messages, `toolUseResult` fields. Resume loses tool-card fidelity (replay concatenates text parts). +- ⚠️ Token/cost = `bytes/4` and `$2/M` placeholder (not per-model, no real tokenizer). + +### Compaction +- ✅ Microcompact (archives >1000B tool results to transcript JSONL) + round-based summarization (>12 rounds) + circuit breaker + sticky-block preservation. +- ⚠️ **Divergent strategy**: round/byte-based, not CC's token-threshold API microcompact (`clear_tool_uses_20250919`: 180k→40k tokens; `clear_thinking_20251015`). +- 🔴 **No restore path** — archives are append-only, never read back into context; CC restores on edit. 
+- ⚠️ Sticky mechanism is a bespoke `[STICKY]` text marker capped at 20% of a hardcoded 200000-byte estimate; CC uses prompt-cache breakpoints + file/snapshot references. + +### System prompt assembly +- ✅ `SystemPromptBuilder` assembles identity/persona/memories/CLAUDE.md/AGENTS.md/skills/dynamic sections with SHA-256 `cached:` hints. +- 🔴 **CLAUDE.md placement wrong**: iroha puts CLAUDE.md **in the system prompt**; CC injects it as a **user message** (verified). This breaks prompt-cache semantics. +- 🔴 **No real `cache_control` breakpoints** — only a string-hash comment; CC uses provider-side cache breakpoints. +- ⚠️ Rebuilt inside the model delegator (keyed off `GlobalMessageCount`), not at the turn boundary. +- ⚠️ `sanitizeADKStatePlaceholders` is an ADK-template-injection guard — dead weight under a native loop. + +--- + +## Cluster B — Trust Boundary (permissions · hooks · sandbox · MCP) + +### Permissions +- ✅ All 6 modes + rule engine (allow/deny/ask) + `BashSecurityValidator` (14 regex) + 4-tier risk classifier. +- 🔴 **Not CC's config format**: hardcoded built-in rules + `AddRule` API, not `settings.json` `permissions.{allow,deny,ask}` arrays. No enterprise managed-settings / `settings.local.json` merge, no `additionalDirectories`, no gitignore-style matching. +- ⚠️ `matchesPattern` uses substring fallback (looser than CC). +- ⚠️ Permission order not guaranteed deny→ask→allow with correct precedence. + +### Hooks +- ✅ 12 events (covers CC's 8 + extras), command/http/llm-prompt types, matchers, stdin-JSON/stdout-JSON/exit-code protocol, project-hook trust gate (`IROHA_TRUST_PROJECT_HOOKS`), env whitelisting (good secret hygiene). +- 🔴 **`PreToolUse` does not use `hookSpecificOutput.permissionDecision`** semantics (allow/deny/ask/defer + `updatedInput`); does not fire before permission-mode checks. +- ⚠️ Config at `.iroha/hooks.json`, not `.claude/settings.json` hooks block (shape close but not identical). 
+- ⚠️ `llm-prompt` hook type is an **iroha extension** (CC has no native LLM hook). + +### Sandbox +- ✅ Real OS-level sandbox: mac `sandbox-exec` (generated Seatbelt profile) + linux `bwrap`, graceful fallback. +- ⚠️ Seatbelt profile is **allow-by-default** (`(allow default)` then denies specific paths) — **weaker** than CC's deny-by-default; network implicitly allowed. +- ⚠️ Two **overlapping** path-escape checkers (`checkShellCommandSandbox` + `isPathDangerous`) with divergent whitelists. +- ⚠️ CC ships its own sandboxing binary (seatbelt helper / landlock+namespaces) with granular workspace allowlisting + network policy. + +### MCP +- ✅ Stdio JSON-RPC 2.0 client, `tools/list` → `DynamicMCPTool` (`mcp__server__tool`), plugin discovery, per-skill `plugins.json`, `/mcp` reload. +- 🔴 **HTTP transport + OAuth are implemented but NOT wired** — `LoadAndStartPlugins` always constructs stdio `NewMCPClient`, ignoring `config.URL`; OAuth tokens never attached. URL/OAuth MCP servers silently fall back and fail. +- 🔴 **Protocol version pinned to `2024-11-05`**; CC uses **`2025-06-18`** (elicitation, structured tool output, resource links). +- 🔴 No resource/prompt subscriptions, no sampling, no cancellation, no logging notifications. MCP stderr silently discarded. +- ⚠️ MCP tool result parsed as `map[string]any` (non-object JSON errors); CC normalizes content blocks / `is_error` / structured output. + +--- + +## Cluster C — Human Interface & Orchestration (memory · subagents · skills · slash/plan · TUI/config) + +### Memory / CLAUDE.md +- ✅ File-based memory (YAML frontmatter, global+project layers, 100-cap), trigger-aware injection, AGENTS.md↔memory sync, dream consolidator. +- 🔴 **Wrong layer**: memory + CLAUDE.md injected into system prompt; CC injects CLAUDE.md as a **user message**. +- ⚠️ Memory model (user/feedback/project/reference `.md`) is iroha-specific, not CC's CLAUDE.md-only convention + `memory` tool. 
+- ⚠️ Dream consolidator (dedup + LLM merge + PID lock + 7 gates) has no CC equivalent; `ConsolidateSemantically` deletes originals before validating LLM JSON (not transactional). + +### Subagents / Task +- ✅ 6 typed agents (explore/planner/reviewer/researcher/executor/work), curated toolsets, worktree isolation for executor/work, JSONL logs, file-diff derivation. +- 🔴 **Forced cheap model** (haiku/flash/4o-mini) unless overridden; CC spawns with parent's model. +- 🔴 **No parent↔child context handoff**; isolated in-memory session; parent gets only text summary + git file lists, not a structured handoff or visible tool transcript. +- 🔴 Synchronous only (no `run_in_background` / `TaskStop`). + +### Skills +- ✅ Discovery (~/.iroha/skills + project), `skill.json` manifest, 3 types (model/user/always), path-escape guard. +- 🔴 **Naive substring trigger matching** + eager body load; CC uses **model-driven progressive disclosure** (model decides when to expand SKILL.md body). +- ⚠️ Plugin namespace is not CC's `plugin-name:skill-name`. + +### Slash commands + plan mode +- ✅ ~22 commands + autocomplete, `/compact`, `/context`, `/sessions`, `/resume`, `/team`, `/worktree`, `/bg`, `/skill`, `/mcp`, permission/session screens. +- 🔴 **No live `/model` hot-swap** (startup-only); CC has `/model`. +- 🔴 `/trace` is a stub; CC surfaces a live tool-call timeline. +- 🔴 **No plan mode tool pair** (`EnterPlanMode`/`ExitPlanMode`) with the 5-option approval flow. +- ⚠️ Custom-command `.claude/commands/*.md` support with `$ARGUMENTS`/`$1`/`!`/`@file` — verify parity (the audit did not confirm full parity). + +### TUI / IDE / config +- ✅ Hand-rolled retained-mode event loop + differential renderer + glamour (width-keyed cache + stream memoization), component model, multiline input, history viewport, confirmation card, status bar. **Genuinely good TUI engineering.** +- ⚠️ **Not Bubble Tea** (re-implements viewport/scroll/cursor) — and CC is React/Ink anyway, so this is a style choice, not a fidelity bug.
Keep it. +- ⚠️ `/context` uses chars/4 heuristic, not real tokens. +- 🔴 **Ctrl+Y copy-last-response advertised but unwired**; `/trace` stub. +- 🔴 **No IDE integration** (VS Code/JetBrains bridge). +- ⚠️ Config at `~/.iroha.json`, not CC's `settings.json` 4-tier hierarchy (managed → user → project → local). +- ⚠️ Retry budget is a **process-global** package var, not reset per session. + +### LLM adapters +- ✅ Direct Anthropic + OpenAI-compatible (7 providers), SSE, cumulative tokens, nag-reminder injection, retry (budget/backoff/Retry-After/classification), truncation surfacing. +- ⚠️ All implement ADK `model.LLM.GenerateContent` over `genai` types — the load-bearing coupling. Genkit only for Gemini/Claude-via-Genkit; direct adapters bypass it. +- 🔴 **Genkit dependency** for Gemini; dropping Genkit leaves ProviderGemini broken (reimplement via google generative-ai SDK directly). + +--- + +## Cross-cutting: behavioral divergences baked into the current loop tail + +These are not capability gaps — they are **wrong behaviors** the refactor must remove: + +1. **Auto-commit on every turn** (`runner_exec.go:189-242`) — CC never auto-commits; commits are explicit user actions. +2. **Fixed "iroha" persona** + `GlobalMessageCount` seeded at 10 (`autonomous.go:135-146`) — CC has no fixed persona, no synthetic count. +3. **Global, exact-arg-only circuit breaker**, reset every `Execute` (`runner_confirmation.go:219-256`) — breaks teammate isolation; CC is per-tool, typed, time-windowed. +4. **go-build self-heal hardcoded to `./pkg/agent/...`** (`runner_confirmation.go:157`) — misreports outside this repo. +5. **Confirmation explain/edit flows spawn extra model calls** — CC permission is rule-based + user prompt only. +6. **Auto-review LLM judge pre-approves** medium-risk ops in `ModeAuto` — more permissive than CC's ask-human default (iroha extension; keep as opt-in, not default). 
+ +## Coupling summary (what must change for a native loop) + +The ADK/Genkit coupling is concentrated in **8 file groups** (the rows below — roughly a dozen files out of ~100 Go files): + +| File | Coupling | Native replacement | +|---|---|---| +| `runner.go` | `llmagent.New` + `runner.New` + `session.InMemoryService` + `DynamicLLMDelegator` | native `AgentLoop` + `Session` | +| `runner_exec.go` | `adkRunner.Run` event iteration | native loop driver | +| `runner_confirmation.go` | `tool.Tool`/`tool.Context`/`req.Tools` map hijack | inline permission call before dispatch | +| `tools.go` | `functiontool.New` + `tool.Tool` | native `Tool` interface + struct-tag schema reflector | +| `mcp.go` | `DynamicMCPTool` impl of `adkRunnableTool` | native `Tool` adapter (transport stays) | +| `subagent.go` + `pool.go` | per-subagent `llmagent`+`runner`+`session` | native `AgentLoop` recursion | +| `pkg/llm/*` (3 adapters) | `model.LLM` + `genai` types | native `Model` interface + content-block types | +| `compaction.go` + `session_store.go` | `[]*genai.Content` + `session.Event` | native `Message`/`Event` | + +**Everything else** (task/todo/cron/background/worktree/skills/plugin/team-inbox/memory/frontmatter/migrate/prompt-builder/permission-rules/hook-config/sandbox/MCP-client/MCP-transport/OAuth/config) is **framework-free** and ports with signature changes only. diff --git a/docs/claude-code-architecture/refactor-plan.md b/docs/claude-code-architecture/refactor-plan.md new file mode 100644 index 0000000..90e44f3 --- /dev/null +++ b/docs/claude-code-architecture/refactor-plan.md @@ -0,0 +1,132 @@ +# Refactor Plan — Native Engine for 1:1 Claude Code Fidelity + +## Architecture self-assessment (per CLAUDE.md directive) + +**Verdict: underengineered at the core, well-engineered at the periphery.** + +The current `iroha` is functionally broad (~24.9k lines, 40+ tools, 7 providers, real sandbox) but **architecturally hollow at the single most important seam**: the agent loop is outsourced to Google ADK.
Everything that makes Claude Code *Claude Code* — the `query()`/`queryLoop()` generator, the SDK message taxonomy, Anthropic content-block messages, real token budgeting, stop-hook continuation, prompt-cache breakpoints, the CC transcript format — is either absent or approximated through a framework that was never designed to mirror it. + +A 1:1 replica cannot reach fidelity by patching ADK's `Flow.Run`. The audit confirms decoupling is non-incremental. So the plan is a **native engine rewrite**, reusing the ~85% framework-free periphery, executed in phases so the binary stays green at each step. + +This is *not* overengineering — overengineering would be greenfield-rewriting the framework-free managers (which already work) or inventing a second abstraction layer on top of ADK. This plan touches exactly the 8 coupled files + adds a small native core, and leaves the periphery intact. + +## Core decisions + +### Decision 1 — Native `AgentLoop`, decouple from Google ADK + Genkit +Replace the ADK-mediated loop with a native Go `AgentLoop` that owns the model→tool→model iteration. This is the load-bearing change; everything else follows. + +**Native loop contract (from verified research):** +```go +// One iteration = one model call. Loop continues while the response contains +// any tool_use block; yields when tool-free (end_turn) and no stop-hook/budget +// continuation. max_turns counts ONLY tool-use turns. +type AgentLoop struct { session *Session; tools *Registry; model Model; perms *PermissionManager; hooks *HookManager; budget *Budget } + +func (l *AgentLoop) Run(ctx, userInput) iter.Seq2[Event, error] // yields the 5 SDK message types +``` +- Read-only tools run concurrently; stateful tools sequentially. +- `yieldMissingToolResults` safety net on abort/fallback/error. +- Stop-hook continuation (`stopHookActive`). +- Token-budget auto-continue (`COMPLETION_THRESHOLD=0.9`, `DIMINISHING_THRESHOLD=500`); subagents always stop. 
+ +### Decision 2 — Anthropic-native message types + real tokenizer +The audit (A4) is explicit: the single biggest 1:1 blocker is `genai.Content` + `bytes/4` token heuristic vs CC's Anthropic content-blocks + real counting. Define native types: +```go +type Content struct { Role string; Blocks []Block } +type Block interface{ blockType() string } +type TextBlock struct { Text string } +type ToolUseBlock struct { ID, Name string; Input json.RawMessage } +type ToolResultBlock struct { ToolUseID string; Content []Block; IsError bool } +type ThinkingBlock struct { Text, Signature string } +``` +- Provider adapters translate native ↔ wire (Anthropic direct, OpenAI-compatible). +- **Tokenizer**: add `tiktoken-go` (or Anthropic count_tokens endpoint) for real budgeting + the 180k/40k microcompact thresholds + the 92/95% auto-compact thresholds. Note that `tiktoken-go` implements OpenAI encodings and therefore only approximates Claude token counts; use the count_tokens endpoint wherever exact Anthropic numbers matter (e.g. the compaction thresholds). + +**Genai stays as an adapter-internal detail only** (or is dropped entirely once adapters speak native). Decision: drop `genai` from the loop; adapters own translation. + +### Decision 3 — Native `Tool` interface + struct-tag schema reflector +Replace `tool.Tool`/`tool.Context`/`functiontool.New` with: +```go +type Tool interface { + Name() string + Description() string + IsLongRunning() bool + Declaration() *ToolSchema // built from struct tags (iroha already uses `description:` tags everywhere) + Run(ctx context.Context, args any) (Result, error) +} +type Registry struct{ ... } // register/unregister, dispatch with permission+hooks inline +``` +- Permission check becomes an inline call in `Registry.dispatch` **before** `Tool.Run` — removes the `req.Tools`-map hijack entirely. +- A generic `register[TArgs, TResults]` reflect-walks `TArgs` struct tags → `ToolSchema` with explicit `required` arrays (CC fidelity). +- `DynamicMCPTool` becomes a native `Tool` adapter; the MCP transport/client layer is already framework-free.
+ +### Decision 4 — Remove the behavioral divergences (not optional for 1:1) +Delete/fix: auto-commit-on-turn, fixed persona + synthetic count, global circuit breaker (→ per-tool typed time-windowed), hardcoded `./pkg/agent/...` go-build, explain/edit extra model calls. Make the auto-review LLM judge **opt-in**, not the `ModeAuto` default. + +## Phased roadmap + +Each phase ends with `go build` + `go test` green. Phases are ordered so later phases depend on earlier primitives. + +### Phase 0 — Foundation: native types + tokenizer (no behavior change yet) +**Goal:** introduce native message/LLM types alongside ADK, with a bridge so the existing loop still runs. +- `pkg/engine/message.go` — `Content`, `Block` union, `Event` union (5 SDK message types + stream deltas). +- `pkg/engine/tokenizer.go` — real tokenizer wrapper. +- `pkg/engine/llm.go` — native `Model` interface; port the 3 adapters' internals to native types (keep `model.LLM` shim delegating to native so the old loop still compiles). +- Provider adapters translate native ↔ Anthropic/OpenAI wire. +- **Exit criteria:** `go build` green; adapters round-trip native↔wire; tokenizer counts match a known fixture. + +### Phase 1 — Core: native `AgentLoop` + `Tool` registry (the big one) +**Goal:** the loop is owned in-process; ADK runner retired for the main path. +- `pkg/engine/loop.go` — `AgentLoop.Run` (the `queryLoop` equivalent): assemble request → stream model → detect tool_use → dispatch via registry (permission + hooks + per-tool circuit breaker inline) → append tool_result → repeat; stop conditions + budget. +- `pkg/engine/tool.go` — native `Tool` interface + `Registry` + struct-tag schema reflector + `register[TArgs,TResults]`. +- Migrate `GetSWETools()` registrations to native `Tool` (handlers need only `context.Context` + workdir — already decoupling-ready). +- Replace `blockingConfirmationTool` hijack with inline permission in `Registry.dispatch`. 
+- Retire `runner.go`'s `adkRunner`/`llmagent`/`DynamicLLMDelegator`; `runner_exec.go` becomes a thin caller of `AgentLoop.Run` that forwards native `Event`s to the TUI bridge. +- **Exit criteria:** a real multi-turn tool-using session runs end-to-end on the native loop; `go test ./pkg/engine/...` + existing agent tests green; no `google.golang.org/adk/runner|llmagent|model|session` imports remain in non-shim code. + +### Phase 2 — Trust boundary parity +**Goal:** CC-faithful permissions, hooks, MCP. +- Permissions: switch to `settings.json` 4-tier merge (managed→user→project→local); `permissions.{allow,deny,ask}` arrays; deny→ask→allow eval; gitignore-style matching; Bash word-boundary glob; path anchors per tool. +- Hooks: implement `PreToolUse` `hookSpecificOutput.permissionDecision` (allow/deny/ask/defer + `updatedInput`); fire before permission-mode checks (deny even in bypass); confirm `.claude/settings.json` hooks-block shape; keep `llm-prompt` as opt-in extension. +- MCP: **wire HTTP transport + OAuth** into the router (currently orphaned); bump protocol to **2025-06-18**; normalize tool results (content blocks / `is_error` / structured output); persist oversized results to disk (25k token default, 500k char ceiling); capture stderr. +- Sandbox: flip Seatbelt to **deny-by-default**; collapse the two overlapping path-escape checkers into one; add network policy. +- **Exit criteria:** permission/hooks/MCP parity spot-checks pass against CC docs examples. + +### Phase 3 — Interface & orchestration parity +**Goal:** CC-faithful UX + extensibility. +- Memory/CLAUDE.md: inject CLAUDE.md as a **user message** (not system prompt); real `cache_control` breakpoints; `#` quick-add + `memory` tool; drop dream-consolidator to opt-in. +- Compaction: CC token-threshold API microcompact (`clear_tool_uses_20250919` 180k→40k, `clear_thinking_20251015`); restore-on-edit path; retire `[STICKY]` marker. 
+- Session transcript: adopt CC JSONL format (`uuid`+`parentUuid` DAG, `compact_boundary`, `isCompactSummary`, `toolUseResult`); faithful replay (tool cards). +- Subagents: parent's model (no forced downgrade); structured handoff + visible tool transcript; `run_in_background` + `TaskStop`; built-in `Explore`/`Plan` one-shot. +- Skills: **progressive disclosure** (model-driven body expansion); plugin namespace `plugin-name:skill-name`. +- Slash/plan: live `/model`; `EnterPlanMode`/`ExitPlanMode` with 5-option approval; `/trace` live timeline; wire Ctrl+Y; full custom-command parity. +- Config: move to CC `settings.json` hierarchy (keep `~/.iroha.json` as legacy migration source). +- TUI: decouple `OnEvent` from `session.Event` (native `AgentEvent`); per-session retry budget reset; respect configured `max_tokens`. +- **Exit criteria:** a resumed session round-trips with tool-card fidelity; plan mode + ExitPlanMode flow works; `/model` swaps live. + +### Phase 4 — Verify +- `go build ./...` green; `go test ./...` green; `golangci-lint` 0 issues. +- Parity spot-checks: tool schemas vs CC docs; hook JSON I/O vs docs examples; transcript format vs a real CC session JSONL; permission rule precedence; headless `stream-json` end-to-end. +- Drop dead code (sanitizeADKStatePlaceholders, legacy chat render path, unused OAuth/HTTP if superseded). +- Optional: keep Gemini support by reimplementing via google generative-ai SDK (drop Genkit) — or document Gemini as unsupported. 
+ +## Suggested package layout (additive) +``` +pkg/engine/ # NEW native core: message, event, tokenizer, model, loop, tool, registry, session, budget +pkg/agent/ # EXISTING — handlers/managers migrate to pkg/engine types; periphery stays +pkg/llm/ # adapters ported to native Model; genai becomes adapter-internal or removed +pkg/tui/ # OnEvent decoupled to native AgentEvent +pkg/config/ # settings.json 4-tier hierarchy +``` + +## Risk register +- **Risk:** Phase 1 is large; a half-migrated loop breaks everything. **Mitigation:** Phase 0 bridge keeps the old loop compiling; Phase 1 lands the native loop behind the same `Execute()`/`OnEvent` seam, swappable in one commit. +- **Risk:** Test suite assumes ADK `session.Event`/`model.LLM` shapes (68 test files). **Mitigation:** keep thin shims during migration; rewrite tests against native types as each file is touched. +- **Risk:** Genkit removal strands Gemini. **Mitigation:** port Gemini to the google generative-ai SDK directly (no Genkit) — the roadmap lists this as the optional Phase 4 step — or document Gemini as unsupported. +- **Risk:** Behavioral removals (auto-commit, persona) may be wanted by existing iroha users. **Mitigation:** keep them as config-gated opt-ins (`iroha.autoCommit`, `iroha.persona`), off by default for 1:1 fidelity. + +## Effort signal (rough, for sequencing only) +Phase 0 ~2-3 days · Phase 1 ~5-8 days (the crux) · Phase 2 ~3-4 days · Phase 3 ~4-6 days · Phase 4 ~2 days. Ultracode mode: quality over speed; each phase gets its own implementation workflow + verification pass. + +## What to do next +This plan is the blueprint. Recommended execution under ultracode: run a **Phase 0 implementation workflow** (native types + tokenizer + adapter port) as the first concrete step, verify build+tests, then proceed phase by phase — each phase its own workflow, each ending in a green build + parity check before the next begins.
diff --git a/docs/claude-code-architecture/research/agent-loop.md b/docs/claude-code-architecture/research/agent-loop.md new file mode 100644 index 0000000..4c27d84 --- /dev/null +++ b/docs/claude-code-architecture/research/agent-loop.md @@ -0,0 +1,104 @@ +# Research: agent-loop + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code's agent loop is a single async generator (`query()` → `queryLoop()` in `query.ts`) that every caller (REPL, SDK, sub-agents, headless `-p`, compact agent) funnels through. One iteration = one model API call: gather context (system prompt + tool defs + conversation history, prompt-cached), stream a response, and if the response contains any `tool_use` content blocks, execute those tools and feed the `tool_result` blocks back as a `user` message, then loop. The loop yields control back to the caller ONLY when the assistant produces a response with zero `tool_use` blocks (i.e. `stop_reason: "end_turn"` or text-only) AND no stop-hook forces continuation AND no token-budget continuation fires. The loop yields 5 core SDK message types: `SystemMessage` (subtype `"init"` at start, `"compact_boundary"` after compaction), `AssistantMessage` (after each model response, incl. final text-only one), `UserMessage` (after each tool execution, carrying tool_result content), `StreamEvent` (only when `include_partial_messages`/`includePartialMessages` is enabled — raw API SSE events like `content_block_delta` with `text_delta`/`input_json_delta`), and `ResultMessage` (terminal, carries final text + `usage` + `total_cost_usd` + `session_id` + `stop_reason` + `num_turns`). A turn counts ONLY tool-use round trips; `max_turns`/`maxTurns` and `max_budget_usd`/`maxBudgetUsd` cap the loop and surface as `ResultMessage.subtype` = `error_max_turns` / `error_max_budget_usd`. Read-only tools (Read, Glob, Grep, MCP readOnlyHint) execute in parallel within a turn; stateful tools (Edit, Write, Bash) run sequentially. 
+ +## Components +### SDK query() entry point + message protocol +**Purpose:** The public surface of the agent loop: a single async generator function that drives the entire turn cycle and yields typed messages. + +**Mechanism:** query() is an async generator (Python `async for message in query(...)`; TS `for await (const message of query({...}))`). It yields messages in this lifecycle order: (1) SystemMessage subtype='init' with session metadata (session_id, tools, models, agent info); (2) per turn: AssistantMessage (text + tool_use blocks) → UserMessage (tool_result content); (3) repeat; (4) final AssistantMessage with text-only (no tool_use); (5) ResultMessage with final text, token usage, cost (total_cost_usd), num_turns, session_id, stop_reason. Default (non-streaming) yields complete AssistantMessage after each model response completes; with include_partial_messages/includePartialMessages=true it also yields StreamEvent (TS: SDKPartialAssistantMessage, type 'stream_event') carrying raw API SSE events (message_start, content_block_start, content_block_delta with text_delta/input_json_delta, content_block_stop, message_delta, message_stop). IMPORTANT: a small number of trailing system events (e.g. prompt_suggestion) can arrive AFTER ResultMessage — callers must drain the stream to completion, not break on the result. Check stop_reason === 'refusal' to detect refusals. + +**Data model:** Python dataclasses: SystemMessage (subtype 'init'|'compact_boundary', data nested w/ session_id), AssistantMessage (content blocks), UserMessage (tool result content), ResultMessage (subtype, result, usage, total_cost_usd, num_turns, session_id, stop_reason), StreamEvent (uuid, session_id, event:dict, parent_tool_use_id).
TS equivalents: SDKAssistantMessage.type='assistant', SDKUserMessage.type='user', SDKResultMessage.type='result', SDKSystemMessage.type='system' subtype 'init', SDKCompactBoundaryMessage.type='compact_boundary' (NOT a SystemMessage subtype in TS), SDKPartialAssistantMessage.type='stream_event'. SDKMessage union also includes SDKUserMessageReplay, SDKStatusMessage, SDKLocalCommandOutputMessage, SDKHookStartedMessage, SDKHookProgressMessage. + +**Config:** options.max_turns (Python) / maxTurns (TS) — int, no default limit. options.max_budget_usd (Python) / maxBudgetUsd (TS) — no default limit. options.effort in {"low","medium","high","xhigh","max"} (xhigh recommended on Opus 4.7+/Fable 5). options.model e.g. "claude-sonnet-4-6", "claude-opus-4-8". options.permission_mode / permissionMode in {default, acceptEdits, plan, dontAsk, auto, bypassPermissions}. options.include_partial_messages (Py) / includePartialMessages (TS) bool — gates StreamEvent emission. + +### queryLoop() — the while(true) core (query.ts) +**Purpose:** The single internal generator that every caller (REPL, SDK, sub-agents, headless -p, compact agent) delegates to. ~1,730 lines, one code path. + +**Mechanism:** Skeleton: init state → while(true){ run context-management pipeline → callModel via withRetry (streaming) → for each streamed AssistantMessage check for tool_use blocks (sets needsFollowUp) → if any tool_use: execute tools (StreamingToolExecutor runs concurrency-safe tools during streaming, sequential for stateful), append tool_result blocks, reconstruct NEW State object with transition.reason='next_turn', continue → if NO tool_use: run prompt-too-long recovery, max-output-token escalation/recovery, then stop hooks, then token-budget check → return Terminal }. Every continue site reconstructs a complete new immutable State object (not field mutation). 
Errors are WITHHELD from the yield stream during recovery (isWithheldPromptTooLong, isWithheldMaxOutputTokens) so SDK consumers that disconnect on any error field keep listening; withheld errors are pushed to internal assistantMessages so downstream recovery can find them, surfaced only if ALL recovery fails. + +**Data model:** Terminal discriminated union: {reason: 'blocking_limit'|'image_error'|'model_error'|'aborted_streaming'|'prompt_too_long'|'completed'|'stop_hook_prevented'|'aborted_tools'|'hook_stopped'|'max_turns'}. Continue transition.reason: 'next_turn'|'collapse_drain_retry'|'reactive_compact_retry'|'max_output_tokens_escalate'|'max_output_tokens_recovery'|'stop_hook_blocking'|'token_budget_continuation'. LoopState carries messages, toolUseContext, turnCount, transition, autoCompactTracking, maxOutputTokensRecoveryCount, hasAttemptedReactiveCompact, maxOutputTokensOverride, pendingToolUseSummary (background Haiku summary promise), stopHookActive. + +**Config:** Internal (source-level, not public API): MAX_OUTPUT_TOKENS_RECOVERY_LIMIT=3, MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES=3, hasAttemptedReactiveCompact one-shot, stopHookActive flag, turnCount monotonic counter, maxOutputTokensOverride (64K during escalation, cleared after). + +### callModel / queryModel — API streaming + retry ladder +**Purpose:** Make the streaming Anthropic API call with model fallback and recover from transient failures. + +**Mechanism:** queryModel is an async function* calling Anthropic messages.create(stream=true) wrapped in withRetry() (DEFAULT_MAX_RETRIES=10, exponential backoff base*2^(attempt-1) capped maxDelayMs=32000 + 0-25% jitter, honors Retry-After header). SSE sequence reconstructed into AssistantMessage objects: message_start → (content_block_start → content_block_delta* → content_block_stop)* → message_delta (carries final usage + stop_reason) → message_stop. Usage mutated in-place on last message only when message_delta arrives. 
Retry decision rules: 529 overloaded → only foreground query sources retry (background bails to avoid cascade); after 3 consecutive 529s on non-custom Opus model → throw FallbackTriggeredError → queryLoop switches to fallbackModel; OAuth 401 → handleOAuth401Error token refresh; context-overflow 400 → parse token counts, compute maxTokensOverride; ECONNRESET/EPIPE → disableKeepAlive then retry; persistent UNATTENDED_RETRY mode retries indefinitely with 30-min cap + 30s heartbeat. + +**Data model:** callModel yields AssistantMessage (type 'assistant', .message.content with text/tool_use/thinking blocks, optional .error field) and StreamEvent. withRetry yields SystemAPIErrorMessage before each sleep. On FallbackTriggeredError, currentModel=fallbackModel and signature/thinking blocks stripped (they are model-bound — replaying across models => 400). Orphaned partial AssistantMessages are tombstoned: yielded as {type:'tombstone', message} so UI/transcript removes them (prevents 'thinking blocks cannot be modified' error). + +**Config:** DEFAULT_MAX_RETRIES=10. maxDelayMs=32000. Persistent mode UNATTENDED_RETRY: 30-min backoff cap, heartbeat every 30s. feature('HISTORY_SNIP'), feature('TOKEN_BUDGET'), feature('CONTEXT_COLLAPSE') gates evaluated at bundle time. + +### Tool execution + round trips +**Purpose:** Execute requested tool_use blocks and feed tool_result blocks back so the loop continues. + +**Mechanism:** Each assistant response may contain multiple tool_use blocks. Parallel execution is decided by tool type: read-only tools (Read, Glob, Grep, MCP readOnlyHint=true tools) run concurrently; stateful tools (Edit, Write, Bash, custom tools default) run sequentially. StreamingToolExecutor (gated feature streamingToolExecution) starts executing concurrency-safe tools as soon as their tool_use block's input is complete during streaming — before the full response finishes. 14-step execution pipeline per tool: Zod validation → input backfill (e.g. 
expand path) → PreToolUse hook → permission check (canUseTool callback) → execute → PostToolUse hook → format result. A background Haiku summary of tool results is kicked off (pendingToolUseSummary) and resolved/overlapped during the NEXT iteration's streaming (yielded as ToolUseSummaryMessage). Permission denial returns a rejection tool_result to Claude. + +**Data model:** Request: {type:'tool_use', id:'toolu_', name, input}. Response: {type:'tool_result', tool_use_id, content: str | content_block[], is_error?: bool}. yieldMissingToolResultBlocks fires in 3 abort/error paths (outer error, fallback mid-stream, user abort) creating synthetic error tool_results for every tool_use lacking a result — prevents next-call protocol errors. + +**Config:** tool() helper accepts annotations.readOnlyHint (default false) to opt custom tools into parallel execution. Built-in read-only: Read, Glob, Grep, MCP tools marked readOnly. Stateful (always sequential): Edit, Write, Bash. PreToolUse hook can short-circuit: reject → tool skipped, Claude gets rejection tool_result instead. Deny via permission → Claude typically tries another approach or reports it couldn't proceed. + +### Stop conditions + ResultMessage subtypes +**Purpose:** Decide when the loop yields control back to the user and report why. + +**Mechanism:** PRIMARY stop condition = assistant response with zero tool_use blocks (model produced text only) AND no stop-hook blocking errors AND token budget says stop. Caps: max_turns/maxTurns counts ONLY tool-use turns (the final text-only response is NOT counted — so max_turns=2 in a 3-tool-turn task stops before the 3rd tool). max_budget_usd/maxBudgetUsd stops on spend threshold. Hitting either → ResultMessage.subtype = error_max_turns | error_max_budget_usd (result field absent). Other ResultMessage.subtypes: success (result present), error_during_execution (API failure/cancel), error_max_structured_output_retries. Normal completion → subtype 'success' + result text. 
stop_hook_prevented is its own Terminal reason but still surfaces via ResultMessage. API stop_reason on the final turn: end_turn (normal), max_tokens (truncated; triggers escalation/recovery ladder), refusal (declined — detect via stop_reason=='refusal'), pause_turn (server-tool sampling-loop iteration limit, default 10 — handle by appending assistant response and re-requesting), model_context_window_exceeded. + +**Data model:** ResultMessage.subtype discriminated union above; .result field ONLY present on 'success'. .stop_reason (string|null) from last assistant response. All subtypes carry total_cost_usd, usage, num_turns, session_id (Python: total_cost_usd/usage typed Optional, guard None on error paths). + +**Config:** max_turns/maxTurns, max_budget_usd/maxBudgetUsd (no defaults). ResultMessage subtype values: success, error_max_turns, error_max_budget_usd, error_during_execution, error_max_structured_output_retries. API stop_reason values the loop inspects: end_turn, tool_use, max_tokens, pause_turn, refusal, model_context_window_exceeded, stop_sequence. + +### Streaming vs buffered turn modes +**Purpose:** Two output delivery modes: buffered (complete AssistantMessage per turn) vs streaming (raw SSE deltas as they arrive). + +**Mechanism:** When include_partial_messages/includePartialMessages=true the generator interleaves StreamEvent (raw API SSE wrapped) between the buffered messages: message_start → content_block_start(text) → content_block_delta(text_delta)* → content_block_stop → content_block_start(tool_use) → content_block_delta(input_json_delta)* → content_block_stop → message_delta → message_stop → buffered AssistantMessage → [tool exec] → next turn's stream events → ResultMessage. Text is in delta.type=='text_delta'.delta.text; tool input accumulates from delta.type=='input_json_delta'.partial_json. Known limitation: structured-output JSON does NOT stream — only appears in final ResultMessage.structured_output. 
+ +**Data model:** StreamEvent: {uuid: str, session_id: str, event: dict[str,Any] (raw API SSE), parent_tool_use_id: str|None}. TS SDKPartialAssistantMessage.type === 'stream_event'. + +**Config:** Check via `message.type === 'result'` (TS) or isinstance(message, ResultMessage) (Python). For streaming check isinstance(message, StreamEvent) then message.event.get('type'). + +## Key behaviors +- maxTurns counts ONLY tool-use turns — the final text-only response is not counted. max_turns=2 in a 3-tool-turn task stops before the 3rd tool. This is the single most commonly mis-stated fact about the loop. +- Withholding pattern: recoverable errors (prompt_too_long from context collapse/reactive compact, max_output_tokens) are NOT yielded to the stream during recovery because SDK consumers (Cowork, desktop app) terminate the session on any message carrying an error field. They are pushed to internal assistantMessages and surfaced only if recovery fails. +- Empty-response gotcha (API-level): adding a text block immediately AFTER a tool_result teaches Claude to expect user input after every tool use and yields empty responses (2-3 tokens, stop_reason end_turn). Correct: send tool_result directly with no trailing text. The agent loop in Claude Code handles this internally — re-implementors must format tool_result user messages without extra text. +- Context window never resets within a session — accumulates system prompt + tool defs + CLAUDE.md + conversation + tool I/O across turns. Static prefixes (system prompt, tool defs, CLAUDE.md) are prompt-cached so only the first request pays full cost. +- Subagents get a FRESH conversation (no parent turns) — only their final response returns to the parent as a tool_result. Subagents ALWAYS stop on token budget (budget is top-level only). +- Streaming input mode (default, recommended) supports images, queued messages, real-time interruption, full tool access, mid-loop user input via async generator yielding SDKUserMessage. 
Single-message mode does NOT support images/queueing/interruption and raises on error results (e.g. error_max_turns) — wrap in try block. +- pause_turn handling: when using server tools (web_search_20250305, web fetch) and the server-side sampling loop hits its 10-iteration default limit, the response may contain a server_tool_use without a matching server_tool_result. Agent loop must append the assistant response and re-request to let Claude finish. +- Trailing events after ResultMessage: a few system events (prompt_suggestion etc.) can arrive AFTER ResultMessage — iterate the stream to completion, do NOT break on the result message. +- Stop hooks can force another iteration: when the model produces text-only (thinks it's done) but a stop hook returns blocking errors, the errors are appended as a user message and the loop continues with stopHookActive=true (prevents re-running same hooks). preventContinuation → Terminal reason 'stop_hook_prevented'. Stop hooks are SKIPPED when the last assistant message is an API error — prevents death spiral (error→hook blocking→retry→error). +- Effort vs extended-thinking are independent: effort in {low,medium,high,xhigh,max} controls reasoning depth per response; extended thinking produces visible chain-of-thought blocks. You can combine effort='low' with extended thinking on, or effort='max' without it. +- thinking/redacted_thinking blocks have 3 inviolable rules: (1) a message with a thinking block must be in a query with max_thinking_length>0; (2) a thinking block may never be the last block in a message; (3) thinking blocks must be preserved for the whole assistant trajectory. Violations → opaque API 400s. Model fallback must STRIP signature blocks (they are model-bound). +- Orphaned tool_use safety net: yieldMissingToolResultBlocks synthesizes error tool_results for every tool_use lacking a result — fires on model crash, fallback mid-stream, and user abort. Without it the next API call 400s on the protocol violation. 
+- Abort has two distinct paths: abort-during-streaming (executor drains queued results or synthesizes them; signal.reason distinguishes hard Ctrl+C from submit-interrupt which skips the interruption message since the queued user msg provides context) vs abort-during-tool-execution (interruption message carries toolUse:true flag). +- compact_boundary message: Python emits SystemMessage subtype='compact_boundary'; TS emits a SEPARATE SDKCompactBoundaryMessage type (not a SystemMessage subtype). Compaction replaces older messages with a summary — early instructions may be lost; persistent rules belong in CLAUDE.md (re-injected each request). + +## External interfaces +- Python: from claude_agent_sdk import query, ClaudeAgentOptions, AssistantMessage, UserMessage, ResultMessage, SystemMessage; from claude_agent_sdk.types import StreamEvent, AgentDefinition, TaskBudget, HookEvent +- TypeScript: import { query, tool, createSdkMcpServer, startup, listSessions, getSessionMessages } from '@anthropic-ai/claude-agent-sdk'; SDKMessage union of SDKAssistantMessage|SDKUserMessage|SDKUserMessageReplay|SDKResultMessage|SDKSystemMessage|SDKPartialAssistantMessage|SDKCompactBoundaryMessage|SDKStatusMessage|SDKLocalCommandOutputMessage|SDKHookStartedMessage|SDKHookProgressMessage|... +- query() returns AsyncGenerator<SDKMessage> (TS) / async iterator (Python). CLI binary bundled as a per-platform optional dep @anthropic-ai/claude-agent-sdk-<platform>-<arch>. +- Anthropic Messages API: model (e.g. claude-opus-4-8), messages[], system, tools[], max_tokens, stream=true, beta headers e.g. context-1m-2025-08-07, task-budgets-2026-03-13 +- Transcript: JSONL, one entry per message incl. 
isMeta nudge messages; user msg persisted before API call for resume +- Hooks: PreToolUse, PostToolUse, PostToolUseFailure, UserPromptSubmit, Stop, SubagentStop, PreCompact, Notification, SubagentStart, PermissionRequest + +## Open questions +- Exact public option key for the +500k-style token-budget auto-continue on the SDK surface vs the internal output_config.task_budget (task-budgets-2026-03-13 beta) — the source dives describe the internal feature flag TOKEN_BUDGET but the public ClaudeAgentOptions field name for per-turn token budget is not pinned in the fetched docs. +- Precise current default value of the server-side sampling-loop iteration limit that triggers pause_turn (docs say 'default 10' — verify it hasn't changed for the newest server tools). +- Whether the StreamingToolExecutor gate `config.gates.streamingToolExecution` is on by default in the latest shipped CLI binary, or still feature-flagged — affects whether tools begin executing before the assistant response completes. +- Exact behavior of permission_mode='auto' (TS-only, model classifier) availability across models in mid-2026 — docs mark it as conditional. + +## Sources +- [How the agent loop works — Claude Code Docs](https://code.claude.com/docs/en/agent-sdk/agent-loop) — Official authoritative spec of the turn cycle, message types (SystemMessage/AssistantMessage/UserMessage/ResultMessage), max_turns semantics (counts tool-use turns only), ResultMessage subtypes, permission modes, effort levels, parallel tool execution, context window + auto-compaction. +- [Stream responses in real-time — Claude Code Docs](https://code.claude.com/docs/en/agent-sdk/streaming-output) — Official spec of include_partial_messages/includePartialMessages, StreamEvent dataclass fields, raw SSE event ordering (message_start, content_block_start/delta/stop, message_delta, message_stop), text_delta vs input_json_delta, known structured-output limitation. 
+- [Streaming Input vs Single Message — Claude Code Docs](https://code.claude.com/docs/en/agent-sdk/streaming-vs-single-mode) — Official distinction between persistent streaming-input mode (images, queued msgs, interruption) and one-shot single-message mode; SDKUserMessage generator shape; single-message raises on error results. +- [Stop reasons and fallback — Claude API Docs](https://platform.claude.com/docs/en/build-with-claude/handling-stop-reasons) — Authoritative enumeration of API stop_reason values (end_turn, max_tokens, stop_sequence, tool_use, pause_turn, refusal, model_context_window_exceeded), the empty-response-after-tool_result gotcha, pause_turn default 10-iteration limit, streaming stop_reason appears only in message_delta. +- [Ch 5. The Agent Loop — Claude Code from Source](https://claude-code-from-source.com/ch05-agent-loop/) — Source-level reverse engineering of query.ts (~1730 lines): why async generator (backpressure, typed Terminal return, yield*), 10-field LoopState, immutable state reconstruction, 4-layer context compression (snip/microcompact/context collapse/auto-compact), withholding pattern, escalation ladder, 10 Terminal + 7 Continue reasons, exact thresholds (13k/3k buffers, MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES=3, MAX_OUTPUT_TOKENS_RECOVERY_LIMIT=3), token-budget diminishing-returns rules, thinking-block rules, orphaned tool_result safety net. 
+- [Lesson 04 — Query Engine & LLM API (source deep dive)](https://github.com/inematds/claudecode-manual/blob/main/01-core-architecture/04-query-engine.md) — Independent source-level confirmation of QueryEngine.submitMessage → query() → queryLoop() → queryModel/callModel → stop hooks, transcript-first persistence, SSE→AssistantMessage reconstruction, withRetry() internals (DEFAULT_MAX_RETRIES=10, getRetryDelay formula, 529 routing, Opus 3x529→FallbackTriggeredError, OAuth 401 refresh, context-overflow token parse), exact token-budget constants (COMPLETION_THRESHOLD=0.9, DIMINISHING_THRESHOLD=500, continuationCount>=3), stop-hook categories and fire-and-forget background tasks. +- [Agent SDK reference — TypeScript — Claude Code Docs](https://code.claude.com/docs/en/agent-sdk/typescript) — Authoritative TypeScript wire format: SDKMessage discriminated union (type field values 'assistant'|'user'|'result'|'system'|'stream_event'|'compact_boundary'|...), query() signature, startup() pre-warm, tool()/ToolAnnotations (readOnlyHint gates parallel exec), SessionMessage shape from transcripts. +- [claude-agent-sdk-python types.py](https://github.com/anthropics/claude-agent-sdk-python/blob/main/src/claude_agent_sdk/types.py) — Authoritative Python wire format and config: PermissionMode literal, EffortLevel literal, AgentDefinition fields (maxTurns, effort, model, permissionMode), TaskBudget (output_config.task_budget with task-budgets-2026-03-13 beta), full HookEvent literal, ToolPermissionContext/PermissionResult, permission update protocol (addRules/replaceRules/setMode destinations). +- [Agent SDK — Claude Wiki (message categories)](https://claude-wiki.com/agent-sdk.html) — Corroborating summary of SDKMessage stream categories and that SDKAssistantMessage may carry an error field (basis for the withholding-pattern behavior). 
diff --git a/docs/claude-code-architecture/research/context-compaction.md b/docs/claude-code-architecture/research/context-compaction.md new file mode 100644 index 0000000..28dd045 --- /dev/null +++ b/docs/claude-code-architecture/research/context-compaction.md @@ -0,0 +1,134 @@ +# Research: context-compaction + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code (latest v2.1.68+ as of mid-2026) manages a finite context window through a layered pipeline: (1) a client-side microcompact that runs inline before every API call to strip old tool results without an LLM, (2) an optional API-native "cached microcompact" using the new clear_tool_uses_20250919 / clear_thinking_20251015 context-editing strategies (beta, ant-only for tool clearing, GA for thinking), (3) a full auto-compact that fires when actual token usage crosses getAutoCompactThreshold() = effectiveWindow - 13,000 tokens (effectiveWindow = contextWindow - min(maxOutputTokens, 20,000)), and (4) a manual /compact command that reuses the same compactConversation() path with optional custom focus instructions and optional partial scope. Compaction sends the full history + a structured 9-section summarization prompt (which first wraps analysis in <analysis> tags then a <summary> block) to the SAME mainLoopModel with thinkingConfig disabled and maxOutputTokens capped at 20,000, then replaces history with [boundaryMarker][continuation message][kept messages][re-injected files/skills/plan]. The system prompt layer is cached separately (cache_control breakpoint at end of system prompt) so it survives compaction; the conversation layer is rebuilt from the summary. Prompt cache TTL is 5-minute by default on API keys and 1-hour on Claude subscriptions (auto-selected), with up to 4 cache_control breakpoints. Server-side compaction (beta compact-2026-01-12) is a newer API-native alternative that returns a "compaction" content block; Claude Code's client-side path is the legacy but still-primary mechanism. 
+ +## Components +### Auto-compact trigger & threshold (getAutoCompactThreshold / shouldAutoCompact / autoCompactIfNeeded) +**Purpose:** Decides when to fire full conversation compaction, based on actual token usage from the API response vs a computed threshold. + +**Mechanism:** After each turn completes, shouldAutoCompact() is invoked in the query loop. It short-circuits false for forked-agent query sources ('session_memory', 'compact', and 'marble_origami' under CONTEXT_COLLAPSE). If disabled via env/config, returns false. Under feature('REACTIVE_COMPACT') or CONTEXT_COLLAPSE, proactive auto-compact is suppressed and reactiveCompact handles the API 413. Otherwise: tokenCount = tokenCountWithEstimation(messages) - snipTokensFreed; compares against getAutoCompactThreshold(model). If above threshold: autoCompactIfNeeded() first tries trySessionMemoryCompaction (no-LLM, reuses stored memory); if that fails, calls compactConversation(messages, ctx, cacheSafeParams, suppressUserQuestions=true, customInstructions=undefined, isAutoCompact=true, recompactionInfo). MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES=3 circuit breaker stops retrying doomed compactions (added 2026-03-10 to stop ~250K wasted API calls/day). On success, runPostCompactCleanup + setLastSummarizedMessageId(undefined) + notifyCompaction (reset cache-read baseline). + +**Data model:** AutoCompactTrackingState = {compacted: bool, turnCounter: number, turnId: string, consecutiveFailures?: number}. RecompactionInfo = {isRecompactionInChain: bool, turnsSincePreviousCompact: number, previousCompactTurnId, autoCompactThreshold, querySource}. calculateTokenWarningState returns {percentLeft, isAboveWarningThreshold, isAboveErrorThreshold, isAboveAutoCompactThreshold, isAtBlockingLimit}. 
+ +**Config:** Env: CLAUDE_CODE_AUTO_COMPACT_WINDOW (int>0, clamps effective window down), CLAUDE_AUTOCOMPACT_PCT_OVERRIDE (float 1-100, returns min(percentageThreshold, base)), DISABLE_COMPACT (disables ALL incl /compact), DISABLE_AUTO_COMPACT (auto only, /compact works), CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE (int>0, overrides blocking limit), CLAUDE_CODE_MAX_OUTPUT_TOKENS. Settings.json: autoCompactEnabled (bool). Feature flags (ant-only, wrapped in feature()): REACTIVE_COMPACT (gate tengu_cobalt_raccoon -> reactive only, suppress proactive), CONTEXT_COLLAPSE (separate headroom system owns 90%/95% gates). + +### Effective context window & buffers +**Purpose:** Computes the usable context size by subtracting reserved output space and safety buffers from the raw model context window. + +**Mechanism:** getEffectiveContextWindowSize(model): contextWindow = getContextWindowForModel(model, getSdkBetas()) (200k standard, or 1M for [1m]/extended models: Opus 4.6+, Sonnet 4.6, Fable 5); if CLAUDE_CODE_AUTO_COMPACT_WINDOW set & valid, contextWindow = min(contextWindow, parsed); return contextWindow - reservedTokensForSummary where reservedTokensForSummary = min(getMaxOutputTokensForModel(model), 20_000). getAutoCompactThreshold(model): base = effectiveWindow - 13_000; if CLAUDE_AUTOCOMPACT_PCT_OVERRIDE (float 1-100) set, return min(floor(effectiveWindow*pct/100), base). Blocking limit (hard stop) = effectiveWindow - 3_000 (or CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE). Warning shown at threshold - 20_000. + +**Data model:** Constants (v2.1.68 / current autoCompact.ts): MAX_OUTPUT_TOKENS_FOR_SUMMARY=20_000; AUTOCOMPACT_BUFFER_TOKENS=13_000; WARNING_THRESHOLD_BUFFER_TOKENS=20_000; ERROR_THRESHOLD_BUFFER_TOKENS=20_000; MANUAL_COMPACT_BUFFER_TOKENS=3_000; MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES=3. + +**Config:** Env: CLAUDE_CODE_MAX_OUTPUT_TOKENS (overrides model max output). 
Constants hardcoded in autoCompact.ts: AUTOCOMPACT_BUFFER_TOKENS=13_000, WARNING_THRESHOLD_BUFFER_TOKENS=20_000, ERROR_THRESHOLD_BUFFER_TOKENS=20_000, MANUAL_COMPACT_BUFFER_TOKENS=3_000, MAX_OUTPUT_TOKENS_FOR_SUMMARY=20_000. + +### API-based microcompact (apiMicrocompact.ts -> clear_tool_uses_20250919 / clear_thinking_20251015) +**Purpose:** Server-side context-editing strategies attached to every request via context_management.edits[] — the native path that mirrors client microcompact behavior. + +**Mechanism:** getAPIContextManagement({hasThinking, isRedactThinkingActive, clearAllThinking}): if hasThinking && !isRedactThinkingActive, push {type:'clear_thinking_20251015', keep: clearAllThinking ? {thinking_turns:1} : 'all'}. Tool clearing is ant-only: if USER_TYPE==='ant' && (USE_API_CLEAR_TOOL_RESULTS || USE_API_CLEAR_TOOL_USES): push {type:'clear_tool_uses_20250919', trigger:{input_tokens: API_MAX_INPUT_TOKENS ?? 180_000}, clear_at_least:{input_tokens: trigger - keepTarget}, clear_tool_inputs: TOOLS_CLEARABLE_RESULTS} and/or the uses variant (exclude_tools: TOOLS_CLEARABLE_USES). API_MAX_INPUT_TOKENS default 180_000, API_TARGET_INPUT_TOKENS default 40_000. clear_thinking_20251015 must be listed first in edits[]. Beta header: context-management-2025-06-27. + +**Data model:** ContextEditStrategy union: {type:'clear_tool_uses_20250919', trigger:{type:'input_tokens',value}, keep:{type:'tool_uses',value}, clear_tool_inputs?, exclude_tools?, clear_at_least?} | {type:'clear_thinking_20251015', keep:{type:'thinking_turns',value}|'all'}. TOOLS_CLEARABLE_RESULTS = SHELL_TOOL_NAMES + Glob + Grep + Read + WebFetch + WebSearch. TOOLS_CLEARABLE_USES = FileEdit + FileWrite + NotebookEdit. Response: context_management.applied_edits[] with cleared_tool_uses/cleared_input_tokens. + +**Config:** Env: DISABLE_MICROCOMPACT. 
NOTE: in shipped CC, tool-result clearing via clear_tool_uses_20250919 is ant-only (gated on process.env.USER_TYPE==='ant' AND USE_API_CLEAR_TOOL_RESULTS / USE_API_CLEAR_TOOL_USES); the thinking-block strategy is always emitted when thinking is active. + +### Client-side microcompact (legacy in-memory, Rg()) +**Purpose:** In-process tool-result pruning that runs inline during message serialization (no LLM, no API context_management), the fallback when API strategies unavailable. + +**Mechanism:** Function Rg() runs during message serialization before each API call. Triggered when isAboveWarningThreshold AND clearable tool-result tokens > 20k. Algorithm: (1) find tool_use/tool_result pairs for eligible tools (bash, read_file, grep, glob, web_fetch, web_search); (2) always keep last F3Y=3 tool results; (3) scan backwards accumulating tool-result sizes until > g3Y=40k counted; (4) everything beyond that 40k window is eligible; (5) if eligible tokens > B3Y=20k, strip them (result -> '[Tool result cleared]', images/docs -> '[image]'/'[document]'); (6) cleared tool IDs tracked in U96 set across turns. NO LLM call. + +**Data model:** U96 = Set cleared IDs (persists across turns). Cleared tool result replaced with string '[Tool result cleared]' (or written to temp file with re-read instruction). Images/documents -> '[image]' / '[document]'. + +**Config:** Env: DISABLE_MICROCOMPACT. Constants (v2.1.68 deobf): g3Y=40_000, F3Y=3, B3Y=20_000, eV8=2_000. + +### Manual /compact & full compaction (compactConversation / bG6) +**Purpose:** LLM-based summarization that replaces the entire message history with a structured summary. Same code path for auto and manual; manual can take custom focus instructions and scope (partial). 
+ +**Mechanism:** compactConversation(): (1) Run PreCompact hooks (can inject custom instructions); (2) check session memory (QP1) — if a stored summary exists and fits, skip the LLM; (3) build API request = full history + system prompt (same as conversation) + summary prompt as a final USER message, using mainLoopModel, thinkingConfig:{type:'disabled'}, maxOutputTokensOverride=20_000, tools = read_file only; (4) stream response, extract the <summary>...</summary> block (the model first emits an <analysis> block for its own reasoning, then the <summary>); (5) clear readFileState; (6) re-inject recently-read files (bM4), plan file (IP1), skills (uM4), plan-mode (mM4); (7) run session-start hooks; (8) return {boundaryMarker:'Conversation compacted', summaryMessages, attachments, hookResults}. The summary request SHARES the prefix with the live conversation, so it reads the existing cache rather than reprocessing history. Server-side variant: beta compact-2026-01-12, context_management.edits=[{type:'compact_20260112'}], returns a 'compaction' content block; API drops all blocks before it on subsequent requests. + +**Data model:** 9 sections: Primary Request/Intent; Key Technical Concepts; Files & Code Sections (with snippets); Errors & fixes; Problem Solving; All user messages (non-tool); Pending Tasks; Current Work; Optional Next Step (verbatim quotes). CompactionResult = {boundaryMarker, summaryMessages, attachments, hookResults}. Usage.iterations[] = {type:'compaction'|'message', input_tokens, output_tokens}. + +### Continuation message & post-compaction reconstruction (JQ6) +**Purpose:** The user-role message injected as the first item of the new history after a compaction, framing the summary and pointing to the full transcript. 
+ +**Mechanism:** After compaction, history is rebuilt as: [boundaryMarker message 'Conversation compacted'][summaryMessage JQ6 containing analysis+summary as plain text][messagesToKeep (partial /compact only)][attachments: re-injected files/skills/plan][hookResults: session-start outputs]. JQ6 text: 'This session is being continued from a previous conversation that ran out of context. The summary below covers the earlier portion...' followed by the analysis and summary blocks, then 'If you need specific details from before compaction... read the full transcript at: {transcriptPath}', and for auto-compact: 'Please continue the conversation from where we left off without asking the user any further questions. Continue with the last task.' + +**Data model:** Continuation message = USER role with: intro line, plain-text analysis block, plain-text summary block, optional transcriptPath pointer, optional 'Recent messages preserved verbatim', optional auto-compact tail instruction. + +### Prompt cache layering & breakpoints (cache_control) +**Purpose:** How Claude Code orders the request and places cache_control breakpoints to maximize prefix reuse and minimize invalidation. + +**Mechanism:** cache_control breakpoint at end of system prompt keeps the system prompt cached separately so a compaction summary write doesn't invalidate it. Up to 4 breakpoints allowed. TTL selection: on Claude subscription, CC auto-requests 1h TTL (drops to 5m when over plan limit, drawing usage credits); on API key/Bedrock/Vertex/Foundry/Claude Platform on AWS, default 5m, opt into 1h via ENABLE_PROMPT_CACHING_1H=1; FORCE_PROMPT_CACHING_5M=1 forces 5m regardless. Cache scope is per machine+directory (system prompt embeds cwd, platform, shell, OS version, auto-memory paths, branch, recent commits). Subagents use 5m TTL even on subscription; forks inherit parent prefix and read parent cache. 
+ +**Data model:** Layers: System prompt (core instructions, tool defs, output style) | Project context (CLAUDE.md, auto memory, unscoped rules) | Conversation (messages, results). cache key includes model + effort level + fast-mode header. current_usage fields: cache_creation_input_tokens, cache_read_input_tokens. + +**Config:** For sharing cache across machines (Agent SDK), suppress per-machine system-prompt sections (working dir, platform, etc.). + +### System-prompt & project-context token budgeting +**Purpose:** Controls what fills the fixed prefix vs the compaction-volatile conversation layer, and what survives compaction. + +**Mechanism:** At session start: system prompt + tool definitions + project-root CLAUDE.md + user-level CLAUDE.md + auto memory load once (held in memory, ~2-5k tokens typical; recommendation: keep CLAUDE.md <200 lines / ~2-2.5k tokens). After compaction: system prompt & output style unchanged (not message history); project-root CLAUDE.md + unscoped rules re-injected from disk; auto memory re-injected from disk; path-scoped rules (paths: frontmatter) LOST until a matching file is read again; nested CLAUDE.md LOST until a file in that subdir is read; invoked skill bodies re-injected, capped at 5,000 tokens/skill and 25,000 total, oldest dropped first (truncation keeps the start of SKILL.md). Manual /compact with focus instructions lets the user steer what survives. + +**Data model:** Invocation counter per skill; total bytes counter; oldest-first eviction. Re-injection keys: skills (capped), CLAUDE.md (re-read from disk), auto memory (re-read from disk). + +### Server-side compaction vs SDK compaction (compact_20260112) +**Purpose:** Two API-level compaction modes: server-side (recommended, beta) vs SDK client-side (deprecated compaction_control). 
+ +**Mechanism:** Server-side (beta compact-2026-01-12, context_management.edits with type:'compact_20260112'): trigger default 150k (min 50k), pause_after_compaction to inject extra blocks, custom instructions fully replace default prompt, supports streaming (single compaction_delta event), returns usage.iterations[] (compaction + message iterations; top-level usage excludes compaction iteration). SDK client-side (tool_runner, compaction_control — DEPRECATED in favor of server-side): threshold default 100k, optional separate summary model, injects summary prompt as user turn, replaces history with <summary>...</summary>, can use a cheaper summary model (server-side cannot). Token-count note: cache_read_input_tokens from server tools (web search) can inflate perceived usage and trigger premature compaction. + +**Data model:** Server stop_reason='compaction'. context_management.original_input_tokens vs input_tokens (after edits). token-count endpoint applies existing compaction blocks but triggers no new compaction. + +**Config:** compaction_control deprecated in Python/TS/Ruby SDKs in favor of server-side compact_20260112. + +## Key behaviors +- DEFAULT AUTO-COMPACT THRESHOLD (the headline number a re-implementor must get right): effectiveWindow - 13,000, where effectiveWindow = contextWindow - min(maxOutputTokens, 20,000). For a 200k model with 8192 max output: 200,000 - 8,192 - 13,000 = 178,808 (~89.4%). For a 1M model: 1,000,000 - 20,000 - 13,000 = 967,000 when maxOutputTokens >= 20,000 (978,808 with the same 8,192 max output). The 13k buffer was lowered from earlier values of 20k/33k/45k in early-2026 changes; current constant is 13,000. +- TOKEN SOURCE FOR THE TRIGGER: must use ACTUAL token count from the API response (input_tokens + cache_creation_input_tokens + cache_read_input_tokens + output_tokens), NOT a client-side estimate. shouldAutoCompact does use tokenCountWithEstimation for the proactive check, but the authoritative numbers come from the API usage object. Using estimates will mis-fire. +- BLOCKING LIMIT (hard stop) = effectiveWindow - 3,000. 
This is where the session truly cannot proceed. Below autocompact threshold but above warning threshold, microcompact fires. There are 5 distinct token states: normal / above warning (threshold-20k) / above error / above autocompact (threshold) / at blocking limit (effectiveWindow-3k). +- MICROCOMPACT IS NON-LLM: client-side microcompact (Rg) does pure in-memory string replacement ('[Tool result cleared]') and never calls the model. It runs INLINE during message serialization before every API call, can fire in the same turn as full compaction, and tracks cleared tool IDs in a persistent set U96. Constants: protect last 40k tokens of tool results, always keep last 3 tool results, only act if >20k tokens clearable. +- API-BASED MICROCOMPACT IS ANT-ONLY for tool clearing: clear_tool_uses_20250919 strategy is gated behind process.env.USER_TYPE==='ant' AND USE_API_CLEAR_TOOL_RESULTS/USES. The clear_thinking_20251015 strategy (keep:'all') IS shipped to everyone when extended thinking is active. The beta header is context-management-2025-06-27. A 1h-idle condition sets clearAllThinking -> keep only last thinking turn (value:1, since schema requires >=1). +- COMPACT INVOKES THE MODEL WITH thinking DISABLED and maxOutputTokens capped at 20,000, tools = read_file only. Extended thinking is turned off during the summarization sub-call. The summary request reuses the SAME system prompt + history prefix so it gets a cache hit (the slow part is generation, not cache miss). +- CIRCUIT BREAKER: MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES=3. After 3 consecutive failed auto-compacts (e.g. irrecoverable prompt_too_long), CC stops trying for the rest of the session. Added 2026-03-10 because 1,279 sessions had 50+ consecutive failures (up to 3,272), wasting ~250K API calls/day. +- RECOMPACTION METADATA is threaded through: isRecompactionInChain (was the previous turn already a compaction?), turnsSincePreviousCompact, previousCompactTurnId. 
This lets the summarization prompt know it is summarizing an already-summarized history. +- COMPACT CAN FAIL if the model calls a tool during summarization instead of writing a summary -> returns compaction block with content:null (server-side) or throws 'Failed to generate conversation summary' (client). Workaround: custom instructions explicitly telling the model not to call tools. +- CACHE INVALIDATION LIST (a re-impl must replicate exactly): switching models, changing effort level (/effort), enabling fast mode (header is cache key, fixed to persist across toggles in v2.1.86+), connecting/disconnecting an MCP server whose tools load into prefix (deferred tools are safe), enabling/disabling a plugin with MCP servers, denying an entire tool via bare-name deny rule, compacting, upgrading Claude Code. Cache-SAFE: file edits, editing CLAUDE.md mid-session (doesn't apply until restart), changing output style, changing permission mode, invoking skills/commands (append-only), /recap, /rewind, spawning subagents. +- TTL LOGIC: subscription auth -> 1h auto (drops to 5m when over limit using credits); API key/Bedrock/Vertex/Foundry -> 5m default, ENABLE_PROMPT_CACHING_1H=1 for 1h; FORCE_PROMPT_CACHING_5M=1 forces 5m everywhere. Subagents ALWAYS 5m even on subscription. Forks inherit parent cache. Cache scope = per machine+directory (system prompt embeds cwd/platform/shell/OS/branch/recent-commits). +- WHAT SURVIVES COMPACTION (exact table): system prompt + output style = unchanged; project-root CLAUDE.md + unscoped rules + auto memory = re-injected from disk; path-scoped rules (paths: frontmatter) = LOST until matching file read; nested subdir CLAUDE.md = LOST until file in subdir read; invoked skills = re-injected capped 5,000 tokens/skill, 25,000 total, oldest dropped first, truncation keeps TOP of SKILL.md; hooks = N/A (run as code). 
+- SESSION MEMORY COMPACTION is tried FIRST (no LLM) before the full compactConversation path — if a stored session-memory summary exists and fits, it's reused. Cache-sharing feature flag tengu_compact_cache_prefix tries to reuse a compaction result cached from another session with the same conversation prefix. Streaming retry flag tengu_compact_streaming_retry retries compaction on stream failure. +- REACTIVE COMPACT (feature('REACTIVE_COMPACT'), gate tengu_cobalt_raccoon, ant-only): suppresses proactive auto-compact and instead lets the API return prompt_too_long (413), then reactiveCompact handles it as a fallback (it consults isAutoCompactEnabled directly, bypassing the suppression). +- CONTEXT COLLAPSE (feature('CONTEXT_COLLAPSE')): a separate headroom system with 90% commit-start / 95% blocking-spawn gates. When enabled, autocompact is suppressed (would race collapse at ~93% effective). marble_origami (ctx-agent) query source is also excluded from autocompact because runPostCompactCleanup would destroy the main thread's committed log. + +## External interfaces +- Anthropic API beta header: compact-2026-01-12 (server-side compaction, compact_20260112 edit in context_management.edits) +- Anthropic API beta header: context-management-2025-06-27 (clear_tool_uses_20250919, clear_thinking_20251015) +- API request field: context_management.edits = [ContextEditStrategy...] 
(compaction, clear_tool_uses, clear_thinking) +- API response field: context_management.applied_edits[] (cleared_tool_uses, cleared_thinking_turns, cleared_input_tokens) +- API response: content block type 'compaction' (stop_reason 'compaction'); streaming content_block_delta type 'compaction_delta' +- API response: usage.iterations[] = [{type:'compaction'|'message', input_tokens, output_tokens}] +- API: cache_control = {type:'ephemeral', ttl:'5m'|'1h'} on system prompt / messages / compaction blocks (max 4 breakpoints) +- Slash command: /compact [instructions] (full or partial from message index) +- Slash command: /context (live breakdown by category) +- Slash command: /clear (full reset, reloads startup) +- Slash command: /memory (show loaded CLAUDE.md + auto memory) +- Settings.json key: autoCompactEnabled (bool) +- Env vars: DISABLE_COMPACT, DISABLE_AUTO_COMPACT, DISABLE_MICROCOMPACT, DISABLE_PROMPT_CACHING[_HAIKU|_SONNET|_OPUS|_FABLE], ENABLE_PROMPT_CACHING_1H, FORCE_PROMPT_CACHING_5M, CLAUDE_AUTOCOMPACT_PCT_OVERRIDE, CLAUDE_CODE_AUTO_COMPACT_WINDOW, CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE, CLAUDE_CODE_MAX_OUTPUT_TOKENS, CLAUDE_AFTER_LAST_COMPACT +- PreCompact hook (injects custom instructions into summary prompt) +- sessionMemory / transcript files (transcriptPath pointer in JQ6 continuation message) + +## Open questions +- Exact current value of the autocompact buffer in the very latest shipped version (sources show 13,000 as of v2.1.68 / early 2026; community write-ups reference an older 20k/33k/45k progression — a re-impl should treat 13,000 as the constant but verify against the installed package). +- Whether server-side compact_20260112 is actually wired into shipped Claude Code yet, or whether CC still uses the client-side LLM-summarization path (compactConversation) as of mid-2026 — the API feature is beta and the SDK compaction_control is deprecated, but CC's own usage is not publicly confirmed. 
+- The exact set of tools eligible for client-side microcompact clearing in the current build (deobf v2.1.68 lists bash, read_file, grep, glob, web_fetch, web_search + edit/write/notebook for the uses path; whether TodoWrite, Task, etc. are now included). +- Exact behavior of 'snip' (snipTokensFreed parameter) — a separate pruning mechanism whose rough-delta is subtracted from the token estimate; its trigger and algorithm are not fully documented. +- Whether the 1M context window now requires a beta header or [1m] model variant on Opus 4.6+/Sonnet 4.6 (sources say GA/no-beta as of the 1M GA announcement, but Bedrock/Vertex still gate it behind model selection). + +## Sources +- [Compaction - Claude API Docs (server-side compact_20260112)](https://platform.claude.com/docs/en/build-with-claude/compaction) — Official server-side compaction spec: beta header compact-2026-01-12, trigger default 150k, pause_after_compaction, custom instructions, compaction block handling, usage.iterations, cache_control on compaction blocks, streaming events, model-list (Opus 4.8/Sonnet 4.6), limitations (tool-call-during-summary). +- [autoCompact.ts source (deobfuscated) - alex000kim/claude-code](https://github.com/alex000kim/claude-code/blob/main/src/services/compact/autoCompact.ts) — Authoritative source for exact thresholds/buffers/env vars: MAX_OUTPUT_TOKENS_FOR_SUMMARY=20000, AUTOCOMPACT_BUFFER_TOKENS=13000, WARNING/ERROR=20000, MANUAL_COMPACT=3000, MAX_CONSECUTIVE_FAILURES=3, getEffectiveContextWindowSize, getAutoCompactThreshold, calculateTokenWarningState, isAutoCompactEnabled, shouldAutoCompact, circuit breaker, CLAUDE_CODE_AUTO_COMPACT_WINDOW, CLAUDE_AUTOCOMPACT_PCT_OVERRIDE, CLAUDE_CODE_BLOCKING_LIMIT_OVERRIDE, DISABLE_COMPACT/DISABLE_AUTO_COMPACT, REACTIVE_COMPACT and CONTEXT_COLLAPSE feature gating. 
+- [Claude Code compaction deep dive v2.1.68 (deobfuscated gist)](https://gist.github.com/sam-saffron-jarvis/9d8e291c4e696ac7948702d6c4884448) — Deobfuscated v2.1.68 details: the 5 mechanisms table, exact full-compact/partial-compact/sub-agent prompts, JQ6 continuation message, client-side microcompact constants (g3Y=40000, F3Y=3, B3Y=20000, eV8=2000), bG6() flow, post-compaction re-injection, edge cases, full env-var table. +- [Context editing - Claude API Docs (clear_tool_uses_20250919 / clear_thinking_20251015)](https://platform.claude.com/docs/en/build-with-claude/context-editing) — Official server-side context-editing spec: beta header context-management-2025-06-27, strategy params (trigger default 100k, keep default 3 tool uses, clear_at_least, exclude_tools, clear_tool_inputs), thinking clearing keep model-class defaults (Opus 4.5+/Sonnet 4.6+ keep all), cache invalidation rules, applied_edits response, token-count endpoint, SDK compaction_control deprecation + defaults (100k, custom model, summary prompt). +- [How Claude Code uses prompt caching - Claude Code Docs](https://code.claude.com/docs/en/prompt-caching) — Official cache layering: prefix-match rule, 3-layer order (system prompt / project context / conversation), exhaustive invalidation list, cache-safe list, TTL selection (subscription=1h auto, API key=5m, ENABLE_PROMPT_CACHING_1H, FORCE_PROMPT_CACHING_5M), cache scope per machine+directory, subagent/fork cache behavior, cache token fields. +- [Explore the context window - Claude Code Docs](https://code.claude.com/docs/en/context-window) — Official what-survives-compaction table (system prompt unchanged, CLAUDE.md/auto-memory re-injected from disk, path-scoped rules & nested CLAUDE.md lost, skills re-injected capped 5,000/skill + 25,000 total oldest-first), /context and /memory commands, 1M context on Fable 5/Opus 4.6+/Sonnet 4.6. 
+- [apiMicrocompact.ts source (API context-management strategies)](https://claude-code-os.vercel.app/docs/claude-src/file/services/compact/apiMicrocompact.ts) — Source for getAPIContextManagement: DEFAULT_MAX_INPUT_TOKENS=180_000, DEFAULT_TARGET_INPUT_TOKENS=40_000, clear_thinking_20251015 keep:'all' vs clearAllThinking keep:{thinking_turns:1}, TOOLS_CLEARABLE_RESULTS (shell/glob/grep/read/webfetch/websearch) and TOOLS_CLEARABLE_USES (edit/write/notebook), ant-only gating (USER_TYPE==='ant' + USE_API_CLEAR_TOOL_RESULTS/USES), env API_MAX_INPUT_TOKENS/API_TARGET_INPUT_TOKENS. diff --git a/docs/claude-code-architecture/research/hooks.md b/docs/claude-code-architecture/research/hooks.md new file mode 100644 index 0000000..1dcb41b --- /dev/null +++ b/docs/claude-code-architecture/research/hooks.md @@ -0,0 +1,101 @@ +# Research: hooks + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code's hooks system lets users attach deterministic handlers (shell commands, HTTP endpoints, MCP tool calls, or LLM prompt/agent evaluations) to ~30 named lifecycle events (PreToolUse, PostToolUse, PostToolUseFailure, PostToolBatch, PermissionRequest, PermissionDenied, UserPromptSubmit, UserPromptExpansion, Notification, Stop, StopFailure, SubagentStart, SubagentStop, TeammateIdle, TaskCreated, TaskCompleted, SessionStart, Setup, SessionEnd, PreCompact, PostCompact, ConfigChange, CwdChanged, FileChanged, WorktreeCreate, WorktreeRemove, InstructionsLoaded, MessageDisplay, Elicitation, ElicitationResult). Hooks are configured in settings.json under a top-level `hooks` key (3-level nesting: event -> matcher group -> handler array). Command hooks receive event JSON on stdin and signal via exit code (0=success/JSON, 2=blocking error, other=non-blocking error) plus optional stdout JSON. 
The JSON output supports universal fields (continue, stopReason, suppressOutput, systemMessage, terminalSequence) plus event-specific decision fields: PreToolUse uses hookSpecificOutput.permissionDecision (allow/deny/ask/defer); PermissionRequest uses hookSpecificOutput.decision.behavior (allow/deny) + updatedPermissions; PostToolUse/Stop/etc use top-level decision:"block"+reason; PermissionDenied uses hookSpecificOutput.retry. PreToolUse precedence is deny>defer>ask>allow, and PreToolUse hooks fire BEFORE permission-mode checks (a deny hook blocks even in bypassPermissions). Hooks run in parallel with dedup; output capped at 10000 chars. + +## Components +### Configuration schema & resolution +**Purpose:** Defines where/how hooks are declared and merged across scopes + +**Mechanism:** JSON config at 3 nesting levels: hook event name -> array of matcher groups (each {matcher, hooks:[]}) -> array of hook handler objects. On event fire: matcher evaluated against the input field (tool_name for tool events, source/reason/type for others); matched groups' handlers run in PARALLEL; identical handlers auto-deduped (command dedup by command+args, HTTP by URL). For tool events, an optional per-handler `if` field (permission-rule syntax like "Bash(git *)") filters further before spawning the process. Hooks run with user's full permissions and cwd = session cwd; env inherits parent plus CLAUDE_PROJECT_DIR, CLAUDE_PLUGIN_ROOT, CLAUDE_PLUGIN_DATA, CLAUDE_ENV_FILE, CLAUDE_CODE_REMOTE, CLAUDE_EFFORT. As of v2.1.139 macOS/Linux hooks run in their own session WITHOUT a controlling terminal (no /dev/tty). + +**Data model:** settings.json: {"hooks": {"<EventName>": [ {"matcher": "<pattern>", "hooks": [ <handler>, ... ] } ] }}. Matcher group = {matcher, hooks[]}. Handler (command) = {type:"command", command, args?, timeout?, async?, asyncRewake?, shell?, if?, statusMessage?, once?}. HTTP = {type:"http", url, headers?, allowedEnvVars?, timeout?}. mcp_tool = {type:"mcp_tool", server, tool, input?, timeout?}. 
prompt = {type:"prompt", prompt, model?, timeout?, continueOnBlock?}. agent = {type:"agent", prompt, model?, timeout?}. + +**Config:** Hook timeout defaults: command/http/mcp_tool = 600s (10 min); UserPromptSubmit lowers these to 30s; MessageDisplay lowers to 10s; prompt = 30s; agent = 60s; SessionEnd = 1.5s default (raised to highest per-hook timeout up to 60s; CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS overrides). disableAllHooks:true disables all (managed hooks need managed-level disable). allowManagedHooksOnly blocks user/project/plugin hooks. + +### Hook event catalog +**Purpose:** Enumerates every lifecycle point that can fire a hook + +**Mechanism:** Events: SessionStart, Setup, UserPromptSubmit, UserPromptExpansion, PreToolUse, PermissionRequest, PermissionDenied, PostToolUse, PostToolUseFailure, PostToolBatch, Notification, MessageDisplay, SubagentStart, SubagentStop, TaskCreated, TaskCompleted, Stop, StopFailure, TeammateIdle, InstructionsLoaded, ConfigChange, CwdChanged, FileChanged, WorktreeCreate, WorktreeRemove, PreCompact, PostCompact, Elicitation, ElicitationResult, SessionEnd. Cadences: once/session (SessionStart/SessionEnd), once/turn (UserPromptSubmit/Stop/StopFailure), every tool call (PreToolUse/PostToolUse/etc.). Events without matcher support (always fire): UserPromptSubmit, PostToolBatch, Stop, TeammateIdle, TaskCreated, TaskCompleted, WorktreeCreate, WorktreeRemove, CwdChanged, MessageDisplay. + +**Data model:** 30+ events total. Tool-loop: PreToolUse, PermissionRequest, PermissionDenied, PostToolUse, PostToolUseFailure, PostToolBatch. Per-turn: UserPromptSubmit, UserPromptExpansion, Stop, StopFailure. Per-session: SessionStart, Setup, SessionEnd. Subagent/team: SubagentStart, SubagentStop, TeammateIdle, TaskCreated, TaskCompleted. Display: MessageDisplay. Async/side-effect: Notification, InstructionsLoaded, ConfigChange, CwdChanged, FileChanged, WorktreeCreate, WorktreeRemove. Compaction: PreCompact, PostCompact. 
MCP elicitation: Elicitation, ElicitationResult. + +**Config:** Event-specific matchers: PreToolUse/PostToolUse/PostToolUseFailure/PermissionRequest/PermissionDenied on tool_name; SessionStart on source(startup|resume|clear|compact); Setup on init|maintenance; SessionEnd on reason(clear|resume|logout|prompt_input_exit|bypass_permissions_disabled|other); Notification on permission_prompt|idle_prompt|auth_success|elicitation_dialog|elicitation_complete|elicitation_response; SubagentStart/SubagentStop on agent_type; PreCompact/PostCompact on manual|auto; ConfigChange on user_settings|project_settings|local_settings|policy_settings|skills; StopFailure on error type; InstructionsLoaded on load reason; UserPromptExpansion on command name; Elicitation/ElicitationResult on MCP server name; FileChanged = literal filenames split on |. + +### Stdin JSON input contract +**Purpose:** The exact JSON payload passed to every hook + +**Mechanism:** Every event's stdin JSON carries common fields plus event-specific fields. The matcher is evaluated against a specific field from this JSON (e.g. tool_name for PreToolUse). + +**Data model:** Common stdin JSON: {session_id, transcript_path, cwd, permission_mode (default|plan|acceptEdits|auto|dontAsk|bypassPermissions), hook_event_name, effort:{level:low|medium|high|xhigh|max}}. Under --agent/subagent also: agent_id, agent_type. PreToolUse adds: tool_name, tool_input (tool-specific), tool_use_id. PostToolUse adds: tool_input, tool_response, tool_use_id, duration_ms. PermissionRequest adds: tool_name, tool_input, permission_suggestions[] (NO tool_use_id). Notification adds: message, title?, notification_type. Stop adds: stop_hook_active, last_assistant_message, background_tasks[], session_crons[]. SubagentStop adds: agent_id, agent_type, agent_transcript_path, last_assistant_message, stop_hook_active, background_tasks, session_crons. SessionStart adds: source, model?, agent_type?, session_title?. SessionEnd adds: reason. 
PreCompact/PostCompact add: trigger, custom_instructions/compact_summary. + +**Config:** agent_id/agent_type only added when running under --agent or inside subagent. model field ONLY on SessionStart and not guaranteed. effort/CLAUDE_EFFORT only when model supports effort param. + +### Exit code / stdout contract +**Purpose:** How a hook signals block/allow/error + +**Mechanism:** Exit 0 = success; stdout parsed for JSON (only on exit 0). For UserPromptSubmit/UserPromptExpansion/SessionStart, stdout (even non-JSON) is added to Claude context. Exit 2 = BLOCKING error: stdout/JSON IGNORED, stderr fed back to Claude as error. The effect varies per event (PreToolUse blocks tool, UserPromptSubmit rejects prompt, Stop prevents stopping, PostToolUse just shows stderr since tool already ran, etc.). Any other exit code (incl 1) = NON-blocking error; transcript shows notice + first stderr line, execution continues. WorktreeCreate is the exception: ANY non-zero exit aborts creation. + +**Data model:** Exit 0 + JSON: {continue:true, stopReason?, suppressOutput:false, systemMessage?, terminalSequence?, [decision/reason for block-events], [hookSpecificOutput:{hookEventName, ...}]}. Exit 2 + stderr -> blocking. Exit other -> non-blocking error notice '<hook name> hook error' + first stderr line in transcript. + +**Config:** Mutually exclusive: exit codes OR exit-0 JSON, never both (exit 2 ignores JSON). stdout must be ONLY the JSON object (shell profile echoes break parsing). terminalSequence allowlist: OSC 0/1/2/9/99/777 + BEL only; anything else (CSI, OSC 8/52/1337) ignored. terminalSequence requires v2.1.141+. + +### Decision control / output fields +**Purpose:** Per-event structured control beyond exit codes + +**Mechanism:** Different events use different JSON shapes. (1) Top-level decision: UserPromptSubmit, UserPromptExpansion, PostToolUse, PostToolUseFailure, PostToolBatch, Stop, SubagentStop, ConfigChange, PreCompact -> {decision:"block", reason}. 
(2) hookSpecificOutput.permissionDecision: PreToolUse (allow/deny/ask/defer + reason + updatedInput + additionalContext). (3) hookSpecificOutput.decision.behavior: PermissionRequest (allow/deny + updatedInput + updatedPermissions + message + interrupt). (4) hookSpecificOutput.retry: PermissionDenied. (5) Exit code or continue:false: TeammateIdle, TaskCreated, TaskCompleted. (6) Path return: WorktreeCreate. (7) hookSpecificOutput.action: Elicitation/ElicitationResult. (8) hookSpecificOutput.displayContent: MessageDisplay. (9) Context only: SessionStart, Setup, SubagentStart. (10) None: Notification, SessionEnd, PostCompact, InstructionsLoaded, StopFailure, CwdChanged, FileChanged, WorktreeRemove. + +**Data model:** Top-level decision: {decision:"block", reason}. PreToolUse: {hookSpecificOutput:{hookEventName:"PreToolUse", permissionDecision:"allow|deny|ask|defer", permissionDecisionReason?, updatedInput?, additionalContext?}}. PermissionRequest: {hookSpecificOutput:{hookEventName:"PermissionRequest", decision:{behavior:"allow|deny", updatedInput?, updatedPermissions?, message?, interrupt?}}}. PermissionDenied: {hookSpecificOutput:{hookEventName:"PermissionDenied", retry:true}}. PostToolUse: {hookSpecificOutput:{hookEventName:"PostToolUse", decision?, reason?, additionalContext?, updatedToolOutput?, updatedMCPToolOutput?}}. Stop/SubagentStop: top-level {decision:"block", reason} OR {hookSpecificOutput:{hookEventName:"Stop", additionalContext}}. SessionStart: {hookSpecificOutput:{hookEventName:"SessionStart", additionalContext?, initialUserMessage?, sessionTitle?, watchPaths?, reloadSkills?}}. + +**Config:** PreToolUse precedence deny>defer>ask>allow. defer only in -p non-interactive (v2.1.89+), only single tool call in turn. additionalContext/updatedInput ignored on defer. PreToolUse deny fires BEFORE permission-mode checks (blocks even in bypassPermissions). Hooks can tighten but never loosen past deny rules. 
+ +### Prompt & agent hooks +**Purpose:** LLM-based judgment hooks vs deterministic command hooks + +**Mechanism:** prompt hook: sends prompt+input to a Claude model (Haiku default, overridable via model field) single-turn; model returns {ok:true|false, reason}. ok:false -> decision:block with per-event behavior (Stop/SubagentStop feeds reason to Claude; PreToolUse denies; PostToolUse ends turn/warning). continueOnBlock:true feeds reason back instead of ending. agent hook: spawns subagent w/ Read/Grep/Glob, up to 50 turns, returns same {ok,reason}. Both support only the 13 events that allow prompt/agent type. + +**Data model:** prompt hook: {type:"prompt", prompt:"...$ARGUMENTS...", model?, timeout:30, continueOnBlock?:false}. agent hook: {type:"agent", prompt, model?, timeout:60}. + +**Config:** SessionStart/Setup only support command+mcp_tool (not http/prompt/agent). prompt default timeout 30s, agent 60s (up to 50 turns). continueOnBlock default false. + +### Async hooks +**Purpose:** Non-blocking background execution + +**Mechanism:** async:true (command hooks only): runs in background, Claude continues immediately. On exit, additionalContext delivered on NEXT turn (waits if idle). Cannot block/return decisions. asyncRewake:true implies async AND wakes Claude on exit code 2 (stderr or stdout shown as system reminder). No dedup across async firings. + +**Data model:** async command hook: {type:"command", command, async:true, timeout?:600}. asyncRewake: {type:"command", command, asyncRewake:true}. + +**Config:** async only on type:command. async hooks cannot block. asyncRewake implies async. + +## Key behaviors +- PreToolUse fires BEFORE permission-mode checks: a hook returning permissionDecision:deny blocks the tool even in bypassPermissions mode or with --dangerously-skip-permissions. The reverse is NOT true — a hook allow does not override deny rules from any settings scope (incl managed). Hooks tighten but never loosen. 
+- Exit code 1 is NON-blocking (conventional Unix failure but treated as non-blocking error; action proceeds). ONLY exit code 2 blocks (exception: WorktreeCreate, where any non-zero aborts). Use exit 2 to enforce policy. +- Exit 2 and JSON output are mutually exclusive: exit 2 ignores stdout/JSON entirely. JSON is only parsed on exit 0. stdout must contain ONLY the JSON object (shell profile echoes break parsing — wrap in `if [[ $- == *i* ]]`). +- All matching hooks run to completion in parallel before results merge (one hook's deny does NOT stop sibling hooks). For PreToolUse the most restrictive wins: deny > defer > ask > allow. additionalContext from ALL hooks is kept and combined. +- PreToolUse previously used top-level decision/reason (now DEPRECATED for this event); legacy values 'approve'/'block' map to 'allow'/'deny'. Use hookSpecificOutput.permissionDecision instead. Other events (PostToolUse, Stop, etc.) STILL use top-level decision/reason as current format. +- Stop hooks have an 8-consecutive-block cap (CLAUDE_CODE_STOP_HOOK_BLOCK_CAP env raises it). Hooks receive stop_hook_active=true to detect re-entry and exit early. Stop hooks do NOT fire on user interrupts; API errors fire StopFailure instead (whose output/exit code are ignored). +- defer (PreToolUse) only works in -p non-interactive mode (v2.1.89+), only when Claude makes a SINGLE tool call in the turn, and exits with stop_reason:tool_deferred preserving deferred_tool_use{id,name,input}. Resume with claude -p --resume <session-id>. If the deferred tool is gone on resume -> stop_reason:tool_deferred_unavailable + is_error. +- Output cap: additionalContext, systemMessage, and plain stdout capped at 10000 chars. Over-cap saved to a file in session dir and replaced with preview+path. description fields in background_tasks/session_crons capped at 1000 chars. +- PostToolUse updatedToolOutput must match the tool's output schema (e.g. 
Bash returns {stdout,stderr,interrupted,isImage}); mismatched shape is IGNORED and original used. MCP tool output passes through without schema validation. Telemetry captures ORIGINAL output before hook. +- When multiple PreToolUse hooks return updatedInput, the LAST to finish wins (non-deterministic since parallel). Avoid >1 hook modifying same tool's input. +- Matchers are CASE-SENSITIVE. A matcher with ONLY letters/digits/_/| is exact-match or |-separated exact list. Any other char => treated as JavaScript regex. mcp__memory (only letters/_) matches NO tool — must use mcp__memory__.* (the .* makes it a regex). +- MessageDisplay is display-only (transcript + Claude see original; only on-screen rendered text changes), runs per-batch-of-lines interactively (once per full message in -p/SDK). default timeout 10s. No matcher. Only fires for assistant text messages, not tool results or typed text. +- PermissionRequest does NOT fire in -p non-interactive mode — use PreToolUse for automated decisions. updatedPermissions entries: addRules/replaceRules/removeRules/setMode/addDirectories/removeDirectories, each with destination session|localSettings|projectSettings|userSettings. setMode bypassPermissions only if session launched with bypass available; never persisted as defaultMode. +- ConfigChange can block all sources EXCEPT policy_settings (managed settings always apply; hooks fire for audit but block ignored). SessionEnd has 1.5s default timeout, budget raisable to 60s via per-hook timeout or CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS. +- Hooks in skills/agents use YAML frontmatter (same nested format). For subagents, Stop hooks auto-convert to SubagentStop. `once:true` only honored in skill frontmatter (ignored in settings/agent frontmatter). + +## Open questions +- Exact JSON shape returned to the SDK for each exit-code/decision combination (e.g. 
the precise fields of the SDK result object beyond stop_reason:tool_deferred) — requires reading the claude-code-sdk TypeScript types, not just docs. +- Precise merge order when hooks from multiple scopes (user/project/local/managed/plugin/skill) collide on the same event+matcher — docs say plugin hooks 'merge' but the precedence on conflicts is underspecified. +- How `if` permission-rule syntax parses non-Bash tools (Edit(*.ts) etc.) at the token level — docs give a Bash table but not the full grammar for other tools. + +## Sources +- [Hooks reference - Claude Code Docs](https://code.claude.com/docs/en/hooks) — Primary authoritative source: full reference for all 30+ hook events, config schema (matcher/handler fields), stdin JSON input, exit-code/JSON output contract, decision control table, async/prompt/agent/HTTP/mcp_tool hook types, and version-specific thresholds (v2.1.139/141/145/174/85/89, 10000-char cap, 1.5s SessionEnd, 8-block cap). Fetched via .md for complete untruncated content. +- [Automate actions with hooks - Claude Code Docs](https://code.claude.com/docs/en/hooks-guide) — Official guide confirming exit-code semantics (0=proceed/2=block/other=non-blocking error), PreToolUse permissionDecision allow/deny/ask + defer precedence, hooks-and-permission-modes interaction (deny blocks even in bypassPermissions), prompt/agent hook ok/reason schema, hook-not-firing and Stop-cap troubleshooting. +- [Claude Code & Agent SDK Hooks (2026) - morphllm](https://www.morphllm.com/claude-code-hooks) — Independent 2026 corroboration of the 30 hook events, stdin JSON shapes, exit codes, matchers, and timeouts; cross-checks official docs for currentness. +- [Claude Code Hooks: Complete Guide - claudefa.st](https://claudefa.st/blog/tools/hooks/hooks-guide) — Community cross-check confirming PreToolUse exit 2 stops the tool and the decision/JSON-output control flow. 
+- [Hooks reference - Claude Wiki](https://claude-wiki.com/hooks-reference.html) — Secondary corroboration of the command-vs-HTTP input/output contract and stdin/stdout/exit-code semantics. diff --git a/docs/claude-code-architecture/research/mcp.md b/docs/claude-code-architecture/research/mcp.md new file mode 100644 index 0000000..bcf8003 --- /dev/null +++ b/docs/claude-code-architecture/research/mcp.md @@ -0,0 +1,128 @@ +# Research: mcp + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code's MCP integration (src/services/mcp/) connects to external MCP servers over four transports (stdio, SSE [deprecated], HTTP/streamable-HTTP, WebSocket), discovers their tools/resources/prompts, and exposes them to the model with prefixed names. Servers are configured at three scopes (local, project via .mcp.json, user via ~/.claude.json) plus plugins and claude.ai connectors, with a strict precedence (Local > Project > User > Plugins > claude.ai) that connects to a server once using the single highest-precedence entry (no field merging). MCP tools are named mcp__<server>__<tool> (plugin-bundled tools use mcp__plugin_<plugin>_<server>__<tool>), and by default are NOT loaded upfront — Tool Search defers tool definitions until Claude invokes a ToolSearch call, so context usage stays low. HTTP/SSE servers support OAuth 2.0 (with dynamic client registration, CIMD, or pre-configured credentials), automatic token refresh via keychain, and dynamic headersHelper scripts; stdio servers run as child processes with CLAUDE_PROJECT_DIR injected. Enterprise control is layered on via managed-mcp.json (exclusive fixed set), allowedMcpServers/deniedMcpServers allow/denylists, and managed settings. The /mcp slash command and `claude mcp list/get/add/remove` CLI manage the lifecycle, connection status, and OAuth flows. + +## Components +### Transports +**Purpose:** The 4 wire transports Claude Code uses to talk to MCP servers. 
+ +**Mechanism:** stdio: spawn child process, JSON-RPC over stdin/stdout, CLAUDE_PROJECT_DIR injected into child env, lifecycle = full session, NOT auto-reconnected. http: streamable-HTTP per MCP 2025-03-26 spec; POST for JSON-RPC, optional GET for SSE stream; supports OAuth; auto-reconnect with exponential backoff (up to 5 attempts, start 1s doubling). sse: deprecated legacy HTTP+SSE; same reconnection. ws: persistent bidirectional WebSocket (wss), header-only auth, no OAuth, configurable only via .mcp.json/add-json (NOT via --transport flag). Initial connection (v2.1.121+) retries up to 3 times on transient errors (5xx/refused/timeout); auth/404 errors not retried. + +**Data model:** { "type":"http", "url":"https://...", "headers":{...}, "timeout":600000, "alwaysLoad":true, "headersHelper":"...", "oauth":{...} } + +**Config:** type: 'http' | 'streamable-http' (alias) | 'sse' | 'stdio' | 'ws'. Only http/sse/ws take 'url'. Only stdio takes 'command'+'args'+'env'. 'timeout' (ms, per-server hard tool-call wall-clock) and 'alwaysLoad' (bool) apply to all types. + +### Configuration scopes +**Purpose:** Where server definitions live and how precedence resolves duplicates. + +**Mechanism:** Local: stored in ~/.claude.json under the current project's path key; private to user+project; DEFAULT scope (was named 'project' in old versions). Project: written to /.mcp.json; shared via VCS; requires per-user approval (prompt on load; reset via `claude mcp reset-project-choices`). User: stored in ~/.claude.json; cross-project; private to user (was named 'global' in old versions). On name collision across scopes, Claude Code connects ONCE using the single highest-precedence entry — entire entry wins, fields are NOT merged. Plugins and claude.ai connectors dedupe by endpoint (URL/command), the three scopes dedupe by name. + +**Data model:** ~/.claude.json: { "projects": { "/abs/project/path": { "mcpServers": { "": {} } } } } (local & user scopes). 
project .mcp.json: { "mcpServers": { "": {} } }. + +**Config:** --scope flag on `claude mcp add` (local default / project / user). Precedence highest-first: Local > Project > User > Plugins > claude.ai connectors. + +### OAuth / Auth +**Purpose:** Authenticating remote (HTTP/SSE) servers. + +**Mechanism:** Triggered when server returns 401/403 (or WWW-Authenticate header). Flow: Claude opens browser -> user authorizes -> callback to http://localhost:PORT/callback (random port unless --callback-port pins it) -> token stored securely in OS keychain (macOS) or credentials file, auto-refreshed. oauth.scopes pins requested scopes (space-separated, overrides discovery); offline_access auto-appended if advertised. A configured headers.Authorization that the server rejects is a hard failure (no OAuth fallback). headersHelper runs arbitrary shell command at connect time, stdout = JSON object of string headers, 10s timeout, env vars CLAUDE_CODE_MCP_SERVER_NAME + CLAUDE_CODE_MCP_SERVER_URL injected; overrides static headers; requires workspace-trust dialog at project/local scope. + +**Data model:** OAuth discovery: GET /.well-known/oauth-protected-resource (RFC 9728) -> fallback /.well-known/oauth-authorization-server (RFC 8414). Supports Dynamic Client Registration, CIMD (Client ID Metadata Document), and pre-configured credentials. + +**Config:** Serverdef optional oauth: { clientId, callbackPort, clientSecret(stored in keychain only), authServerMetadataUrl (v2.1.64+, must be https), scopes (space-separated string, RFC 6749 format) }. CLI: --client-id, --client-secret (masked prompt; or MCP_CLIENT_SECRET env), --callback-port. + +### Tool exposure & Tool Search +**Purpose:** How MCP tools become callable by the model. + +**Mechanism:** MCP tools are NOT all loaded into the system prompt upfront. 
By default Tool Search is ON: only tool NAMES + server instructions load at session start; Claude calls a `ToolSearch` tool to pull a specific tool's schema on demand (uses beta `tool_reference` blocks). Fallback (no tool search, e.g. Vertex, custom ANTHROPIC_BASE_URL, ENABLE_TOOL_SEARCH=false): a `WaitForMcpServers` tool makes Claude wait for connecting servers. Haiku models do NOT support tool_reference. ENABLE_TOOL_SEARCH=auto loads tools upfront if they fit within 10% of context window, defers overflow. `alwaysLoad:true` on a server forces all its tools upfront regardless of setting and blocks startup until connect (capped at 5s connect timeout). Server instructions and tool descriptions truncated at 2KB each. + +**Data model:** Tool exposed to model: name `mcp__<server>__<tool>`. tool_reference block (beta) carries deferred defs. alwaysLoad: true on server OR _meta['anthropic/alwaysLoad']=true on a tool forces upfront load. + +**Config:** ENABLE_TOOL_SEARCH env: unset=default(defer), true=force defer+send beta header, auto / auto:N = threshold (<=10% context upfront), false=load all upfront. + +### Output limits +**Purpose:** Bounding MCP tool output token usage. + +**Mechanism:** When an MCP tool returns >10000 tokens, Claude Code warns. Default hard cap 25000 tokens (MAX_MCP_OUTPUT_TOKENS). Oversized text results persisted to disk and replaced with a file reference in the conversation. A tool can opt into a larger threshold via _meta['anthropic/maxResultSizeChars'] in its tools/list entry (hard ceiling 500000 chars) — applies to text content only. + +**Data model:** Result text content subject to MAX_MCP_OUTPUT_TOKENS unless _meta['anthropic/maxResultSizeChars'] set (max 500000 chars). Image content ALWAYS subject to token limit regardless of annotation. + +**Config:** MAX_MCP_OUTPUT_TOKENS env (default 25000). Warning fires >10000 tokens. MCP_TIMEOUT env = startup timeout. MCP_TOOL_TIMEOUT env = global per-call default (~28h).
+ +### /mcp command & CLI surface +**Purpose:** User-facing management UI and commands. + +**Mechanism:** `/mcp` (in-session): lists servers with connection status (connected/pending/failed), tool count, flags servers advertising tools capability but exposing none, OAuth 'Clear authentication', approve pending project servers, retry failed. `claude mcp list` shows ⏸ Pending approval for unapproved project servers; `claude mcp get ` shows pending/rejected status. `claude mcp serve` turns Claude Code itself into a stdio MCP server exposing View/Edit/LS etc. Reserved server name `workspace` is skipped at load with a warning. + +**Data model:** /mcp shows: per-server tool count, pending/failed/rejected status, 'Show unused connectors' row (v2.1.161+). + +**Config:** Commands: claude mcp add, add-json, add-from-claude-desktop, list, get, remove, reset-project-choices, serve. + +### Enterprise policy (managed MCP) +**Purpose:** Centralized control over which MCP servers users may connect to. + +**Mechanism:** managed-mcp.json (system path: macOS /Library/Application Support/ClaudeCode/, Linux /etc/claude-code/, Windows C:\Program Files\ClaudeCode\; same format as .mcp.json; deploy via MDM/GPO, NOT server-managed settings): if present, ONLY those servers load (exclusive mode), user adds blocked with 'enterprise MCP configuration is active'. Evaluation order: merge allow/deny from all sources -> denylist match blocks unconditionally -> allowlist: remote needs serverUrl (or serverName only if no serverUrl entries exist), stdio needs serverCommand (or serverName only if no serverCommand entries). Commands match EXACTLY (all args in order). URLs support * wildcards anywhere incl scheme; hostname case-insensitive ignoring trailing dot; path case-sensitive. + +**Data model:** Entry = { "serverUrl": "https://*" } | { "serverCommand": ["npx","-y","pkg"] } | { "serverName": "label" }. managed-mcp.json empty mcpServers => MCP disabled. 
+ +**Config:** Settings keys: allowedMcpServers, deniedMcpServers, allowManagedMcpServersOnly (managed-source-only), allowAllClaudeAiMcps (v2.1.149+, managed-source-only). + +### claude.ai connectors +**Purpose:** MCP servers configured in the claude.ai web app. + +**Mechanism:** Connectors added at claude.ai/customize/connectors auto-appear in CC when active auth method is Claude.ai subscription (NOT loaded if ANTHROPIC_API_KEY/AUTH_TOKEN/apiKeyHelper/Bedrock/Vertex active). Fetched at runtime, shown with claude.ai indicator. Unused connectors collapsed behind 'Show unused connectors' (v2.1.161+). + +**Data model:** claude.ai connector precedence: lowest. A CC-configured server pointing at same URL hides the connector. + +**Config:** ENABLE_CLAUDEAI_MCP_SERVERS=false disables. Anthropic-hosted connectors (Microsoft 365, Gmail, Google Calendar) require claude.ai-side connect (v2.1.162+). + +## Key behaviors +- Scope name history: current 'local' was 'project'; current 'user' was 'global'. 'project' scope now means the shared .mcp.json file. Do not confuse MCP local scope (lives in ~/.claude.json) with general local settings (live in .claude/settings.local.json). +- Precedence on duplicate is winner-take-all per entire server entry (Local > Project > User > Plugins > claude.ai); fields are NOT merged. The 3 scopes dedupe by name; plugins and connectors dedupe by endpoint (URL/command). +- Project-scoped servers from .mcp.json REQUIRE interactive approval before use; status shows ⏸ Pending approval until approved / ✗ Rejected. Reset via `claude mcp reset-project-choices`. +- Server name `workspace` is reserved/skipped at load with a rename warning. +- streamable-http is an alias for http in the `type` field (so configs copied from MCP docs work unchanged). SSE is deprecated; http preferred. +- WebSocket (`type: ws`) cannot be added via `claude mcp add --transport` — only via .mcp.json or add-json. WS has no OAuth (header-only). 
HTTP (and the deprecated SSE transport) are the only transports that support both OAuth and the --transport flag; stdio accepts --transport but has no OAuth. +- Stdio servers are NOT auto-reconnected (local processes); http/sse auto-reconnect up to 5 attempts, 1s->doubling backoff. Initial connect retries up to 3x on transient errors since v2.1.121. +- Per-server `timeout` (ms) is a hard per-call wall-clock; progress notifications do NOT extend it. Values <1000 are IGNORED (fall through to MCP_TOOL_TIMEOUT default ~28h) since v2.1.162; before v2.1.162 they were floored to 1 second. HTTP/SSE first-byte budget min 60s. +- MAX_MCP_OUTPUT_TOKENS default 25000; warning at >10000 tokens. Oversized text persisted to disk + replaced by file ref unless tool sets _meta['anthropic/maxResultSizeChars'] (ceiling 500000). Image content always subject to token cap regardless. +- Tool Search ON by default: tools deferred, discovered via `ToolSearch` tool using beta `tool_reference` blocks. Disabled by default on Vertex AI and when ANTHROPIC_BASE_URL is non-first-party. Haiku lacks tool_reference support. ENABLE_TOOL_SEARCH=auto = upfront if <=10% context. alwaysLoad:true forces upfront + blocks startup (5s cap). +- Env var expansion `${VAR}` and `${VAR:-default}` works in command/args/env/url/headers of .mcp.json. Missing var with no default = config parse failure. CLAUDE_PROJECT_DIR must use a default like ${CLAUDE_PROJECT_DIR:-.} in project/user .mcp.json (plugin configs substitute it directly). +- MCP resources: `@server:protocol://path` @-mention; Claude Code auto-provides tools to list/read resources when server supports them; fuzzy-searched in @ autocomplete. MCP prompts: surface as `/mcp__<server>__<prompt> [args]` slash commands; names normalized (spaces->_). +- Dynamic updates: servers sending MCP `list_changed` notification cause auto-refresh of tools/prompts/resources without reconnect. +- Elicitation: servers can request structured input mid-task (form or URL mode) via MCP elicitation; auto-displayed; auto-respond via Elicitation hook.
+- OAuth precedence: oauth.scopes > authServerMetadataUrl > discovered /.well-known scopes. offline_access auto-appended if advertised. 403 insufficient_scope triggers re-auth with same pinned scopes. headersHelper runs fresh each connect (no caching), overrides static headers, needs workspace trust at project/local scope. +- claude.ai connectors only load when active auth = Claude.ai subscription; disabled by ANTHROPIC_API_KEY/AUTH_TOKEN/apiKeyHelper/Bedrock/Vertex. ENABLE_CLAUDEAI_MCP_SERVERS=false disables. Some Anthropic-hosted connectors (MS 365, Gmail, Google Calendar) require claude.ai-side connect (v2.1.162+). +- Enterprise allowlist semantics: allowlist with only serverName entries is NOT a security control (user can name any server 'github'). serverUrl/serverCommand entries make name entries stop matching. Denylist always wins, always merges from all sources. +- managed-mcp.json empty mcpServers = MCP fully disabled; suppresses claude.ai connectors unless allowAllClaudeAiMcps:true (managed-source-only, v2.1.149+). 
+ +## External interfaces +- CLI: claude mcp add [--transport http|sse|stdio] [--scope local|project|user] [--header "K: V"] [--env K=V] [--client-id] [--client-secret] [--callback-port N] [--channels] <name> <url | command [args...]> +- CLI: claude mcp add-json <name> '<json>' [--scope user] [--client-secret] +- CLI: claude mcp add-from-claude-desktop +- CLI: claude mcp list | get | remove | reset-project-choices | serve +- In-session slash command: /mcp (status panel, OAuth, retry, clear auth) +- MCP prompt as slash command: /mcp__<server>__<prompt> [args] +- Resource @-mention: @<server>:<protocol>://<path> +- Config files: .mcp.json (project root), ~/.claude.json (local+user), managed-mcp.json (system path) +- Env vars: MCP_TIMEOUT, MCP_TOOL_TIMEOUT, MAX_MCP_OUTPUT_TOKENS, ENABLE_TOOL_SEARCH, ENABLE_CLAUDEAI_MCP_SERVERS, MCP_CLIENT_SECRET, CLAUDE_PROJECT_DIR (injected into stdio child), CLAUDE_CODE_MCP_SERVER_NAME/URL (injected into headersHelper) +- Agent SDK: options.mcpServers{...}, options.allowedTools=["mcp__<server>__*"] +- Tool name surface: mcp__<server>__<tool> ; plugin: mcp__plugin_<plugin>_<server>__<tool> + +## Open questions +- Exact internal JSON-RPC initialize negotiation params and protocol version string Claude Code sends (likely '2025-03-26' or '2025-06-18'); not in public docs. +- Precise file/key format of the OAuth token store on disk and per-OS keychain service name. +- Whether `headersHelper` JSON merge is shallow-only and exact precedence vs `headers` beyond 'same name overrides'. +- Exact behavior of `WaitForMcpServers` internal tool name and its output schema when tool search is disabled. + +## Sources +- [Connect Claude Code to tools via MCP — official docs](https://code.claude.com/docs/en/mcp) — Primary source: transports, scopes, tool naming, OAuth, output limits, tool search, resources, prompts, elicitation, channels — the entire MCP subsystem reference.
+- [Control MCP server access for your organization (managed-mcp) — official docs](https://code.claude.com/docs/en/managed-mcp) — Authoritative on managed-mcp.json paths/format, allowedMcpServers/deniedMcpServers matching rules, allowManagedMcpServersOnly, evaluation order, allowAllClaudeAiMcps. +- [MCP server-types deep dive — anthropics/claude-code repo](https://github.com/anthropics/claude-code/blob/main/plugins/plugin-dev/skills/mcp-integration/references/server-types.md) — First-party repo reference documenting stdio/sse/http/ws config shapes, lifecycles, ${CLAUDE_PLUGIN_ROOT} expansion, and comparison matrix. +- [Connect to external tools with MCP (Agent SDK) — official docs](https://code.claude.com/docs/en/agent-sdk/mcp) — Confirms exact tool naming convention mcp____, mcpServers option, allowedTools wildcard, .mcp.json loading via settingSources. +- [MCP Transports specification — modelcontextprotocol.io](https://modelcontextprotocol.io/specification/2025-03-26/basic/transports) — Underlying protocol spec for stdio, HTTP+SSE, and streamable-HTTP semantics that Claude Code implements. +- [Streamable HTTP specification (2025-03-26 / draft) — modelcontextprotocol.io](https://modelcontextprotocol.io/specification/draft/basic/transports/streamable-http) — Confirms streamable-http replaced HTTP+SSE in protocol version 2025-03-26, which Claude Code aliases to http. diff --git a/docs/claude-code-architecture/research/memory-claudemd.md b/docs/claude-code-architecture/research/memory-claudemd.md new file mode 100644 index 0000000..5cbf02e --- /dev/null +++ b/docs/claude-code-architecture/research/memory-claudemd.md @@ -0,0 +1,119 @@ +# Research: memory-claudemd + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code's memory subsystem has two parallel, complementary mechanisms. 
(1) CLAUDE.md files are human-authored instruction files loaded into every session as context (NOT enforced config) via a strict precedence hierarchy: managed-policy → user (~/.claude/CLAUDE.md) → project (./CLAUDE.md or ./.claude/CLAUDE.md) → local (./CLAUDE.local.md), all concatenated root-to-cwd and never overriding each other. CLAUDE.md supports `@path` import syntax (relative resolves against the importing file, not cwd; recursion capped at max depth 4 hops; HTML comments stripped before injection). (2) Auto memory (Claude-written, requires v2.1.59+) lives in ~/.claude/projects/<project>/memory/ keyed by git repo root (shared across worktrees), with MEMORY.md as a pointer-index (first 200 lines OR 25KB loaded into context) and topic .md files surfaced on-demand by a Sonnet side-query. A separate generic API "memory" tool (tool_type memory_20250818, name "memory") exists for SDK clients operating a /memories directory. The `#` prefix in the REPL quick-adds a memory to the relevant CLAUDE.md. CLAUDE.md content is injected as a USER message after the system prompt, and the InstructionsLoaded hook fires whenever any CLAUDE.md or .claude/rules/*.md enters context. + +## Components +### CLAUDE.md directory-walk + concatenation order +**Purpose:** Resolve and assemble all CLAUDE.md/CLAUDE.local.md into one context blob, root-to-cwd, no overriding. + +**Mechanism:** Claude Code walks up from cwd to (but not including) filesystem root, checking each dir for CLAUDE.md + CLAUDE.local.md. All discovered files are concatenated (not overridden), ordered root-down so cwd-level is read LAST. At each level CLAUDE.local.md is appended after CLAUDE.md. Subdirectory files load lazily on demand when Claude reads files there. Managed-policy + user + project-root files survive /compact (re-read from disk); nested subdir files do NOT auto-reinject. + +**Data model:** Files: CLAUDE.md, CLAUDE.local.md. Target size <200 lines (guideline). 
+ +**Config:** Path: ./CLAUDE.md (lower precedence) then ./CLAUDE.local.md appended after at same level. Excludable via claudeMdExcludes. + +### Settings-scope precedence (managed → user → project → local) +**Purpose:** Determines which scope wins and how CLAUDE.md content is sourced from settings vs files. + +**Mechanism:** Managed-policy CLAUDE.md is highest precedence (above CLI args), loaded BEFORE user and project CLAUDE.md, and CANNOT be excluded by claudeMdExcludes. Three delivery mechanisms: server-managed (Claude.ai admin console), MDM/OS plist (macOS com.anthropic.claudecode domain / Windows HKLM\SOFTWARE\Policies\ClaudeCode registry 'Settings' JSON value), file-based managed-settings.json + drop-in managed-settings.d/. Settings precedence overall: Managed > CLI args > Local > Project > User. Permissions MERGE across scopes; most other settings OVERRIDE. + +**Data model:** managed-settings.json: {"claudeMd": "Always run make lint\nNever push to main"}. managed-settings.d/*.json merged systemd-style (alphabetical, arrays concat+dedup, objects deep-merged, dotfiles ignored). + +**Config:** OS-specific managed paths: macOS /Library/Application Support/ClaudeCode/CLAUDE.md; Linux/WSL /etc/claude-code/CLAUDE.md; Windows C:\Program Files\ClaudeCode\CLAUDE.md. Or in managed-settings.json via the `claudeMd` key (managed/policy scope only; ignored in user/project/local). + +### @import expansion + --add-dir +**Purpose:** Compose memory from multiple files; load memory from additional directories. + +**Mechanism:** Regex/token expansion of @-prefixed paths inside CLAUDE.md. First-encounter of EXTERNAL imports in a project triggers an approval dialog listing files; if declined, imports stay disabled and dialog does not reappear. AGENTS.md is NOT read natively — bridge via `@AGENTS.md` import or symlink. + +**Data model:** Loaded files: CLAUDE.md, .claude/CLAUDE.md, .claude/rules/*.md, CLAUDE.local.md (skipped if local excluded via --setting-sources). 
+ +**Config:** Set CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1. + +### Auto memory (MEMORY.md index + topic files) +**Purpose:** Claude-written scratchpad: index always loaded, topic files surfaced on-demand. + +**Mechanism:** At session start, first 200 lines OR first 25KB of MEMORY.md (whichever first) is loaded into system prompt. Topic files are NOT loaded at startup. Per-turn, a Sonnet side-query scans up to 200 .md files (excluding MEMORY.md), extracts filename/mtime/description/type, returns JSON {selected_memories:[]} (max 256 tokens, up to 5 files), which are injected as `relevant_memories` attachments (NOT FileReadTool calls). Topic files use 2-step save: (1) write file with YAML frontmatter name/description/type, (2) add one-line pointer to MEMORY.md. Background autoDream consolidation fires after >=24h since last consolidation AND >=5 sessions, runs as forked agent, protected by .consolidate-lock PID file with 60-min stale guard. + +**Data model:** Files: MEMORY.md (index, <200 lines / 25KB), topic files with frontmatter name/description/type(one of: user, feedback, project, reference). Line format: '- [Title](file.md) — hook' (~150 chars). + +**Config:** Settings: autoMemoryEnabled (bool, default true), autoMemoryDirectory (absolute or ~/). Env: CLAUDE_CODE_DISABLE_AUTO_MEMORY=1. + +### memory tool (API tool_type memory_20250818) +**Purpose:** Generic file-based memory CRUD primitive (API/SDK clients), distinct from Claude Code's built-in auto-memory. + +**Mechanism:** Client-side tool; the app implements handlers. Claude auto-views /memories before tasks. Tool returns: directories listed 2-deep with human sizes (tab-separated, excluding dotfiles + node_modules); files returned with line numbers (6-char right-aligned, tab sep, 1-indexed, max 999,999 lines). Auto system-prompt injection: 'IMPORTANT: ALWAYS VIEW YOUR MEMORY DIRECTORY BEFORE DOING ANYTHING ELSE. MEMORY PROTOCOL...'. 
NOTE: this is the API/SDK memory tool, distinct from Claude Code's built-in auto-memory subsystem — Claude Code's auto-memory does not expose this tool by default; the CLI uses its own filesystem-based memory instead. + +**Data model:** Tool type 'memory_20250818', name 'memory'. Commands: view{path,view_range?}, create{path,file_text}, str_replace{path,old_str,new_str}, insert{path,insert_line,insert_text}, delete{path}, rename{old_path,new_path}. Paths confined to /memories/. + +**Config:** Subclass betaMemoryTool (TS) / BetaAbstractMemoryTool (Python/C#) / BetaMemoryToolHandler (Java). Tool name='memory'. Must restrict to /memories dir, validate canonical paths, reject ../ sequences and URL-encoded traversal. + +### InstructionsLoaded hook +**Purpose:** Observability for memory/rules loading. + +**Mechanism:** Fires at session start AND when files lazily load mid-session (e.g. subdir CLAUDE.md read, path-glob rule triggered, @import include resolved, /compact re-inject). Matcher field = load reason. Non-blocking (exit code ignored), cannot decision-control; useful for logging which files load and why. + +**Data model:** Hook stdin JSON includes load_reason field. JSON output via exit 0 stdout. hookSpecificOutput.hookEventName='InstructionsLoaded'. + +**Config:** Hooks key: InstructionsLoaded with matcher values session_start|nested_traversal|path_glob_match|include|compact. Exit code ignored (non-blocking). Output capped 10,000 chars. + +### .claude/rules/ path-scoped rules +**Purpose:** Modular, conditional memory injection scoped to file globs. + +**Mechanism:** Rules in .claude/rules/*.md are discovered recursively. Those with a `paths:` frontmatter field only inject when Claude reads a file matching the glob. User-level rules load before project rules (lower precedence). Trigger on file read, not every tool use. Symlinks supported, circular handled. Loaded on demand when matching files opened. 
Also loadable from --add-dir dirs when CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1. + +**Data model:** YAML frontmatter `paths: ["src/api/**/*.ts"]`. Rules WITHOUT paths frontmatter load unconditionally at launch at .claude/CLAUDE.md priority. + +**Config:** Rule files in .claude/rules/ (recursive) or ~/.claude/rules/. frontmatter: paths: [globs]. + +## Key behaviors +- CLAUDE.md is CONTEXT, NOT config — injected as a user message AFTER the system prompt, never guaranteed to be followed. To hard-enforce behavior use PreToolUse hooks or managed settings permissions.deny. +- Concatenation is root-to-cwd, cwd-level read LAST; per level CLAUDE.local.md appended after CLAUDE.md. Files never override each other across the tree. +- Block-level HTML comments are STRIPPED before context injection (saves tokens). Comments INSIDE code fences are preserved. Read tool shows comments unstripped. +- @import relative paths resolve relative to the file CONTAINING the import, NOT cwd. Both relative and absolute paths allowed. Home-dir imports (@~/.claude/x.md) for cross-worktree sharing. +- @import recursion MAX DEPTH = 4 hops (per current official docs code.claude.com/docs/en/memory). NOTE: several third-party write-ups and some mirror sites say 5; the canonical Anthropic doc states 4 — verify against live docs before hardcoding. +- Auto memory needs Claude Code v2.1.59+. MEMORY.md load cap: first 200 lines OR first 25KB, whichever first; content beyond NOT loaded at start. CLAUDE.md is loaded in FULL regardless of length (no 200-line hard cap, but adherence degrades). +- Project path in ~/.claude/projects//memory/ is derived from the GIT REPO root, so all worktrees + subdirs in one repo share ONE auto-memory dir. Outside a git repo, project root is used. +- autoMemoryDirectory must be absolute or start with ~/. When set in .claude/settings.json or settings.local.json, honored only AFTER workspace trust dialog accepted (same gate as hooks). 
+- claudeMdExcludes matches ABSOLUTE file paths via glob, configurable at any settings layer, arrays MERGE across layers. Managed-policy CLAUDE.md is NEVER excludable. +- Subagents can maintain their own auto memory (per-subagent memory dirs). +- Topic files surfaced by a Sonnet side-query (NOT FileReadTool): up to 5 files/turn, returned as JSON {selected_memories:string[]} max 256 tokens, injected as relevant_memories attachments, already-surfaced filtered out. +- autoDream background consolidation: triggers after >=24h since last consolidation AND >=5 sessions, forked subagent, 4 phases (orient/gather/consolidate/prune), PID lock file .consolidate-lock with 60-min stale guard, rollback rewinds mtime on failure. +- Topic file 4 types: user, feedback, project, reference. YAML frontmatter name/description/type. description is what Sonnet selector reads for relevance — vague = never surfaced. +- What NOT to save: code patterns/architecture/paths (derivable), git history (git log authoritative), debugging fixes (in commit msg), anything already in CLAUDE.md, ephemeral task details. +- Managed settings parse tolerantly since v2.1.169: invalid entries stripped with warning, rest enforced. Security fields (allowedMcpServers, enforceAvailableModels, forceLoginOrgUUID, etc.) have per-field fail-closed behavior. +- Legacy Windows managed path C:\ProgramData\ClaudeCode\managed-settings.json removed in v2.1.75; must migrate to C:\Program Files\ClaudeCode\. +- Settings files are watched and hot-reloaded mid-session (permissions, hooks, apiKeyHelper) firing ConfigChange hook; but `model` and outputStyle are read-once at start (use /model or restart). +- # quick-add memory: typing '#' prefix in prompt triggers Claude Code to write the memory into the relevant CLAUDE.md file (had a regression bug on Windows, issue #14868, Dec 2025). 
+ +## External interfaces +- File paths: ./CLAUDE.md, ./.claude/CLAUDE.md, ./CLAUDE.local.md, ~/.claude/CLAUDE.md, ~/.claude/rules/*.md, .claude/rules/*.md, ~/.claude/projects/<project>/memory/MEMORY.md + topic .md files +- Managed CLAUDE.md paths: macOS /Library/Application Support/ClaudeCode/CLAUDE.md | Linux/WSL /etc/claude-code/CLAUDE.md | Windows C:\Program Files\ClaudeCode\CLAUDE.md +- managed-settings.json + managed-settings.d/*.json drop-in dir in same system dir (drop-in requires v2.1.x+) +- Settings keys: claudeMd (managed-only), claudeMdExcludes (glob array, mergeable), autoMemoryEnabled (bool), autoMemoryDirectory (abs or ~/), --setting-sources, --add-dir flag +- Env vars: CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1, CLAUDE_CODE_DISABLE_AUTO_MEMORY=1, CLAUDE_CODE_NEW_INIT=1 +- API memory tool: tools=[{"type":"memory_20250818","name":"memory"}], path root /memories/, commands view/create/str_replace/insert/delete/rename +- CLI commands: /init, /memory +- Hook event: InstructionsLoaded (matcher values: session_start, nested_traversal, path_glob_match, include, compact) +- UI keybinding: '#' prefix in prompt = quick-add memory to CLAUDE.md + +## Open questions +- EXACT import recursion depth: official docs say max 4 hops, but several mirrors/third-party deep-dives say 5 — needs live re-verification against code.claude.com/docs/en/memory and the actual MAX_IMPORT_DEPTH constant in source. +- Exact JSON schema of the InstructionsLoaded hook stdin payload (full field list, not just load_reason) — not fully captured; would need the hooks reference #hook-events section. +- Whether Claude Code's built-in auto-memory uses the SAME memory_20250818 tool under the hood or a separate proprietary filesystem layer (manavgup deep-dive implies a separate subsystem: memdir/autoDream/extractMemories services, NOT the API memory tool). +- Exact '<project>' directory-name hashing/encoding scheme used under ~/.claude/projects/<project>/memory/ (how repo path -> folder name).
+- Whether the Sonnet-side-query memory surfacing (up to 5 files, 256-token JSON) is documented officially or only reverse-engineered — official docs only state 'first 200 lines/25KB loaded'. + +## Sources +- [How Claude remembers your project — Claude Code Docs (code.claude.com/docs/en/memory)](https://code.claude.com/docs/en/memory) — Canonical source for the full memory subsystem: CLAUDE.md hierarchy table, @import 4-hop limit, walk-up resolution order, CLAUDE.local.md appending, auto memory (MEMORY.md 200-line/25KB cap, ~/.claude/projects//memory/, autoMemoryEnabled/Directory/CLAUDE_CODE_DISABLE_AUTO_MEMORY, v2.1.59+ requirement, compaction survival, claudeMd managed key, claudeMdExcludes, --add-dir env, InstructionsLoaded hook reference, .claude/rules/ path-scoping. +- [Claude Code settings — Claude Code Docs (code.claude.com/docs/en/settings)](https://code.claude.com/docs/en/settings) — Authoritative settings-scope precedence (Managed > CLI > Local > Project > User), managed-settings.json locations per OS, managed-settings.d/ drop-in systemd-style merge, managed CLAUDE.md path equivalence, v2.1.75 Windows legacy-path removal, v2.1.169 tolerant parsing, hot-reload + ConfigChange hook, model/outputStyle read-once. +- [Memory tool — Claude API Docs (platform.claude.com/docs/en/agents-and-tools/tool-use/memory-tool)](https://platform.claude.com/docs/en/agents-and-tools/tool-use/memory-tool) — Defines the API memory tool (type memory_20250818, name memory, commands view/create/str_replace/insert/delete/rename, /memories dir, path-traversal security, return formats, auto MEMORY PROTOCOL prompt). Distinct from Claude Code's built-in auto-memory. 
+- [Hooks reference — Claude Code Docs (code.claude.com/docs/en/hooks)](https://code.claude.com/docs/en/hooks) — Confirms InstructionsLoaded event exists, fires at session start + lazy load, matcher = load reason (session_start, nested_traversal, path_glob_match, include, compact), exit code ignored (non-blocking), plus full hook lifecycle including PreCompact/PostCompact relevant to memory re-injection. +- [09 — Memory System · Inside Claude Code (manavgup.github.io/shipai)](https://manavgup.github.io/shipai/deep-dives/claude-code/09-memory.html) — Reverse-engineered internals: src/memdir/autoDream/extractMemories services, MEMORY.md pointer-index format, 4 memory types (user/feedback/project/reference), Sonnet side-query surfacing (up to 5 files, 256-token JSON), autoDream 24h+5-session trigger with .consolidate-lock 60-min stale guard, 200-line/25KB truncation detail. Useful for a faithful reimplementation even though it's community-sourced. +- [[BUG] # memory shortcut no longer saves to CLAUDE.md — anthropics/claude-code#14868](https://github.com/anthropics/claude-code/issues/14868) — Confirms the '#' prefix quick-add-memory-to-CLAUDE.md behavior is a real, official feature (and documents a Dec 2025 Windows regression). +- [Boris Cherny Threads post — '#' quick-add memory announcement](https://www.threads.com/@boris_cherny/post/DHq60G7vkNz) — Anthropic staff announcement confirming '#' prefix writes memories to CLAUDE.md files. 
diff --git a/docs/claude-code-architecture/research/permissions.md b/docs/claude-code-architecture/research/permissions.md new file mode 100644 index 0000000..2a5be91 --- /dev/null +++ b/docs/claude-code-architecture/research/permissions.md @@ -0,0 +1,137 @@ +# Research: permissions + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code's permission system layers three independent mechanisms: (1) six session-level permission MODES (default, acceptEdits, plan, auto, dontAsk, bypassPermissions) that set the auto-approval baseline; (2) pattern-based RULE LISTS (allow/ask/deny) in settings.json (and via --allowedTools/--disallowedTools) that are evaluated in fixed order deny->ask->allow with first-match-wins regardless of specificity; and (3) a runtime INTERACTIVE callback (`canUseTool` in SDK; `control_request`/`control_response` NDJSON over stdin/stdout in headless CLI). Rules are enforced by the harness, never the model — CLAUDE.md/prompt text only shapes what Claude attempts, not what is allowed. Deny rules at ANY settings scope cannot be overridden (managed > CLI args > local project > shared project > user). The system is heavily version-evolved (2025-2026): `auto` mode (v2.1.83+, research preview, server-side classifier, fallback at 3-consecutive/20-total blocks), `dontAsk` (locked-down CI), `acceptEdits`/`auto`/`plan` aliases, protected-path write guards (bypass no longer prompts as of v2.1.126), and `additionalDirectories` for multi-root file access. The Go replica must implement the exact 6-step SDK evaluation order, the exact rule syntax (gitignore-style path anchors for Read/Edit, glob for Bash with process-wrapper stripping and compound-command splitting, domain: prefix for WebFetch), and the exact NDJSON control protocol for tool approvals. + +## Components +### Permission Modes +**Purpose:** Global session-level policy controlling how often tools pause for approval. + +**Mechanism:** Shift+Tab cycles default->acceptEdits->plan. 
Enabled optional modes slot in after plan in order: bypassPermissions first, auto last. auto appears only via opt-in; dontAsk never appears in cycle (set via flag). bypassPermissions requires startup with --permission-mode bypassPermissions / --dangerously-skip-permissions / --allow-dangerously-skip-permissions (the --allow- variant adds to cycle without activating). On Linux/macOS bypassPermissions refuses to run as root/sudo (check auto-skipped inside recognized sandbox). Modes set the baseline; deny+explicit-ask rules apply in EVERY mode including bypassPermissions. + +**Data model:** PermissionMode = "default" | "acceptEdits" | "plan" | "auto" | "dontAsk" | "bypassPermissions". (Python SDK Literal only declares 4: default/acceptEdits/plan/bypassPermissions; CLI also supports auto and dontAsk.) + +**Config:** settings.json under `permissions.defaultMode`. CLI flag `--permission-mode <mode>` overrides for one session. Valid values: default, acceptEdits, plan, auto, dontAsk, bypassPermissions. + +### Permission Rules (allow/ask/deny) +**Purpose:** Per-tool, pattern-based pre-approval / forced-prompt / block lists in settings.json. + +**Mechanism:** Evaluation order: DENY -> ASK -> ALLOW; first match wins regardless of specificity. A matching ASK prompts even when a more specific ALLOW also matches. Bare-name deny (e.g. `Bash`) removes the tool from Claude's context before evaluation; only scoped deny (e.g. `Bash(rm *)`) is matched at the per-call step. Enforced by Claude Code, NOT by the model (CLAUDE.md only shapes behavior, doesn't grant access). + +**Data model:** Rule = `Tool` | `Tool(specifier)`. `Bash`/`Bash(*)` = all uses (as deny, removes tool from model context entirely). Scoped deny like `Bash(rm *)` leaves tool available, blocks matching calls. + +**Config:** Keys live under top-level `permissions` object.
Precedence (high->low): Managed > CLI args > local project (.claude/settings.local.json) > shared project (.claude/settings.json) > user (~/.claude/settings.json). Deny at ANY level cannot be overridden. Settings files are hot-reloaded (permissions/hooks/ConfigChange hook fire). + +### Bash Pattern Matching +**Purpose:** Match shell commands against allow/deny rules with prefix/suffix/wildcard globs. + +**Mechanism:** Glob `*` matches any chars including spaces (one wildcard spans multiple args). Space before `*` enforces word boundary: `Bash(ls *)` matches `ls -la` not `lsof`; `Bash(ls*)` matches both. Trailing `:*` is equivalent to trailing ` *` but ONLY at end of pattern. Claude Code is shell-operator-aware: command separators (&& || ; | |& & newline) split compound commands and EACH subcommand must match independently. Approving compound `git status && npm test` saves up to 5 separate rules (e.g. just `npm test`). Built-in read-only commands run without prompt in every mode: ls, cat, echo, pwd, head, tail, grep, find, wc, which, diff, stat, du, cd, and read-only git forms. Read-only forms allow unquoted globs; write/exec-capable flags (find -delete, sort, sed, git) still prompt. + +**Data model:** Separators: && || ; | |& & . Stripped wrappers: timeout, time, nice, nohup, stdbuf, bare xargs (no flags). NOT stripped: direnv exec, devbox run, mise exec, npx, docker exec (so `Bash(devbox run *)` matches anything after run). Exec wrappers (watch, setsid, ionice, flock) and find -exec/-delete always prompt. + +**Config:** Read-only set is built-in and NOT configurable (override via ask/deny rule). + +### Read/Edit Path Rules +**Purpose:** File-path-scoped allow/deny using gitignore-style patterns with 4 anchor types. + +**Mechanism:** Read rules apply to Read + Grep + Glob + @file mentions + IDE-open-file context. Edit rules apply to all built-in editing tools AND file commands recognized in Bash (cat, head, tail, sed) — but NOT arbitrary subprocesses. 
Four anchor types: `//abs/path` (filesystem root), `~/path` (home), `/path` (PROJECT ROOT, not absolute!), `path`/`./path` (cwd). A pattern like `/Users/alice/file` is relative to project root, NOT absolute. Windows paths normalized to POSIX (C:\Users\alice -> /c/Users/alice). + +**Data model:** Symlink rule: Allow requires BOTH symlink path AND target to match; Deny fires if EITHER matches. `*` = within one segment, `**` = across directories. Bare filename = gitignore semantics (any depth): `Read(.env)` == `Read(**/.env)`. + +**Config:** cd into working/additional dir is read-only; cd + git in one compound always prompts. + +### WebFetch + Sandbox Interaction +**Purpose:** Network/domain gating, complementary to OS sandbox. + +**Mechanism:** WebFetch rules use `domain:` prefix matching hostname (case-insensitive, trailing `.` stripped). `*` matches across `.` ONLY as leading `*.` or whole pattern; elsewhere within one label. Exact rule beats wildcard when both match. Sandbox (Bash-only, OS-level) merges with permissions: filesystem boundary = sandbox.filesystem + Read/Edit deny; network boundary = WebFetch rules + allowedDomains/deniedDomains. + +**Data model:** Network deny: WebFetch rules + sandbox deniedDomains both apply (deny-first). + +**Config:** autoAllowBashIfSandboxed: true (default) lets sandboxed Bash skip bare-Bash ask rule. + +### Settings Precedence + Managed-Only +**Purpose:** Merge rules across scopes with deny-wins semantics; org-level enforcement. + +**Mechanism:** High-precedence settings that cannot be overridden. Managed-only keys include allowManagedPermissionRulesOnly (only managed allow/ask/deny apply), disableBypassPermissionsMode, disableAutoMode. Precedence: Managed > CLI args > Local project > Shared project > User. If denied at any level, nothing can allow it. Embedder can tighten (not loosen) via managedSettings when parentSettingsBehavior=merge. 
+ +**Data model:** Source enum: userSettings | projectSettings | localSettings | session. Behavior enum: allow | deny | ask. Update.type: addRules | replaceRules | removeRules | setMode | addDirectories | removeDirectories. + +**Config:** disableAutoMode / disableBypassPermissionsMode set to "disable" (any scope, typically managed). allowManagedPermissionRulesOnly prevents user/project allow/ask/deny rules. + +### canUseTool Callback (SDK) +**Purpose:** Runtime interactive approval surfaced to embedding application. + +**Mechanism:** SDK exposes `canUseTool(tool_name, input, context)` callback returning PermissionResultAllow (with updated_input + optional updated_permissions for 'always allow') or PermissionResultDeny (with message). In Python this callback requires streaming mode AND a PreToolUse hook returning {continue_:true} to keep the stream open. The callback can be pending indefinitely (defer decision to resume later). Also fires for AskUserQuestion clarifying questions. Hooks run BEFORE canUseTool and can allow/deny/modify. + +**Data model:** types.py: PermissionResultAllow{behavior:"allow", updated_input, updated_permissions?}; PermissionResultDeny{behavior:"deny", message, interrupt?}. ToolPermissionContext{signal, suggestions: [PermissionUpdate]}. CanUseTool = Callable[[str, dict, ToolPermissionContext], Awaitable[PermissionResult]]. + +**Config:** Output format determined by --output-format (text|stream-json|json). + +### NDJSON Control Protocol (CLI stdio) +**Purpose:** Wire protocol for embedding hosts to receive/approve permission prompts. + +**Mechanism:** Headless CLI driven by host over stdin/stdout NDJSON. With `--permission-prompt-tool stdio`, when a tool needs approval CLI emits a `control_request` (subtype `can_use_tool`) and BLOCKS (~60s default) until host replies with matching `control_response`. Allow MUST include `updatedInput` (original or modified); deny MUST include `message`; request_id must match. 
Without this flag tools auto-deny in non-interactive mode. Dynamic mid-session mode switch via control_request subtype `set_permission_mode`. + +**Data model:** control_request{type, request_id, request:{subtype:"can_use_tool"|"set_permission_mode", tool_name, input, decision_reason?, tool_use_id?, permission_suggestions?, mode?}}. control_response{type, response:{subtype:"success", request_id, response:{behavior:"allow"|"deny", updatedInput|message}}}. + +**Config:** Flags required: --output-format stream-json --input-format stream-json --verbose --permission-prompt-tool stdio. DEBUG_CLAUDE_AGENT_SDK=1 or --debug for logs. + +### Auto Mode Classifier +**Purpose:** Background model classifier that approves/blocks actions to eliminate routine prompts. + +**Mechanism:** Auto mode (v2.1.83+, research preview) routes non-trivial actions to a server-side classifier model (independent of /model). Trusts working dir + configured remotes; everything else external. Reads + working-dir edits skip classifier; shell/network go through it. Blocked by default: curl|bash, sensitive data exfil, prod deploys, mass deletion, IAM grants, force push/push to main. On 3 consecutive OR 20 total blocks, auto mode pauses and resumes prompting; non-interactive `-p` mode aborts. Boundaries stated in conversation act as block signals (re-read from transcript each check, lost on compaction). + +**Data model:** Non-configurable thresholds. Classifier sees user msgs + tool calls + CLAUDE.md; tool results STRIPPED (separate server-side probe flags suspicious tool-result content). + +**Config:** On enter auto mode, dropped: Bash(*)/PowerShell(*), Bash(python*) wildcards, package-manager run commands, Agent allow rules. Narrow rules (Bash(npm test)) carry over. Restored on exit. + +### Protected Paths +**Purpose:** Circuit breaker preventing corruption of repo state and Claude's own config. 
+ +**Mechanism:** A fixed set of dirs/files (repo state + Claude config + shell/package config) whose writes are never auto-approved except in bypassPermissions (as of v2.1.126). default/acceptEdits/plan -> prompt; auto -> classifier; dontAsk -> deny; bypassPermissions -> allow. Prompt for .claude/ write offers 'Yes, and allow Claude to edit its own settings for this session'. + +**Data model:** Dirs: .git, .config/git, .vscode, .idea, .husky, .cargo, .devcontainer, .yarn, .mvn, .claude (except .claude/worktrees). Files: .gitconfig, .gitmodules, .bashrc, .zshrc, .profile, .envrc, .npmrc, .yarnrc.yml, .pnp.cjs, .bazelrc, .pre-commit-config.yaml, lefthook.yml, gradle-wrapper.properties, .devcontainer.json, .mcp.json, .claude.json, etc. + +**Config:** permissions.allow rules do NOT pre-approve protected-path writes — safety check runs before allow rules. `.claude/worktrees` is exempt (Claude's own worktrees). + +## Key behaviors +- Six modes total: default, acceptEdits, plan, auto, dontAsk, bypassPermissions. The Python SDK PermissionMode Literal only declares 4 (default/acceptEdits/plan/bypassPermissions) — auto and dontAsk are CLI-level and TypeScript-only for `auto`. +- auto mode requires v2.1.83+ AND plan + model (Opus 4.6+/Sonnet 4.6 on Anthropic API; Opus 4.7/4.8 only on Bedrock/Vertex/Foundry) AND on Bedrock/Vertex/Foundry the env var CLAUDE_CODE_ENABLE_AUTO_MODE=1 (v2.1.158+). Admins set permissions.disableAutoMode="disable" to lock off. auto is IGNORED in project/local settings as of v2.1.142 (must be in ~/.claude/settings.json or managed). +- bypassPermissions as of v2.1.126 NO LONGER prompts for protected-path writes (earlier versions did). It still prompts for explicit ask rules and for rm targeting / or ~. Refuses to run as root/sudo on Linux/macOS (auto-skipped in recognized sandbox). disableBypassPermissionsMode="disable" blocks it. 
+- dontAsk mode auto-DENIES every prompt; only permissions.allow rules and read-only Bash commands execute; explicit ask rules are DENIED (not prompted). Cloud (web) sessions ignore defaultMode dontAsk and bypassPermissions from settings files. +- acceptEdits auto-approves: Edit/Write + filesystem Bash cmds (mkdir, touch, rm, rmdir, mv, cp, sed) + their safe prefixes (LANG=C, NO_COLOR=1) + wrappers (timeout/nice/nohup). Only for paths inside cwd or additionalDirectories. PowerShell: Set-Content, Add-Content, Clear-Content, Remove-Item + aliases. +- Rule specificity does NOT change evaluation order: deny -> ask -> allow, first match wins. A matching ask prompts even if a more-specific allow also matches the same call. +- Bash pattern word-boundary subtlety: `Bash(ls *)` (space before *) matches `ls -la` NOT `lsof`; `Bash(ls*)` matches both. `:*` suffix == trailing ` *` but only at END of pattern (`Bash(git:* push)` treats colon literally). +- Bash compound commands: separators && || ; | |& & newline each split into subcommands; EVERY subcommand must independently match. Approving `git status && npm test` saves up to 5 separate rules (one per subcommand needing approval). Wrappers timeout/time/nice/nohup/stdbuf and bare xargs are stripped BEFORE matching; direnv/devbox/mise/npx/docker exec are NOT. +- Read/Edit deny applies to built-in file tools + cat/head/tail/sed in Bash, but NOT to arbitrary subprocesses (python/node scripts). For OS-level enforcement use the sandbox. +- Symlink asymmetry: allow requires BOTH symlink path AND target to match; deny fires if EITHER matches. So symlink inside allowed dir pointing to denied file is blocked. +- WebFetch domain: `*` crosses `.` only as leading `*.` or whole pattern; `domain:github.*` matches github.io but NOT github.evil.com (anti-homograph). Exact rule beats wildcard in same list. +- MCP rule glob constraint: allow rules accept tool-name globs ONLY after literal `mcp__<server>__` prefix (server segment glob-free).
Unanchored allow globs like `*` or `mcp__*` are SKIPPED with a startup warning. Deny/ask globs are unrestricted (`mcp__*`, `*`). +- auto mode on-enter drops broad allow rules: Bash(*)/PowerShell(*), Bash(python*) wildcard interpreters, package-manager run commands, Agent allow rules. Narrow rules like Bash(npm test) carry over. Restored on exit. +- auto mode fallback thresholds are NON-configurable: 3 consecutive blocks OR 20 total blocks -> pause and resume prompting. Any allowed action resets consecutive counter; total counter persists for session. Non-interactive -p mode aborts on repeated blocks. +- Settings precedence (high->low): Managed > CLI args > Local project (.claude/settings.local.json) > Shared project (.claude/settings.json) > User (~/.claude/settings.json). Deny at ANY level is final. Settings files are hot-reloaded. +- additionalDirectories in settings grants FILE ACCESS only; --add-dir flag additionally loads some config (skills, partial plugin settings, CLAUDE.md only if CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1). +- Allow rules don't constrain bypassPermissions: allowed_tools only pre-approves listed tools; unlisted tools fall through to mode where bypassPermissions approves everything. Use disallowed_tools to block specific tools in bypass. +- Subagent inheritance: parent bypassPermissions/acceptEdits/auto is inherited by ALL subagents and cannot be overridden per-subagent; any permissionMode in subagent frontmatter is IGNORED in auto mode. Classifier checks subagents at 3 points (spawn task desc, each action, return history). +- Hook decisions do NOT bypass deny/ask rules: a hook returning allow still gets deny/ask rules evaluated; a hook exit code 2 (block) takes precedence over allow rules. PreToolUse runs before the prompt; PermissionRequest hook is for notifications. +- Tool names containing _ or * are exempt from the 'unknown tool' startup warning; otherwise deny/ask rules matching no known tool emit a warning. 
+ +## Open questions +- Exact default ~60s control_request blocking timeout value and whether it is configurable (docs say '~60s default', gist says not configurable). +- Whether SDKControlPermissionRequest (control can_use_tool) carries permission_suggestions populated by default in the CLI build, or only in SDK-wrapped modes. +- Exact behavior of the auto-mode classifier's server-side tool-result suspicious-content probe (separate from classifier) — implementation detail not fully documented. +- Full enumeration of which `git` subcommands are classified read-only by the built-in read-only command set (only 'read-only forms of git' is documented generically). + +## Sources +- [Configure permissions - Claude Code Docs](https://code.claude.com/docs/en/permissions) — Primary source: full rule syntax (Tool/Tool(specifier)), deny->ask->allow evaluation, Bash/PowerShell/Read/Edit/WebFetch/MCP/Agent/Cd per-tool semantics, symlink handling, protected paths list, hooks interaction, settings precedence, managed-only keys. +- [Choose a permission mode - Claude Code Docs](https://code.claude.com/docs/en/permission-modes) — Primary source for all 6 modes (default/acceptEdits/plan/auto/dontAsk/bypassPermissions), auto-mode classifier details (v2.1.83+, model/provider gating, 3-consecutive/20-total fallback, subagent 3-point checks), v2.1.126/v2.1.142 version-specific behavior, protected-path per-mode matrix, disable flags. +- [Configure permissions (Agent SDK) - Claude Code Docs](https://code.claude.com/docs/en/agent-sdk/permissions) — Authoritative 6-step SDK evaluation order (Hooks->Deny->Ask->Mode->Allow->canUseTool), allowed_tools/disallowed_tools semantics, subagent mode inheritance, dontAsk/bypassPermissions edge cases, plan-mode forces edits through canUseTool. 
+- [Handle approvals and user input (Agent SDK) - Claude Code Docs](https://code.claude.com/docs/en/agent-sdk/user-input) — canUseTool callback signature/args, PermissionResultAllow/Deny shapes, updated_input/updated_permissions for 'approve and remember', ToolPermissionContext.suggestions, AskUserQuestion routing, dummy PreToolUse hook requirement in Python. +- [claude_code_sdk/types.py (PermissionMode/PermissionUpdate/PermissionResult dataclasses)](https://github.com/anthropics/claude-code-sdk-python/blob/cfdd28a2/src/claude_code_sdk/types.py) — Exact Python dataclass shapes for PermissionMode, PermissionUpdateDestination(userSettings/projectSettings/localSettings/session), PermissionRuleValue, PermissionUpdate(addRules/replaceRules/removeRules/setMode/addDirectories/removeDirectories), PermissionResultAllow/Deny, ToolPermissionContext. +- [ToolPermissionRequest struct - claude_codes Rust crate (docs.rs)](https://docs.rs/claude-codes/latest/claude_codes/io/struct.ToolPermissionRequest.html) — Authoritative CLI wire struct: {tool_name, input, permission_suggestions, blocked_path, decision_reason, tool_use_id} + builder methods allow/allow_with/allow_and_remember confirming updatedInput + permissions shape. +- [claude-cli-agent-protocol skill (NDJSON control_request/control_response)](https://playbooks.com/skills/bohdan-shulha/skills/claude-cli-agent-protocol) — Concrete NDJSON examples for control_request (subtype can_use_tool/set_permission_mode) and control_response (behavior allow needs updatedInput, deny needs message, request_id match, ~60s block, --permission-prompt-tool stdio requirement). 
+- [Claude Code settings - Claude Code Docs](https://code.claude.com/docs/en/settings) — Exact permissions.* settings keys (allow/ask/deny/additionalDirectories/defaultMode/disableBypassPermissionsMode/disableAutoMode/skipDangerousModePermissionPrompt), defaultMode valid values incl v2.1.142 auto-restriction, config scopes, hot-reload behavior, managed-only allowManagedPermissionRulesOnly. diff --git a/docs/claude-code-architecture/research/sandbox-security.md b/docs/claude-code-architecture/research/sandbox-security.md new file mode 100644 index 0000000..841d68c --- /dev/null +++ b/docs/claude-code-architecture/research/sandbox-security.md @@ -0,0 +1,144 @@ +# Research: sandbox-security + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code's sandbox-security subsystem (v2.1.x, 2025-2026) is a defense-in-depth layering of three mechanisms: (1) an in-process permission rule engine (deny→ask→allow, with gitignore-style path and Bash-wildcard specifiers), (2) a 4-stage Bash-command static-analysis wrapper that classifies command text as read-only / dangerous / too-complex before it is matched against rules or executed, and (3) an OS-level Bash sandbox (macOS Seatbelt via sandbox-exec; Linux/WSL2 bubblewrap (bwrap) + socat + seccomp) that confines filesystem writes to cwd+$TMPDIR and forces all network egress through a host-side allowlist proxy over a Unix socket. The sandbox was introduced Oct 20 2025 (Anthropic engineering blog) and open-sourced as @anthropic-ai/sandbox-runtime. Two sandbox modes exist: "auto-allow" (sandboxed Bash runs unprompted; the sandbox boundary replaces the prompt) and "regular permissions" (sandboxed commands still prompt). Even in auto-allow, explicit deny rules, content-scoped ask rules (e.g. Bash(git push *)), and rm/rmdir targeting /, $HOME, or critical paths still force prompts.
Secrets/PII are handled by subprocess-env scrubbing (CLAUDE_CODE_SUBPROCESS_ENV_SCRUB), a 40+-rule gitleaks-based client-side secret scanner that redacts tool output before team-memory sync, OAuth-param redaction, and API-key truncation in the UI. The bypassPermissions mode (--dangerously-skip-permissions) is gated by a remote GrowthBook killswitch (tengu_disable_bypass_permissions_mode) and blocked when running as root/sudo. + +## Components +### Permission rule engine (deny→ask→allow) +**Purpose:** Decides whether a tool call (Bash, Read, Edit, WebFetch, MCP, Agent, Cd) is allowed, denied, or must prompt — before the tool runs. + +**Mechanism:** Each Bash command is parsed (Stage 1, see Bash wrapper) and split on separators && || ; | |& & and newlines into independent subcommands; each must independently match an allow rule for a compound command to be allowed. Before matching, a fixed built-in set of process wrappers is stripped: timeout, time, nice, nohup, stdbuf, and bare xargs (only when flag-less). Dev runners like npx/docker exec/devbox run/mise exec are NOT stripped. Read-only command set (ls, cat, echo, pwd, head, tail, grep, find, wc, which, diff, stat, du, cd, read-only git) is auto-allowed in every mode. Known issue (Adversa AI, v2.1.88): deny checks silently stop after 50 subcommands in one pipeline. Symlink-aware: allow requires BOTH symlink path and target to match; deny triggers if EITHER matches. + +**Data model:** Rule = {tool: string, behavior: 'allow'|'deny'|'ask', specifier: string|undefined}. Settings shape: {permissions:{allow:[...],deny:[...],ask:[...],defaultMode:'default'|'acceptEdits'|'plan'|'auto'|'dontAsk'|'bypassPermissions'}}. Known source files: utils/permissions/PermissionMode.ts, PermissionRule.ts, permissionRuleParser.ts, bashPermissions.ts, permissionSetup.ts. + +**Config:** settings.json `permissions.allow/ask/deny` arrays; `permissions.defaultMode`; `permissions.disableBypassPermissionsMode`; `permissions.disableAutoMode`. 
CLI flags `--allowedTools`, `--disallowedTools`. Managed-only: `allowManagedPermissionRulesOnly`. + +### Bash sandbox — OS-level isolation +**Purpose:** Wraps each Bash subprocess (and all its children) in an OS-enforced filesystem + network boundary so commands can be auto-allowed without per-command prompts. + +**Mechanism:** When enabled, every Bash invocation is wrapped by the sandbox-runtime (standalone `@anthropic-ai/sandbox-runtime`, CLI `srt`, Rust crate `sandbox-runtime-rs`) before spawn. (1) Filesystem: default write = cwd subtree + session $TMPDIR; default read = whole machine except certain denied dirs (note: ~/.aws/credentials and ~/.ssh/ are readable by default — admins must add denyRead). Writable region extended via allowWrite. git worktree shared .git is writable for refs/index but .git/hooks and .git/config remain denied. settings.json files at every scope and the managed-settings dir are always write-denied inside the sandbox so a command can't edit its own policy. (2) Network: all outbound traffic is forced through a host-side proxy (loopback). The sandbox grants socket access only to the proxy; the proxy consults allowedDomains/deniedDomains by requested hostname (no TLS termination, no inspection — documented domain-fronting limitation). On Linux the inner net namespace is unshared (bubblewrap --unshare-net) and socat relays localhost to the host proxy via a mounted Unix socket; on macOS Seatbelt blocks non-loopback traffic at the socket layer as a backstop for tools ignoring proxy env vars. First request to a new domain prompts the user (auto-allow mode) or is blocked (allowManagedDomainsOnly). (3) Escape hatch: if a sandboxed command fails due to restrictions, Claude may re-invoke the Bash tool with dangerouslyDisableSandbox=true; that retry runs UNSANDBOXED and goes through the regular permission flow. Setting allowUnsandboxedCommands:false ('Strict sandbox mode') ignores dangerouslyDisableSandbox entirely. 
+ +**Data model:** {sandbox:{enabled:bool, autoAllowBashIfSandboxed:bool, allowUnsandboxedCommands:bool, failIfUnavailable:bool, excludedCommands:[...], filesystem:{allowRead:[...], allowWrite:[...], denyRead:[...], denyWrite:[...], allowManagedReadPathsOnly:bool}, network:{allowedDomains:[...], deniedDomains:[...], httpProxyPort:int, socksProxyPort:int, allowUnixSockets:[...], allowAllUnixSockets:bool, allowLocalBinding:bool, allowMachLookup:[...]}}}. Filesystem arrays MERGE across scopes (managed+user+project+local). enableWeakerNestedSandbox and enableWeakerNetworkIsolation are top-level booleans. + +**Config:** sandbox.enabled (bool); sandbox.autoAllowBashIfSandboxed (default true); sandbox.allowUnsandboxedCommands (bool/array); sandbox.failIfUnavailable (bool); sandbox.excludedCommands (array, e.g. ['docker *']); sandbox.network.httpProxyPort / socksProxyPort; sandbox.network.allowUnixSockets / allowAllUnixSockets / allowLocalBinding / allowMachLookup (macOS XPC); sandbox.network.allowManagedDomainsOnly (managed-only). + +### Platform backends (Seatbelt / bubblewrap) +**Purpose:** Provide the actual OS primitives that enforce fs+net restrictions per platform. + +**Mechanism:** At startup Claude Code probes for the platform backend. macOS: /usr/bin/sandbox-exec present → Seatbelt. Linux/WSL2: bubblewrap (bwrap) + socat + (optional) the seccomp filter from @anthropic-ai/sandbox-runtime which blocks Unix domain sockets. If the backend is missing or platform unsupported (native Windows, WSL1), Claude warns and runs unsandboxed unless sandbox.failIfUnavailable=true. WSL1 unsupported (bubblewrap needs WSL2 kernel features). Ubuntu 24.04+ needs an AppArmor profile granting bwrap userns. + +**Data model:** macOS Seatbelt profile is SBPL text emitted with separate rules: `(allow file-write* (subpath ...))`, `(deny file-read* (subpath ...))` + re-allow `(allow file-read* (subpath ...))`. 
BUG (issue #39635, v2.1.85): the profile historically used `require-not` inside a deny clause, which is invalid SBPL and makes sandbox-exec abort → all bash silently fails exit 1. Valid generation requires separate deny then allow rules. + +**Config:** Drives sandbox selection via runtime probe. failIfUnavailable converts the silent unsandboxed fallback into a hard startup failure (for managed deployments). + +### Filesystem & network boundary config +**Purpose:** Define exactly which paths and domains the sandbox permits/blocks. + +**Mechanism:** Default read = entire machine minus denied set; default write = cwd + $TMPDIR. Path-prefix resolution table: '/x' absolute (stays /x), '~/x' -> $HOME/x, './x' or bare 'x' -> relative to project root for project settings OR relative to ~/.claude for user settings (so '.' in user settings resolves to ~/.claude, not the project — a known footgun). allowRead re-allows inside a denyRead region. Filesystem arrays from multiple scopes MERGE (combined, not replaced). Permission rules (Read/Edit allow and deny) and sandbox.filesystem paths are MERGED into the final sandbox boundary. Network merges WebFetch allow rules + sandbox.allowedDomains; deniedDomains blocks even when a wildcard would otherwise allow. Managed-only lockdowns: allowManagedReadPathsOnly and allowManagedDomainsOnly ignore user/project/local entries. + +**Data model:** denyWrite/allowWrite/allowRead/denyRead are string arrays. Path-prefix table: '/' absolute; '~/' home; './' or bare project-root-relative. Distinct from Read/Edit permission rule path syntax (which uses '//abs', '/proj', '~/home'). Network: allowedDomains/deniedDomains are hostname strings with '*' wildcards. + +**Config:** sandbox.filesystem.allowWrite / denyWrite / allowRead / denyRead; sandbox.network.allowedDomains / deniedDomains. 
+ +### Bash wrapper multi-stage validation +**Purpose:** Parse, classify, and gate Bash command text before execution / permission matching; defends against parser-differential and shell-quoting attacks. + +**Mechanism:** Stage 1 AST parse (tree-sitter-bash; fallback shell-quote+regex in external builds) with allowlist of safe node types — anything unhandled -> 'too-complex' requiring approval (fail-closed; PARSE_ABORTED distinguishes timeout/panic). Stage 2 (bashSecurity.ts): 23+ checks for command substitution $(...) and backticks, process substitution <(...) >(...), IFS injection, control chars, Unicode whitespace (U+00A0, U+2000-200B), brace expansion with quotes, heredoc extraction; plus zsh-specific bypass detection (=cmd expansion, =(cmd) process sub, zmodload/zpty/ztcp, PowerShell <# comments). Stage 3 semantic: only static >/dev/null and 2>&1 redirections are stripped; dynamic targets (vars, command subst, globs, tilde) reject and prompt. Stage 4 permission match against argv[0]+subcommands. In auto mode, dangerous-pattern rules are auto-stripped so Bash(python:*) etc. can't auto-approve code execution. + +**Data model:** BASH_SECURITY_CHECK_IDS enum (23+ ids, bashSecurity.ts lines 76-101). DANGEROUS_BASH_PATTERNS list (all-users) + ANT-only extension list (dangerousPatterns.ts lines 58-79). Unknown AST nodes become `too-complex` sentinel. Failed parse -> PARSE_ABORTED sentinel. + +**Config:** Gated by build-time `USER_TYPE === 'ant'` for the extended list (curl/wget/git/gh/kubectl/aws/gcloud/gsutil/sudo/zsh/fish/eval/exec/env/xargs). TRANSCRIPT_CLASSIFIER build flag gates the auto-mode ML classifier. + +### Shell quoting & provider security +**Purpose:** Prevent injection when assembling the command line passed to the shell. + +**Mechanism:** spawn() with a separate args array, never shell:true with raw input.
The shell provider wraps the command: bash disables extglob and wraps the payload in eval for alias expansion; PowerShell uses -EncodedCommand base64 UTF-16LE (not -Command). pwd captured via `pwd -P >| quoted_path`. O_NOFOLLOW on file opens prevents symlink attacks. Heredocs are extracted before parsing and restored after to work around shell-quote limitations. Command separators recognized for splitting: && || ; | |& & and newlines. 'Yes dont ask again' on a compound command saves up to 5 separate per-subcommand rules. + +**Data model:** Token normalization uses a cryptographic placeholder salt (8 random bytes hex) so injected placeholder tokens can't collide. Quoted patterns preserved; unquoted globs allowed only when every flag is read-only. + +**Config:** Process wrapper stripping list is hardcoded and NOT configurable. Exec wrappers (watch, setsid, ionice, flock) and find -exec/-delete always prompt. + +### Sandbox↔permission interaction & circuit breakers +**Purpose:** Define how the OS sandbox boundary composes with the in-process permission system and which prompts can never be suppressed. + +**Mechanism:** Auto-allow mode (default when sandbox enabled) runs sandboxed commands without prompts; the sandbox boundary substitutes for the prompt. Even so, these always still apply: explicit deny rules; rm/rmdir targeting /, home, or critical system paths; content-scoped ask rules like Bash(git push *); a bare Bash ask rule is skipped for sandboxed commands but still applies to commands that fall back to unsandboxed. bypassPermissions mode (--dangerously-skip-permissions) skips prompts but STILL prompts for explicit ask rules and for rm -rf /, rm -rf ~, and writes to protected dirs (.git, .claude, .vscode, .idea, .husky, .cargo, .devcontainer, .yarn, .mvn, .config/git); blocked entirely when running as root/sudo on Linux/macOS unless inside a recognized sandbox. + +**Data model:** PermissionMode enum: default, plan, acceptEdits, bypassPermissions, dontAsk, auto. 
Modes default to prompting; deny rules from ANY scope (managed/user/project/local) always win and cannot be overridden at any other scope. + +**Config:** sandbox.autoAllowBashIfSandboxed (default true). bypassPermissions gated by remote killswitch gate `tengu_disable_bypass_permissions_mode` (GrowthBook/Statsig, fail-open). permissions.disableBypassPermissionsMode and permissions.disableAutoMode = 'disable' to forbid. + +### Secret/PII handling in tool results & subprocess env +**Purpose:** Prevent credential leakage via subprocess env, tool output, logs, team-memory sync, and error messages. + +**Mechanism:** Credentials: macOS Keychain (hex-encoded so invisible in process monitors) with plaintext fallback to ~/.claude/.credentials.json at 0o600 with explicit user warning. API keys never logged; auth status logged only as booleans; keys truncated in UI (sk-ant-...{last}). When CLAUDE_CODE_SUBPROCESS_ENV_SCRUB is set (auto in GitHub Actions with untrusted content), subprocessEnv.ts strips Anthropic/cloud/GitHub-Actions secrets from child envs before spawning Bash. Client-side secretScanner (40+ gitleaks rules) replaces detected secrets with [REDACTED] before uploading to team memory. OAuth params (state/nonce/code_challenge/code_verifier/code) redacted from logs via redactSensitiveUrlParams. Undercover mode (ant-only) strips internal codenames/versions from commits and PRs. + +**Data model:** Scrubbed env var categories: Anthropic (ANTHROPIC_API_KEY, CLAUDE_CODE_OAUTH_TOKEN, ANTHROPIC_AUTH_TOKEN, ANTHROPIC_FOUNDRY_API_KEY, ANTHROPIC_CUSTOM_HEADERS), OTEL (*_HEADERS for LOGS/METRICS/TRACES), cloud (AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, AWS_BEARER_TOKEN_BEDROCK, GOOGLE_APPLICATION_CREDENTIALS, AZURE_CLIENT_SECRET, AZURE_CLIENT_CERTIFICATE_PATH), GitHub Actions (ACTIONS_ID_TOKEN_REQUEST_TOKEN/URL, ACTIONS_RUNTIME_TOKEN/URL, ALL_INPUTS, OVERRIDE_GITHUB_TOKEN, DEFAULT_WORKFLOW_TOKEN, SSH_SIGNING_KEY) plus INPUT_ duplicates. 
GITHUB_TOKEN/GH_TOKEN intentionally NOT scrubbed. secretScanner.ts: 40+ gitleaks rules -> [REDACTED]. + +**Config:** CLAUDE_CODE_SUBPROCESS_ENV_SCRUB=1. plainTextStorage path ~/.claude/.credentials.json (0o600). Keychain uses hex encoding. redactSensitiveUrlParams strips state/nonce/code_challenge/code_verifier/code. + +### WebFetch security (preapproved domains, SSRF) +**Purpose:** Constrain Claude's own web fetches against SSRF, malicious domains, and redirect loops. + +**Mechanism:** Max URL length 2000 chars, max HTTP content 10MB, fetch timeout 60s, max 10 redirects, markdown truncation 100K chars. Blocks URLs with embedded user:password credentials and single-label hostnames (<2 domain parts); plain HTTP URLs are auto-upgraded to HTTPS. Only same-origin redirects allowed (www. variants OK); cross-domain needs approval. Preflight domain_info query to api.anthropic.com (10s timeout, 5-min LRU TTL; URL content cached 15 min). 130+ preapproved doc/registry domains for GET-only WebFetch (curated; not inherited by sandbox; some allow uploads so unsafe for unrestricted net). file:// URLs are blocked implicitly: their empty hostname fails the same <2-domain-parts check. + +**Data model:** Preapproved list is WebFetch-GET-only and explicitly NOT inherited by the sandbox fs/net boundary. Path-prefix match uses segment boundary: pathname===p || pathname.startsWith(p+'/'). + +**Config:** permissions.deny WebFetch(domain:...) and sandbox.network.deniedDomains combine. WebFetch allow/deny rules and sandbox allowedDomains merge for the sandbox network boundary. + +## Key behaviors +- Default read policy is the WHOLE machine (including ~/.ssh and ~/.aws/credentials) — only writes are confined to cwd+$TMPDIR. Add denyRead for credential dirs. This is a frequent footgun for re-implementors who assume read is also confined. +- Permission precedence is deny>ask>allow with NO specificity override: a matching ask rule prompts even when a more specific allow also matches.
Deny from ANY settings scope (managed>CLI>local project>shared project>user) cannot be overridden by allow at any other scope. +- Bash compound commands are split on && || ; | |& & and newlines; EACH subcommand must independently pass. Approving a compound with 'Yes, dont ask again' saves up to 5 separate per-subcommand rules (not one rule for the whole string). +- Process wrappers stripped before matching: timeout, time, nice, nohup, stdbuf, and bare (flag-less) xargs only. npx/docker exec/devbox run/mise exec are NOT stripped — Bash(devbox run *) matches everything after 'run' including 'devbox run rm -rf .'. Exec wrappers watch/setsid/ionice/flock always prompt. +- Space before '*' matters: Bash(ls *) matches 'ls -la' (word boundary) but not 'lsof'; Bash(ls*) matches both. Trailing ':*' is equivalent to trailing ' *' and is only recognized at the very end of a pattern. +- A bare tool-name deny (e.g. 'Bash' or 'mcp__*') REMOVES the tool from Claude's context entirely (Claude never sees it). A scoped deny ('Bash(rm *)') leaves the tool visible and blocks matching calls at runtime. +- Sandbox fs path-prefix syntax differs from Read/Edit permission syntax: sandbox uses '/abs', '~/', './proj' (standard); Read/Edit use '//abs', '/proj', '~/home'. Do NOT reuse one parser for the other. +- Filesystem arrays MERGE across scopes (managed+user+project+local) — they are combined, not replaced. But boolean keys (enabled, failIfUnavailable) take the managed value and ignore local. excludedCommands always merges and has no managed-only lockdown, so a developer can always append escape-hatch commands. +- '.' in sandbox fs config resolves to the project root only inside project settings; in user settings (~/.claude/settings.json) it resolves to ~/.claude — placing the denyRead ~/ + allowRead . example in user settings would NOT protect the project. +- Two sandbox modes: auto-allow (sandboxed commands run unprompted) and regular permissions (sandboxed commands still prompt). 
Auto-allow works independently of permission mode — even outside acceptEdits, sandboxed Bash modifying files runs without prompt. +- autoAllowBashIfSandboxed (default true) means a bare Bash ask rule is SKIPPED for sandboxed commands (sandbox substitutes for the prompt), but content-scoped ask rules like Bash(git push *) STILL force a prompt, deny rules still apply, and rm/rmdir of /, home, or critical paths still prompts. +- Sandbox does NOT cover built-in file tools (Read/Edit/Write — those use the permission system), computer use (runs on real desktop), or environment inheritance (sandboxed Bash inherits parent env incl. credentials unless CLAUDE_CODE_SUBPROCESS_ENV_SCRUB is set). Subagents share the parent sandbox config. +- bypassPermissions skips prompts but still prompts for: explicit ask rules, rm -rf / and rm -rf ~ (circuit breaker), and writes to protected dirs (.git/.claude/.vscode/.idea/.husky/.cargo/.devcontainer/.yarn/.mvn/.config/git). --dangerously-skip-permissions is BLOCKED when running as root/sudo on Linux/macOS unless inside a recognized sandbox. +- seatbelt SBPL generation must NOT use require-not inside a deny clause (aborts sandbox-exec, silent exit 1 — issue #39635). Emit separate (deny file-read* (subpath ...)) then (allow file-read* (subpath ...)) rules. +- Known parser-differential risk: tree-sitter-bash is the primary parser; external builds fall back to shell-quote+regex which is less robust. Fail-closed: unknown AST node -> 'too-complex' -> approval required. +- dangerousPatterns auto-mode stripping is split: python/node/ruby/perl/php/lua/deno/tsx/npx/npm|yarn|pnpm|bun run/bash/sh/ssh are stripped for ALL users; curl/wget/git/gh/kubectl/aws/gcloud/gsutil/sudo/zsh/fish/eval/exec/env/xargs are ant-internal only (USER_TYPE==='ant'). External users get weaker protection for those. +- Adversa AI disclosed deny-rule bypass: deny checks silently stop after 50 subcommands in a single pipeline (v2.1.88). 
A reimplementation must evaluate deny rules against every subcommand rather than stopping after the first 50; any cap on subcommand count must fail closed (prompt or deny), never silently skip the checks. +- bypassPermissions killswitch via GrowthBook gate `tengu_disable_bypass_permissions_mode` is one-way (Anthropic can revoke, not grant) and FAIL-OPEN (if GrowthBook is unreachable the gate defaults to off, so bypass mode stays available). Checked once before first query per session; reset on /login. +- Domain safety preflight is cached 5 min (LRU), so a newly compromised or newly blocklisted domain stays reachable for up to 5 min. URL content cached 15 min. +- Preapproved WebFetch domains (130+) are GET-only and explicitly NOT shared with the sandbox network boundary — some (huggingface.co, kaggle.com, nuget.org) allow uploads and would be unsafe as general sandbox egress. +- macOS Seatbelt + Go caveat: a faithful Go replica must not reproduce the require-not-inside-deny construct (sandbox-exec aborts on it) and must generate valid SBPL; also note enableWeakerNetworkIsolation (allow system TLS trust service) and enableWeakerNestedSandbox (bind-mount container /proc) deliberately weaken isolation and should only be opt-in.
+ +## External interfaces +- settings.json keys: sandbox.{enabled,autoAllowBashIfSandboxed,allowUnsandboxedCommands,failIfUnavailable,excludedCommands}, sandbox.filesystem.{allowRead,allowWrite,denyRead,denyWrite,allowManagedReadPathsOnly}, sandbox.network.{allowedDomains,deniedDomains,httpProxyPort,socksProxyPort,allowUnixSockets,allowAllUnixSockets,allowLocalBinding,allowMachLookup,allowManagedDomainsOnly}, enableWeakerNestedSandbox, enableWeakerNetworkIsolation +- settings.json keys: permissions.{allow,deny,ask,defaultMode,disableBypassPermissionsMode,disableAutoMode,additionalDirectories}, and bare allow/deny/ask/defaultMode shorthands +- Permission rule syntax: Tool / Tool(specifier); Bash(npm run *) / Bash(ls:*) (= Bash(ls *)); WebFetch(domain:example.com); Read(//abs|~/home|/proj|./cwd); mcp__server__tool and mcp__server__*; Agent(Name); Cd(path) +- Env vars: CLAUDE_CODE_SUBPROCESS_ENV_SCRUB (strip secrets from child envs), CLAUDE_CODE_UNDERCOVER=1 (force undercover), USER_TYPE=ant (build-time internal gating) +- CLI flags: --dangerously-skip-permissions (bypass mode), --allowedTools / --disallowedTools, --add-dir +- Bash tool parameter: dangerouslyDisableSandbox (bool) — retry outside sandbox; ignored under allowUnsandboxedCommands:false +- /sandbox slash command (panel: Mode/Overrides/Config/Dependencies); /permissions; /add-dir; /cd (v2.1.169+) +- Remote gates (GrowthBook/Statsig): tengu_disable_bypass_permissions_mode (bypass killswitch), TRANSCRIPT_CLASSIFIER (auto-mode gate) +- External tool: `srt` / `@anthropic-ai/sandbox-runtime` (npm) / sandbox-runtime-rs (Rust crate) — sandbox-exec (macOS) + bubblewrap + socat + seccomp filter (Linux/WSL2) +- WebFetch domain preflight: POST api.anthropic.com/api/web/domain_info (10s timeout, 5-min cache TTL) + +## Open questions +- Exact shape of the dynamically generated SBPL profile emitted for arbitrary allowWrite/denyRead combinations post-fix for issue #39635 (need to read sandbox-runtime source for the 
canonical generator). +- Whether the `allowUnsandboxedCommands` setting is a boolean (Strict mode toggle) or an array of commands permitted unsandboxed — the gist lists it as an array while docs describe it as bool false=Strict; likely both forms exist (bool false disables the escape hatch, array lists allowed unsandboxed commands). +- The full current DANGEROUS_BASH_PATTERNS + ant-only list as of the latest 2026 build (the v2.1.88 reconstruction may be slightly stale). +- Whether the 50-subcommand deny bypass is fixed in current 2026 builds and what the new cap is. + +## Sources +- [Configure the sandboxed Bash tool — Claude Code Docs](https://code.claude.com/docs/en/sandboxing) — Official, authoritative reference for sandbox modes, fs/network config, allowedDomains/deniedDomains, excludedCommands, dangerouslyDisableSandbox escape hatch, Seatbelt/bubblewrap platform mapping, WSL2 details, security limitations. +- [Configure permissions — Claude Code Docs](https://code.claude.com/docs/en/permissions) — Authoritative permission rule syntax: deny→ask→allow order, Bash wildcard/compound/wrapper rules, read-only command set, Read/Edit path anchors, WebFetch domain rules, MCP/Agent/Cd rules, managed-only keys, settings precedence. +- [Beyond permission prompts: making Claude Code more secure and autonomous with sandboxing — Anthropic Engineering](https://www.anthropic.com/engineering/claude-code-sandboxing) — Anthropic engineering post confirming fs+network isolation built on macOS Seatbelt and Linux bubblewrap, the Unix-socket→host-proxy network architecture, 84% prompt reduction, and the open-sourced sandbox-runtime. +- [Security — Claude Code Docs](https://code.claude.com/docs/en/security) — Official statement of read-only-by-default, built-in read-only Bash command set, write confined to launch dir, command-injection detection, fail-closed matching, network command approval, WebDAV/UNC warnings, macOS Keychain credential storage. 
+- [Security Analysis of Claude Code v2.1.88 — Source Reconstructed from Source Maps](https://b.zzn.im/blog/claude-code-v2.1.88-security-analysis/) — Source-map reconstruction giving internal file paths and mechanisms: 4-stage Bash validation, bashSecurity 23+ checks, dangerousPatterns ant-only split, subprocessEnv scrub var list, secretScanner, bypassPermissions killswitch gate name tengu_disable_bypass_permissions_mode, WebFetch limits, preapproved domains. +- [Seatbelt sandbox silently blocks all bash commands when denyRead is configured — anthropics/claude-code#39635](https://github.com/anthropics/claude-code/issues/39635) — Primary evidence for the exact SBPL generation bug (require-not in deny aborts sandbox-exec) and that valid generation uses separate (deny file-read* (subpath ...)) + (allow ...) rules. +- [anthropic-experimental/sandbox-runtime](https://github.com/anthropic-experimental/sandbox-runtime) — The open-sourced runtime Claude Code wraps: confirms sandbox-exec (macOS Seatbelt) + bubblewrap (Linux) + proxy-based network filtering; CLI srt / npm @anthropic-ai/sandbox-runtime. +- [Claude Code — Complete settings.json Reference (v2.1.104) — gist](https://gist.github.com/mculp/c082bd1e5a439410158974de90c89db7) — Compiled settings key catalog (~125 keys) including the full sandbox.* and permissions.* schema, enableWeakerNestedSandbox/enableWeakerNetworkIsolation, network sub-keys (allowUnixSockets, allowMachLookup, allowLocalBinding). +- [Critical Claude Code vulnerability: Deny rules silently bypassed after 50 subcommands — Adversa AI](https://adversa.ai/blog/claude-code-security-bypass-deny-rules-disabled/) — Documents the 50-subcommand deny-rule bypass disclosed by Adversa AI Red Team (v2.1.88) — load-bearing for the reimplementation to cap iteration correctly. 
+- [How /sandbox Works — Claude Code Camp](https://www.claudecodecamp.com/p/claude-code-sandboxing-how-sandbox-works-and-what-it-doesn-t-protect) — Confirms Seatbelt backstop blocking non-loopback traffic at the socket layer for tools that ignore proxy env vars, and the .git/hooks deny that breaks git init under sandbox. +- [Claude Code's Deny Rules Don't Protect You — adamkinney (AI All The Things)](https://adamkinney.com/aatt/claude-code/deny-rules-dont-protect-you-sandbox-does/) — Clarifies that permission deny rules are in-process (not OS-level), why Read deny doesn't stop `python -c 'open(...)'`, and that sandbox.filesystem.denyRead is the OS-enforced layer. diff --git a/docs/claude-code-architecture/research/session-transcript.md b/docs/claude-code-architecture/research/session-transcript.md new file mode 100644 index 0000000..cdae53c --- /dev/null +++ b/docs/claude-code-architecture/research/session-transcript.md @@ -0,0 +1,121 @@ +# Research: session-transcript + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code persists every conversation as an append-only JSONL transcript, one file per session, at $CLAUDE_CONFIG_DIR/projects/{encoded-cwd}/{session-id}.jsonl (CLAUDE_CONFIG_DIR defaults to ~/.claude). Each line is one JSON object — a user message, assistant response, system event, hook progress, queued input, or file-history snapshot — and every record carries a uuid plus parentUuid, forming a DAG/linked-list rather than a flat log. Long sessions are divided into segments by "compact_boundary" records, each followed by a synthetic summary user message that restarts the parent chain; cross-file continuation is detected by a sessionId that changes mid-file while parentUuid bridges the gap. Resume (--continue / --resume [session]), fork (--fork-session or /branch), and rewind (/rewind, double-Esc) all operate by walking this parentUuid chain and (for code rewind) the file-history-snapshot entries.
The SDK's SessionStore interface is a dual-write mirror of the same JSONL entries (local disk first, then append()) and cannot be combined with persistSession:false or enableFileCheckpointing. + +## Components +### On-disk layout & project key encoding +**Purpose:** Determines the physical path each session transcript is written to and how the directory name is derived from the working directory. + +**Mechanism:** On session start Claude Code derives an encoded directory name from the absolute working directory by replacing every non-alphanumeric character with '-' and creates (or opens) ~/.claude/projects/{encoded-cwd}/{session-id}.jsonl. Each line is appended as a self-contained JSON object; the file is append-only and never truncated/rewritten. Resume resolves the encoded dir from cwd, then scans for the target session-id (or the most-recently-modified one for --continue). Moving a session with /cd (v2.1.169+) relocates the file into the new directory's project storage. Session-ID lookup is scoped to the current project dir + its git worktrees; a session created elsewhere yields 'No conversation found with session ID: {session-id}'. + +**Data model:** Path layout: $CLAUDE_CONFIG_DIR/projects/{encoded-cwd}/{session-id}.jsonl + subagent sidecars under subagents/agent-{agent-id}.jsonl and file-history snapshots. Encoded-cwd = absolute cwd with every non-alphanumeric char replaced by '-' (e.g. /Users/me/proj -> -Users-me-proj); confirmed by docs and GitHub issues: non-ASCII chars collapse to '-' too (issue #19972), and even underscores get replaced (issue #39424), so two distinct paths can collide. session-id is a random UUID; the filename stem MUST equal the sessionId field on every line the file itself owns (the only exception is the duplicated parent-session prefix at the start of a continuation file, which carries the parent's sessionId — see "Cross-file session continuation"). + +**Config:** CLAUDE_CONFIG_DIR relocates the entire ~/.claude root. cleanupPeriodDays (settings.json, default 30, min 1, 0 rejected) sweeps stale files at startup and also sweeps orphaned subagent worktrees. CLAUDE_CODE_SKIP_PROMPT_HISTORY=1 / --no-session-persistence / persistSession:false suppress writes.
There is no disable for cleanup, only delay (set 99999 for ~274 years). + +### Transcript entry schema (common fields) +**Purpose:** Defines the shape of each JSONL line so the chain can be reconstructed for resume/rewind/fork. + +**Mechanism:** Every line carries type, uuid, parentUuid, sessionId, timestamp, plus optional cwd/version/gitBranch. uuid is a per-record identifier; parentUuid points to the PRECEDING record's uuid, building a linked list / directed-acyclic-graph (in practice a tree) — this is what makes resume, rewind, and fork possible. The first record's parentUuid is null. Because it's a DAG not a flat log, the same file can represent branching (forks written into a new file but sharing prefix uuids). On the SDK SessionStore path, entries are emitted as SessionStoreEntry objects = opaque JSON-safe values one-per-line. + +**Data model:** { type, uuid, parentUuid, sessionId, timestamp, cwd, version, gitBranch, plus type-specific fields } + +**Config:** ISO-8601 UTC timestamps. version field carries the Claude Code release that wrote the line. gitBranch captured per-line for the Ctrl+B branch filter. + +### Message types: user & assistant +**Purpose:** The two conversational record kinds; everything else is metadata around them. + +**Mechanism:** Type 'user': message.role='user', content is EITHER a plain string OR an array of content blocks; tool results come back as a block { type:'tool_result', tool_use_id, content:string|text/image-block-array, is_error }. Extra user fields: userType ('external' for human input), todos (current task-list snapshot), permissionMode. Type 'assistant': message is the full API response with model, role, content (array of {type:'text',text} / {type:'tool_use',id,name,input} / {type:'thinking'} blocks), stop_reason, usage, id; extra field requestId. 
Compaction summary is a user-typed line with isCompactSummary:true, isVisibleInTranscriptOnly:true and content beginning 'This session is being continued from a previous conversation that ran out of context.' + +**Data model:** { type:'user'|'assistant', message:{ role, content, [usage, model, stop_reason, id] }, subtype, user/assistant-only fields } + +**Config:** userType distinguishes human vs system-injected. todos field persists the structured Task list state alongside the message. permissionMode records the session's permission level. + +### Metadata record types: system, progress, queue-operation, file-history-snapshot +**Purpose:** Non-conversational events written into the same JSONL so the transcript is a complete execution log. + +**Mechanism:** Type 'system': carries subtype. Notable subtypes: 'compact_boundary' (the compaction marker — see Compaction component), 'stop_hook_summary' (end-of-turn hook results: hookCount, hookInfos[command+duration], hookErrors, preventedContinuation, stopReason), and (SDK mirror) 'mirror_error'. Type 'progress': hook execution events; data.type e.g. 'hook_progress', data.hookEvent (e.g. 'PostToolUse'), data.hookName (e.g. 'PostToolUse:Bash'), data.command. Type 'queue-operation': operation:'enqueue', content = queued user text while the assistant was mid-turn. Type 'file-history-snapshot': snapshot.trackedFileBackups = map of file path -> backup state, used by /rewind to restore file trees. + +**Data model:** system subtype set includes: compact_boundary, stop_hook_summary, mirror_error (SDK sessionStore failure). progress.data: { type:'hook_progress', hookEvent, hookName, command }. + +**Config:** Hook events keyed by hookEvent (PreToolUse/PostToolUse) and hookName (e.g. PostToolUse:Bash). queue-operation records input-buffered text. 
+ +### Compaction segments (within a single file) +**Purpose:** Keeps long sessions running past the context window by periodically summarizing and resetting the active chain, while preserving the original transcript. + +**Mechanism:** When context approaches the model's limit (~167K tokens observed), Claude Code writes a system record { type:'system', subtype:'compact_boundary', logicalParentUuid:uuid, parentUuid:null, content:'Conversation compacted', compactMetadata:{ trigger:'auto'|'manual', preTokens:number } }. The referenced pre-compaction uuids are dropped from the active context. Immediately after, it appends a synthetic user message with isCompactSummary:true, parentUuid pointing at the boundary uuid, content = an LLM-generated summary of everything so far. A single file can contain MANY boundaries (observed 5 in a 21-hour session, compacting ~every 2h). getSessionMessages returns the post-compaction chain only (e.g. 18 msgs from 503 raw entries); raw history must be read via store.load(). + +**Data model:** Boundary: { type:'system', subtype:'compact_boundary', logicalParentUuid, parentUuid:null, content:'Conversation compacted', compactMetadata:{ trigger:'auto'|'manual', preTokens:number } } + +**Config:** CLAUDE_CODE_AUTO_COMPACT_WINDOW + CLAUDE_AUTOCOMPACT_PCT_OVERRIDE tune the trigger. preTokens lets external tools know how close to the limit the session was. + +### Cross-file session continuation (continuation files) +**Purpose:** Allows a single logical conversation to span multiple JSONL files when a session is resumed into a new file. + +**Mechanism:** Sometimes a fresh session-id file is created that logically continues an earlier session. The new file's first lines carry the PARENT session's sessionId (a byte-for-byte duplicate of the parent's trailing compact_boundary + messages), then at some line the sessionId switches to the new file's own id; that switch point's record has parentUuid bridging into the parent's last record.
Detection is STRUCTURAL — there is no parentSessionId/resumedFrom field: extract session-id from the filename; if the first record's sessionId differs, the first id is the parent and only records whose sessionId == filename id belong to THIS file (prefix ones are duplicates to skip). A shared slug field (human-readable name, e.g. 'zesty-singing-newell') persists across continuations. + +**Data model:** File d621b0b1.jsonl contains: lines[0..N] with sessionId=d8af951f (parent, skip as duplicates) then lines[N+1..] with sessionId=d621b0b1 (this file's own). shared slug across both files. + +**Config:** slug is the cross-file conversation identifier. Continuation prefix lines are byte-duplicates of parent's tail — dedup by sessionId. + +### SessionStore mirror (SDK external storage) +**Purpose:** Mirrors transcript lines to an external backend (S3/Redis/Postgres) so sessions resume across hosts; defines the formal append/load contract the Go impl should mirror. + +**Mechanism:** SDK options.sessionStore replaces/augments local storage. projectKey = the same stable filesystem-safe cwd encoding; sessionId = session uuid; subpath set for subagent/sidecar transcripts ('subagents/agent-'). append(key,entries[]) called after each local batch; load(key) called once before subprocess spawn on resume. Dual-write: Claude Code subprocess ALWAYS writes local disk first, then forwards the batch to append(). If append rejects/times out, error is logged and a {type:'system',subtype:'mirror_error'} is emitted into the iterator; query continues (local copy is durable); failed batches are NOT retried. load must return entries deep-equal to appended (byte-equal not required). forkSession rewrites all sessionId fields + remaps uuids, then appends under a new key (NOT a byte/copy-object shortcut). Cannot combine sessionStore with persistSession:false (throws) nor with enableFileCheckpointing (throws — file-history blobs are local-disk-only). 
+ +**Data model:** SessionKey={ projectKey:string, sessionId:string, subpath?:string }; subpath e.g. 'subagents/agent-' is opaque key suffix following on-disk layout. + +**Config:** Python SDK always persists; TypeScript-only persistSession:false for ephemeral. mirror_error system msg emitted (not retried) on append failure. SessionStore key includes subpath for sidecars. + +### Subagent transcripts & sidecar files +**Purpose:** Stores per-subagent conversation logs and supporting artifacts under the same project dir. + +**Mechanism:** Each subagent (Task tool) gets its own transcript at subpath 'subagents/agent-' (relative to the session directory). listSubagents requires the store's listSubkeys; getSubagentMessages uses listSubkeys when available else falls back to direct subpath. On resume, listSubkeys is called to restore subagent files; without it only the main transcript is materialized. Other sidecars include file-history snapshots for /rewind and the session summary. Subagent transcripts are excluded from --resume/--continue pickers and claude agents list when spawned under CLAUDE_CODE_CHILD_SESSION (v2.1.172+). + +**Data model:** Sibling/sidecar files alongside .jsonl in the project dir; listSubkeys enumerates them for resume. + +**Config:** Main file = main conversation. subagents/agent-.jsonl for each subagent. Permission decisions, summaries, and snapshots all sidecar'd under the same session dir. + +## Key behaviors +- project dir name = absolute cwd with EVERY non-alphanumeric char replaced by '-' (collapses underscores and non-ASCII, so non-ASCII paths fragment/collide — known issue #39424, #19972). +- --continue resumes most-recently-modified session for the current dir; --resume opens picker, or resumes by exact name (ambiguous name => picker with name prefilled) or by raw session-id. /resume on ambiguity ERRORS instead of opening picker. 
+- session-id lookup is scoped to current project dir + its git worktrees; --resume from a different cwd reports 'No conversation found with session ID: '. Session picker Ctrl+W widens to all worktrees, Ctrl+A to all projects. +- --fork-session + (--continue|--resume) OR /branch create a copy: prints BOTH new and original session ids, original stays in picker. 'Allow for this session' permissions do NOT carry into the fork. Resuming the same session in two terminals without forking INTERLEAVES into one transcript. +- Transcript file is append-only and never truncated/rewritten, even through /clear and compaction; /clear starts a fresh context but the old transcript remains resumable. +- Default cleanup: 30 days at startup; minimum 1; setting 0 is REJECTED with a validation error; you cannot disable deletion, only delay it (99999 ~= 274 years). cleanup also sweeps orphaned subagent worktrees. +- claude -p / Agent SDK sessions DO NOT appear in the session picker but are resumable by explicit id. Python SDK ALWAYS persists to disk; only TypeScript supports persistSession:false (in-memory only) and that cannot coexist with sessionStore. +- Compaction is detectable structurally: compact_boundary sets parentUuid:null + logicalParentUuid; the following user msg has isCompactSummary:true and content starting 'This session is being continued from a previous conversation that ran out of context.' Re-feeding isCompactSummary lines as real dialogue is a classic bug — skip them. +- Checkpoints (/rewind, double-Esc) revert CODE+conversation/conversation-only/code-only or summarize from/up to a point. Only edits via Claude's Write/Edit/NotebookEdit are tracked — Bash-driven file changes (rm/mv/cp) and external edits are NOT tracked. Original messages are always preserved in transcript even after summarize. 
+- CLAUDE_CODE_CHILD_SESSION (v2.1.172+) marks nested sessions and auto-excludes them from --resume/--continue/up-arrow history/agents list; CLAUDE_CODE_FORCE_SESSION_PERSISTENCE=1 overrides; honored on v2.1.169 and earlier, removed in v2.1.170-2.1.171. + +## External interfaces +- CLI flags: --continue (alias -c), --resume (alias -r) [session], --fork-session, --from-pr {pr}, --no-session-persistence, -n {name} +- In-session commands: /resume [session], /rename {name}, /branch [name], /rewind, /clear, /compact [instructions], /export [filename] +- Env vars: CLAUDE_CONFIG_DIR, CLAUDE_CODE_SKIP_PROMPT_HISTORY, CLAUDE_CODE_CHILD_SESSION (v2.1.172+), CLAUDE_CODE_FORCE_SESSION_PERSISTENCE, CLAUDE_CODE_AUTO_COMPACT_WINDOW, CLAUDE_AUTOCOMPACT_PCT_OVERRIDE +- settings.json keys: cleanupPeriodDays (default 30, min 1, 0 rejected) +- SDK options: resume:{session-id}, continue:true, fork_session:true, persistSession:false, sessionStore, enableFileCheckpointing +- SDK result message fields: session_id, subtype; SystemMessage carries session id early (TS direct field, Python nested in data) +- SDK functions: listSessions(), getSessionInfo(), getSessionMessages(), renameSession(), tagSession(), deleteSession(), forkSession(), listSubagents(), getSubagentMessages() +- File path scheme: $CLAUDE_CONFIG_DIR/projects/{encoded-cwd}/{session-id}.jsonl (+ subagents/agent-{agent-id}.jsonl) + +## Open questions +- Exact set of all current system subtypes beyond compact_boundary / stop_hook_summary / mirror_error (e.g. tool approval, timing, init) — would require reading the latest claude-code-sdk source. +- Precise algorithm for slug generation (the human-readable name shared across continuation files) and where it is stored on each line. +- Exact JSON schema of file-history-snapshot.trackedFileBackups entries and how /rewind maps a snapshot to a restore point in the DAG.
+- Whether sessionId lines that differ from the filename in a continuation file are byte-for-byte identical to the parent's tail or lightly transformed (the writeup claims byte-identical; confirm against source). + +## Sources +- [Manage sessions - Claude Code Docs (code.claude.com)](https://code.claude.com/docs/en/sessions) — Official source for --continue/--resume/--fork-session/--from-pr, /branch, /rewind, /rename, picker shortcuts (Ctrl+W/A/B), /export, and the exact transcript path ~/.claude/projects//.jsonl + cleanupPeriodDays default + CLAUDE_CONFIG_DIR. +- [How Claude Code Session Continuation Works - Massively Parallel Procrastination](https://blog.fsck.com/agent-blog/2026/02/22/claude-code-session-continuation/) — Deepest technical source for the JSONL record schema (user/assistant/system/progress), parentUuid DAG, compact_boundary fields (logicalParentUuid, parentUuid:null, compactMetadata.trigger/preTokens), isCompactSummary, and cross-file continuation detection algorithm + slug field. +- [docs/claude-code-transcript-format.md - kent/consciousness forge](https://evilpiepirate.org/forge/kent/consciousness/src/commit/6a7ec9732b8f6964f07e112b27eda8b4fa6920f7/docs/claude-code-transcript-format.md) — Concise field reference: common fields (uuid/parentUuid/sessionId/timestamp/cwd/version/gitBranch), tool_result content blocks, assistant usage/stop_reason/requestId, system subtypes (stop_hook_summary), progress/queue-operation/file-history-snapshot types, compaction segment model. +- [Persist sessions to external storage (SessionStore) - Claude Code Docs](https://code.claude.com/docs/en/agent-sdk/session-storage) — Authoritative SessionKey/SessionStore/SessionStoreEntry contract, subpath 'subagents/agent-', dual-write-first-to-disk semantics, mirror_error, forkSession uuid-rewrite (not byte copy), persistSession:false incompatibility, getSessionMessages returns post-compaction chain. 
+- [Work with sessions (Agent SDK) - Claude Code Docs](https://code.claude.com/docs/en/agent-sdk/sessions) — Official encoded-cwd rule (every non-alphanumeric char -> '-', /Users/me/proj -> -Users-me-proj), continue vs resume vs fork semantics, session_id on result/SystemMessage, resume-across-hosts mechanics. +- [Checkpointing - Claude Code Docs](https://code.claude.com/docs/en/checkpointing) — Official /rewind behavior, checkpoint = per user prompt, persists across sessions, 30-day cleanup, only Write/Edit/NotebookEdit tracked (Bash/external not tracked), summarize from/up-to here. +- [Claude Code settings - Claude Code Docs](https://code.claude.com/docs/en/settings) — Exact cleanupPeriodDays semantics: default 30, minimum 1, 0 rejected with validation error, also governs orphaned subagent worktree removal; worktree.baseRef/symlinkDirectories settings. +- [Environment variables - Claude Code Docs](https://code.claude.com/docs/en/env-vars) — Definitive env-var surface: CLAUDE_CODE_SKIP_PROMPT_HISTORY, CLAUDE_CODE_CHILD_SESSION (v2.1.172+), CLAUDE_CODE_FORCE_SESSION_PERSISTENCE, CLAUDE_AUTOCOMPACT_PCT_OVERRIDE, CLAUDE_CODE_DEBUG_LOGS_DIR default ~/.claude/debug/.txt. +- [Don't let Claude Code delete your session logs - Simon Willison](https://simonwillison.net/2025/Oct/22/claude-code-logs/) — Independently confirms ~/.claude/projects/encoded-directory/*.jsonl location, the 30-day deletion default (github issue 4172), and the cleanupPeriodDays:99999 workaround (cannot disable, only delay). +- [[FEATURE/BUG] project path encoding - anthropics/claude-code#19972](https://github.com/anthropics/claude-code/issues/19972) — Confirms the encoding replaces non-alphanumeric (and non-ASCII) chars with '-', causing collisions and readability loss for non-ASCII paths. 
diff --git a/docs/claude-code-architecture/research/skills.md b/docs/claude-code-architecture/research/skills.md new file mode 100644 index 0000000..c0cbd9c --- /dev/null +++ b/docs/claude-code-architecture/research/skills.md @@ -0,0 +1,92 @@ +# Research: skills + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +The Skills system lets Claude Code (and the Agent SDK) extend itself via directories each containing a SKILL.md with YAML frontmatter (metadata) + markdown body (instructions). It implements THREE levels of progressive disclosure: (1) at startup only each skill's name+description+when_to_use are loaded into the Skill tool's dynamically-generated description (not the system prompt), bounded by a char budget; (2) when the model (or user) invokes a skill the full SKILL.md body is read and injected as a hidden user message (isMeta:true) plus a visible loading-status message; (3) supporting files (scripts/, references/, assets/) are loaded on demand by Claude. Skills are NOT executable code — they are prompt templates that modify conversation + execution context (allowed-tools, model, effort). The model invokes them through a single meta-tool named "Skill" (capital S) whose input is just {command:""}; Claude decides which skill to call via pure LLM reasoning over the description list, with no algorithmic routing. Custom commands (legacy .claude/commands/) have been merged into skills: both produce /name and behave identically. Skills follow the open Agent Skills standard (agentskills.io) extended by Claude Code with invocation-control frontmatter, subagent execution (context:fork), and dynamic shell-context injection. + +## Components +### Skill definition file (SKILL.md) +**Purpose:** The single required entrypoint for each skill; carries metadata frontmatter + markdown body instructions. 
+ +**Mechanism:** Startup scan loads skills/commands from user (~/.claude/skills/), project (.claude/skills/), parent dirs up to repo root, nested .claude/skills/ on demand (monorepo), --add-dir directories' .claude/skills/, plugins, and bundled set. Each SKILL.md parsed: frontmatter (between --- markers) becomes metadata; remainder is promptContent. Directory name (or plugin:dir name for plugins, or filename for legacy commands) becomes the command name typed after /. The frontmatter 'name' is the DISPLAY label only, EXCEPT for a plugin root SKILL.md where name (or plugin dir name fallback) sets the command. Live change detection watches SKILL.md text only (hooks/MCP/agents need /reload-plugins). + +**Data model:** YAML frontmatter block delimited by --- at file start. Fields use kebab-case (name, description, allowed-tools, disable-model-invocation, user-invocable, disallowed-tools, model, effort, context, agent, hooks, paths, shell, argument-hint, arguments, when_to_use). Note the snake_case when_to_use is the YAML-source key, mapped internally to whenToUse. JSON tool schema entry: { type:'skill', name, description, allowedTools:[...], disallowedTools:[...], model, isSkill:true, disableModelInvocation, userInvocable, context, agent, hooks, paths, promptContent }. 
+ +**Config:** Frontmatter keys (all optional unless noted): name (defaults to dir name), description (recommended; default = first markdown paragraph), when_to_use (appended to description with ' - ', counts toward 1,536 cap), disable-model-invocation (bool, default false), user-invocable (bool, default true), allowed-tools (space/comma string or YAML list; supports Bash(git add *) / Skill(name *) syntax), disallowed-tools (same format, clears on next user message), model, effort, context (set to 'fork'), agent (Explore/Plan/general-purpose/custom), hooks, paths (globs limiting auto-activation), argument-hint, arguments (space string or YAML list), shell (bash default | powershell, requires CLAUDE_CODE_USE_POWERSHELL_TOOL=1). + +### Skill tool (model-invoked meta-tool) +**Purpose:** The single meta-tool exposed to the model that dispatches to any individual skill; implements progressive disclosure level 1. + +**Mechanism:** Unlike static tools (Read/Bash), the Skill tool's 'description' field is a dynamic async generator. At each API request it aggregates ALL skills eligible for model invocation, formats each as `"name": description - when_to_use` (when_to_use appended with ' - ' separator), and wraps them in `<skills_instructions>` + `<available_skills>` XML inside the description. Claude picks a skill via tool_use with input {command:'skill-name'}. Validation: errorCode 1 empty, 2 unknown, 3+ can't-load/permission/already-running. The Skill tool is gated by permission rules Skill / Skill(name) / Skill(name *) and the skills filter; when the skills filter is set, 'Skill' is auto-added to allowedTools. + +**Data model:** Tool schema: name='Skill', input_schema={command:string (skill name, no args)}, output_schema={success:boolean, commandName:string}. Prompt generated via async prompt() function. + +**Config:** Filter predicate: type==='prompt' && isSkill===true && !disableModelInvocation && (source!=='builtin' || isModeCommand===true) && (description || when_to_use present). Format: `"<name>": <description> - <when_to_use>`. 
+ +### Progressive disclosure + listing budget +**Purpose:** Keep token cost near-zero until a skill is actually needed; bound the always-loaded metadata. + +**Mechanism:** Level 1 = name+description preloaded into Skill tool description every turn (subject to char budget: scales at 1% of context window, least-invoked skills' descriptions dropped first when overflow, run /doctor to see). Level 2 = full SKILL.md body loaded only when Claude/user invokes the skill, injected as a single message persisting for the session. Level 3+ = supporting files (scripts/, references/, assets/) read on demand via Read/Bash by Claude. On auto-compaction: most recent invocation of each skill re-attached keeping first 5,000 tokens each, sharing a 25,000-token combined budget, filled most-recent-first so older skills can be dropped. + +**Data model:** ContextWindow = systemPrompt + [skill listing inside Skill tool desc] + conversation. Budget = 1% of model context window (default) OR SLASH_COMMAND_TOOL_CHAR_BUDGET fixed chars. + +**Config:** budget knobs: skillListingBudgetFraction (fraction of context, default 0.01), SLASH_COMMAND_TOOL_CHAR_BUDGET (fixed char env var), maxSkillDescriptionChars (per-entry cap, default 1536). skillOverrides states: on / name-only / user-invocable-only / off (written to settings.local.json via /skills menu; absent = on; does NOT affect plugin skills). + +### Argument + shell-context injection +**Purpose:** Pass user/model args into the skill and inline live command output before Claude sees the body. + +**Mechanism:** Before the body reaches Claude, substitutions run ONCE over the original file (command output is plain text, not re-scanned). Inline !`cmd` recognized only when ! starts a line or follows whitespace (KEY=!`cmd` is left literal). Multi-line via ```! fenced block. shell frontmatter selects bash (default) or powershell. Arguments: $ARGUMENTS (or appended as 'ARGUMENTS: ' if absent), $ARGUMENTS[N]/$N positional, $name from arguments list. 
\$ escapes a literal $. On invocation Claude receives base dir path so bundled resources are reachable. + +**Data model:** Skill invocation = metadata message + isMeta:true prompt message + optional command_permissions message ({type:'command_permissions', allowedTools, model}). + +**Config:** Strings honored: $ARGUMENTS, $ARGUMENTS[N] / $N (0-based, shell-style quoting), $name (declared via arguments: list), ${CLAUDE_SESSION_ID}, ${CLAUDE_EFFORT} (low/medium/high/xhigh/max; ultracode reports as xhigh), ${CLAUDE_SKILL_DIR} (skill's own dir, not plugin root). disableSkillShellExecution:true in settings replaces !`cmd` with '[shell command execution disabled by policy]' (bundled/managed unaffected). + +### Discovery precedence + SDK integration +**Purpose:** Resolve which skill wins when names collide across scopes; expose skills programmatically in the Agent SDK. + +**Mechanism:** Precedence enterprise > personal > project; plugin skills namespaced plugin-name:skill-name so they never conflict. SDK: settingSources/setting_sources controls loading (must include 'user'/'project'); skills option on query() is a filter ('all' | [names] | [] disable all). + +**Data model:** Sources: enterprise/managed (all users) > personal (~/.claude) > project (.claude) — same-name overrides in that order. Plugins are namespaced plugin:skill and never collide. Skill takes precedence over same-named command. + +**Config:** skills filter accepts: omitted (all discovered on + Skill tool auto-added), 'all', [name,...] (only those; plugin skills as plugin:skill), or [] (disable all). Unlisted skills' files remain reachable via Read/Bash (filter, not sandbox). + +## Key behaviors +- DEFAULTS: user-invocable=true, disable-model-invocation=false; a skill with neither description nor when_to_use is FILTERED OUT of the Skill tool entirely (won't be model-invoked). 
+- allowed-tools GRANTS approval-without-prompt for listed tools while skill is active but does NOT restrict the callable set; disallowed-tools REMOVES tools from the pool but CLEARS on the next user message (transient). Both support space/comma strings or YAML lists and Bash(git add *) wildcard syntax. +- Commands were MERGED into skills: .claude/commands/deploy.md and .claude/skills/deploy/SKILL.md both produce /deploy identically; a skill wins over a same-named command. Legacy commands keep working and support the same frontmatter. +- In the SDK, SKILL.md allowed-tools is IGNORED — control tool access via the query() allowedTools option; passing skills=[...] adds 'Skill' to allowedTools automatically, but if you pass an explicit tools list you must include 'Skill' yourself. +- Plugin skills use namespace plugin-name:skill-name and CANNOT conflict with other levels; they are NOT affected by skillOverrides (manage via /plugin). Plugin root SKILL.md is the only place frontmatter name sets the command name. +- disable-model-invocation:true removes the skill's description from Claude's context entirely (it is not even listed at disclosure level 1) AND blocks preloading into subagents; user-invocable:false only hides from the / menu, NOT from Skill-tool access. +- context: fork runs the skill body as the subagent TASK prompt (no conversation history); agent: defaults to general-purpose; Explore/Plan agents skip CLAUDE.md+git status so a forked skill using them sees only SKILL.md + agent system prompt. +- Live change detection covers SKILL.md text only; if the skill folder is also a plugin, hooks/MCP/agents/output-styles changes need /reload-plugins. Creating a NEW top-level skills dir that didn't exist at startup requires a restart. +- Skill descriptions must be SINGLE-LINE in the YAML (multi-line breaks discovery — known gotcha). Keep SKILL.md body <500 lines; recommend <5,000 words. 
+- Security: project skills' allowed-tools take effect only after workspace trust dialog; bundled skills can be globally disabled via disableBundledSkills; malicious skills can exfiltrate data so audit before use. +- A few built-in commands (/init, /review, /security-review) are reachable via the Skill tool, but /compact and /help are NOT. +- ultrathink keyword in skill body requests deeper reasoning when the skill runs. + +## External interfaces +- Skill tool (model-invoked meta-tool): name='Skill', input_schema={command:string}, output_schema={success,commandName} +- CLI flag --add-dir and command /add-dir load .claude/skills from extra dirs (NOT permissions.additionalDirectories) +- Settings.json keys: disableBundledSkills, skillOverrides (object: skill->{on|name-only|user-invocable-only|off}), skillListingBudgetFraction, maxSkillDescriptionChars, disableSkillShellExecution +- Env vars: SLASH_COMMAND_TOOL_CHAR_BUDGET, CLAUDE_CODE_USE_POWERSHELL_TOOL=1, CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1 +- Built-in vars injected into skill body: $ARGUMENTS, $ARGUMENTS[N]/$N, $name, ${CLAUDE_SESSION_ID}, ${CLAUDE_EFFORT}, ${CLAUDE_SKILL_DIR} +- Slash menus: /skill-name, /skills (Space=cycle state, Enter=save), /doctor (budget overflow), /reload-plugins, /plugin (plugin skills) +- Permission rule syntax: Skill, Skill(name), Skill(name *) +- Agent SDK (Python/TS): setting_sources, skills option, allowed_tools; auto-adds 'Skill' to allowed_tools when skills set +- Plugin manifest: .claude-plugin/plugin.json; plugin root SKILL.md single-skill fallback uses name field or install-dir fallback + +## Open questions +- Exact precedence ordering when enterprise/managed vs plugin vs MCP-provided skills collide (docs say enterprise>personal>project and plugins can't conflict, but MCP-server-provided skill precedence relative to these is under-specified). 
+- Whether disallowed-tools clearing is strictly 'next user message' or 'end of turn' — docs say 'next message you send' which needs confirming against harness behavior. +- Precise behavior of effort override (low/medium/high/xhigh/max) interaction with model-specific level availability and the ultracode=>xhigh mapping. + +## Sources +- [Extend Claude with skills - Claude Code Docs](https://code.claude.com/docs/en/skills) — Primary authoritative spec: full frontmatter field reference, precedence, budget knobs (skillListingBudgetFraction/SLASH_COMMAND_TOOL_CHAR_BUDGET/maxSkillDescriptionChars/1536 cap), skillOverrides states, live change detection, bundled skills, lifecycle/compaction (5k/25k budgets), substitution vars. +- [Agent Skills in the SDK - Claude Code Docs](https://code.claude.com/docs/en/agent-sdk/skills) — Authoritative SDK behavior: skills option ('all'|list|[]), auto-add of Skill to allowedTools, setting_sources gating, allowed-tools IGNORED in SDK, filesystem-only registration (no programmatic API). +- [Plugins reference - Claude Code Docs](https://code.claude.com/docs/en/plugins-reference) — Plugin skill location/format, plugin-root SKILL.md fallback using name field vs install-dir fallback, plugin agent frontmatter fields, hook event list (SubagentStart etc.) +- [Equipping agents for the real world with Agent Skills - Anthropic Engineering](https://www.anthropic.com/engineering/equipping-agents-for-the-real-world-with-agent-skills) — Design rationale: three-level progressive disclosure (metadata -> SKILL.md -> bundled files), name+description preloaded into system prompt at startup, SKILL.md body loaded via Bash/Read on demand, Agent Skills open standard (Dec 18 2025). 
+- [Claude Agent Skills: A First Principles Deep Dive - Han, Not Solo](https://leehanchung.github.io/blogs/2025/10/26/claude-skills-deep-dive/) — Reverse-engineered internals: Skill tool input_schema {command}/output_schema {success,commandName}, dynamic async prompt() generator, isMeta dual-message injection (visible // + hidden full prompt), when_to_use->whenToUse mapping, filter predicate requiring description|when_to_use, plugin name format plugin:skill and (plugin:name) suffix. +- [Create custom subagents - Claude Code Docs](https://code.claude.com/docs/en/sub-agents) — Subagent skills: preload field, cannot preload skills with disable-model-invocation:true, Explore/Plan skip CLAUDE.md. diff --git a/docs/claude-code-architecture/research/slash-commands-plan.md b/docs/claude-code-architecture/research/slash-commands-plan.md new file mode 100644 index 0000000..2cce42f --- /dev/null +++ b/docs/claude-code-architecture/research/slash-commands-plan.md @@ -0,0 +1,99 @@ +# Research: slash-commands-plan + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code's slash-command system is split into (a) built-in commands hardcoded in the CLI (/help, /clear, /init, /model, /plan, /mcp, /agents, /memory, /compact, /permissions, etc.) and (b) user-defined commands, which since the 2025-2026 "skills merge" are implemented identically whether they live at .claude/commands/*.md or .claude/skills//SKILL.md — both create the same / command and share the same YAML frontmatter (description, allowed-tools, disallowed-tools, model, argument-hint, arguments, disable-model-invocation, user-invocable, etc.). Commands support $ARGUMENTS/$1/$N positional substitution, @file inlining, and !`bash`/```! fenced pre-processing of the prompt before it reaches the model. 
Plan Mode is a permission mode (mode === 'plan') that is read-only by enforcement: it is a permission context plus a recurring plan-mode system prompt, plus an EnterPlanMode/ExitPlanMode tool pair (the public tool name is literally "ExitPlanMode" — both V1 and V2 constants resolve to that string). The model writes a markdown plan to a file under the plans directory (default `~/.claude/plans/<slug>.md`, or `<slug>-agent-<agentId>.md` for subagents; configurable via settings.json plansDirectory), then calls ExitPlanMode (which takes NO plan content parameter — it reads the file from disk) to trigger a 5-option approval UI; on approval the session switches to the chosen permission mode (default/acceptEdits/auto) and the approved plan text is echoed back into the tool_result so the model can act on it. + +## Components +### Custom slash commands / Skills (merged system) +**Purpose:** Reusable, parameterized prompts invoked by typing /name or auto-invoked by the model via the Skill tool. + +**Mechanism:** Discovery scans project, personal, and plugin trees; command name is derived from filename (commands/) or directory name (skills/), namespaced for plugins as plugin-name:command-name. When the user types '/cmd args', the harness parses args (positional, shell-style quoting), reads the .md file, resolves frontmatter, then RENDERs the body in this order: (1) expand string substitutions ($ARGUMENTS, $N, ${CLAUDE_*}); (2) execute !`cmd` / ```! blocks (preprocessing, output inserted as plain text, NOT re-scanned); (3) inline @file references. The rendered markdown is injected as a single user message. allowed-tools are pre-approved for that turn (permission grant, not availability restriction); model/effort override the session for the turn. disable-model-invocation:true removes it from the Skill tool's catalog so the model cannot self-invoke it. 
Descriptions are loaded into context (budget = 1% of context window, scales with skillListingBudgetFraction/SLASH_COMMAND_TOOL_CHAR_BUDGET) so Claude knows what is available; full body loads only on invocation. + +**Data model:** File: .claude/commands/.md OR .claude/skills//SKILL.md. Body = markdown prompt. Supported substitutions: $ARGUMENTS (whole string; auto-appended as 'ARGUMENTS: ' if absent), $ARGUMENTS[N] / $N (0-based; shell-style quoting, $0 = first), $name (declared arg), ${CLAUDE_SESSION_ID}, ${CLAUDE_EFFORT}, ${CLAUDE_SKILL_DIR}, ${CLAUDE_PLUGIN_ROOT}. Inline shell injection: !`command` (recognized only at line start or after whitespace; KEY=!`cmd` is literal). Multi-line shell: fenced block opened with ```! . Escaping: \$1 yields literal; only single backslash directly before token escapes. @file refs inline file contents. + +**Config:** YAML frontmatter: description (recommended, ~60 chars for /help; combined description+when_to_use truncated at 1,536 chars in listing, configurable via maxSkillDescriptionChars); allowed-tools (string|array); disallowed-tools (clears on next user message); model (sonnet|opus|haiku|inherit, or full values like /model; session resumes next turn); effort (low|medium|high|xhigh|max); argument-hint; arguments; disable-model-invocation (bool, default false — hides description from Claude's context and blocks Skill tool); user-invocable (bool default true; false hides from / menu but Claude can still Skill-invoke); context: fork; agent (Explore|Plan|general-purpose|custom); hooks; paths (glob activation filter); shell (bash|powershell, needs CLAUDE_CODE_USE_POWERSHELL_TOOL=1); name (display name, defaults to dir/file name). Settings: disableBundledSkills, disableSkillShellExecution, skillListingBudgetFraction / SLASH_COMMAND_TOOL_CHAR_BUDGET, skillOverrides, maxSkillDescriptionChars. + +### Built-in commands +**Purpose:** Hardcoded session-control commands parsed at the start of a user message. 
+ +**Mechanism:** These are hardcoded behaviors in the CLI (not markdown prompts). When the first whitespace-delimited token of a user message starts with '/', the harness looks it up in the built-in registry; if matched, it executes native logic (e.g. /clear empties context but keeps project memory; /compact summarizes; /model opens a picker or sets the model and saves it; /plan enters plan mode with an optional immediate task). MCP servers expose prompts as commands using the format `/mcp__<server>__<prompt>` (dynamically discovered). Any remaining text after the command is passed as arguments. A few built-in commands (/init, /review, /security-review, /fewer-permission-prompts, /simplify, /code-review, /run, /verify) are exposed to the model via the Skill tool; most (/compact, /clear, etc.) are NOT. + +**Data model:** Recognized only at start of message. Each command has a purpose string shown in /help. Aliases map to canonical names (/reset,/new→/clear; /quit→/exit; /continue→/resume; /checkpoint,/undo→/rewind; /allowed-tools→/permissions; /bg→/background; /cost,/stats→/usage; /ios,/android→/mobile; /rc→/remote-control; /tp→/teleport; /proactive→/loop). Version-gated commands report an unknown-command error (e.g. 'Unknown command: /cd') on older versions. Many appear only on certain platforms/plans (/desktop macOS+Windows+subscription; /upgrade Pro/Max; /setup-bedrock needs CLAUDE_CODE_USE_BEDROCK=1; /sandbox supported platforms only). + +**Config:** N/A (hardcoded in CLI) + +### Plan Mode (EnterPlanMode / ExitPlanMode tool pair) +**Purpose:** A read-only permission mode where Claude researches and writes a plan to a file, then requests user approval before making any changes. + +**Mechanism:** EnterPlanMode (no parameters) switches the permission context mode to 'plan', saving the prior mode as prePlanMode. 
While mode==='plan', a recurring plan-mode system prompt is injected (read-only enforcement + 4-phase workflow: Understanding → Design → Review → Final Plan), and the ONLY file the model may edit is the plan file. The model writes/edits the plan using the standard Edit/Write tools (Edit is NOT disabled; it's permitted specifically for the plan path). The model then calls ExitPlanMode when done. ExitPlanMode.isReadOnly() returns false (it writes to disk); shouldDefer:true; isEnabled gated (disabled when --channels active). validateInput rejects if called outside plan mode (errorCode 1, message 'You are not in plan mode...'). checkPermissions returns behavior:'ask' with message 'Exit plan mode?' (for non-teammates) — this is the approval prompt. On approval, call() reads the plan from disk (getPlan(agentId)), restores prePlanMode (with circuit-breaker fallback to 'default' if auto gate now off), sets hasExitedPlanMode + needsPlanModeExitAttachment flags, and the tool_result echoes the approved plan back to the model. + +**Data model:** Tool name (both constants resolve to the string 'ExitPlanMode'). inputSchema = z.strictObject({ allowedPrompts?: array of {tool: enum['Bash'], prompt: string} }).passthrough(). Note: the INTERNAL inputSchema does NOT include plan content (plan is read from disk by call()). The SDK-facing _sdkInputSchema EXTENDS inputSchema with plan? and planFilePath? injected by normalizeToolInput (CCR web UI can send an edited plan via permissionResult.updatedInput). outputSchema = { plan: string|null, isAgent: bool, filePath?: string, hasTaskTool?: bool, planWasEdited?: bool, awaitingLeaderApproval?: bool, requestId?: string }. + +**Config:** Entry vectors: Shift+Tab cycle (default → acceptEdits → plan, with auto/bypassPermissions/dontAsk gated in), --permission-mode plan startup flag, /plan [description] command, or the model calling EnterPlanMode tool. settings.json: permissions.defaultMode = 'plan'. 
+ +### Plan file location & persistence +**Purpose:** Where the plan markdown lives on disk and how it survives clear/resume/fork. + +**Mechanism:** getPlansDirectory() (memoized): reads settings.plansDirectory; if set, resolves relative to cwd and validates it stays within project root (path-traversal guard, else falls back to ~/.claude/plans); default = join(getClaudeConfigHomeDir(), 'plans'). mkdirSync(recursive) ensures it exists. getPlanSlug(sessionId): lazily generates a random word slug (generateWordSlug), retries up to MAX_SLUG_RETRIES=10 to find a non-colliding filename, caches per session. getPlanFilePath(agentId): main → `<plansDir>/<slug>.md`; subagent → `<plansDir>/<slug>-agent-<agentId>.md`. getPlan(agentId) reads the file (ENOENT→null). On resume (copyPlanForResume), slug is restored from transcript log.messages[].slug; if the file is missing, recovery attempts file snapshot first, then message-history scan. On fork (copyPlanForFork), a NEW slug is generated and the original content is copied so sessions don't clobber each other. persistFileSnapshotIfRemote() writes incremental plan snapshots to the transcript (only in remote/CCR environments). + +**Data model:** getPlanFilePath(agentId?): main session → `<plansDir>/<slug>.md`; subagent → `<plansDir>/<slug>-agent-<agentId>.md`. getPlan() returns file contents or null (ENOENT tolerated). Recovery sources scanned backwards in transcript: (1) ExitPlanMode tool_use.input.plan (injected by normalizeToolInput), (2) user message .planContent field (set during clear-context-and-implement flow), (3) attachment of type 'plan_file_reference' with .planContent (created by auto-compact). File snapshots are SystemFileSnapshotMessage { type:'system', subtype:'file_snapshot', snapshotFiles:[{key,path,content}], isMeta:true } written incrementally in remote (CCR) sessions. + +**Config:** settings.json: plansDirectory (relative path resolved against cwd; must stay within project root or falls back to default ~/.claude/plans). 
Slug generated via generateWordSlug() with up to 10 retries to avoid filename collisions. Per-session cache keyed by sessionId. clearPlanSlug on /clear; copyPlanForResume on resume; copyPlanForFork generates a NEW slug to avoid clobbering. + +### Plan approval flow +**Purpose:** The 5-option UX presented when the model calls ExitPlanMode, and how approval mutates session state. + +**Mechanism:** ExitPlanMode.checkPermissions returns behavior:'ask', message:'Exit plan mode?'. The UI renders the plan file contents and presents 5 options: (1) Approve and start in auto mode → sets mode to 'auto'; (2) Approve and accept edits → mode 'acceptEdits'; (3) Approve and review each edit manually → mode 'default'; (4) Keep planning with feedback → stays in plan mode, feeds user feedback back; (5) Refine with Ultraplan → hands off to a browser-based Claude Code on the web session. On approve, call() restores the chosen mode (from prePlanMode or the chosen option), sets hasExitedPlanMode=true and needsPlanModeExitAttachment=true (which injects a plan-exit attachment into subsequent context). User can press Ctrl+G to edit the plan file in $EDITOR before approving; an edited plan is written back to disk and planWasEdited=true is flagged. For plan_mode_required teammates (isTeammate() && isPlanModeRequired()), no local approval dialog: a plan_approval_request is written to the team-lead mailbox and the teammate awaits an inbox response. + +**Data model:** ExitPlanMode tool_result for non-agent approval: content includes 'User has approved your plan. You can now start coding...' + filePath + optional TeamCreate hint (if Agent tool available) + '## Approved Plan:' / '## Approved Plan (edited by user):' header + plan text. For teammates with plan_mode_required: a plan_approval_request mailbox message { type:'plan_approval_request', from, timestamp, planFilePath, planContent, requestId } is sent to 'team-lead', and tool_result tells the teammate to wait for inbox approval. 
For subagents (isAgent): tool_result = 'User has approved the plan... respond with ok'. Empty plan: tool_result = 'User has approved exiting plan mode. You can now proceed.' + +**Config:** Ctrl+G opens the plan file in the user's $EDITOR. When showClearContextOnPlanAccept is enabled, each approve option also offers to clear the planning context first. Accepting also auto-names the session from plan content (unless --name / /rename set). Gate-off fallback: if prePlanMode was 'auto' but the auto gate is now off (circuit breaker / settings), restoreMode falls back to 'default' and a notification is shown. + +## Key behaviors +- Slash commands and skills are ONE merged system. .claude/commands/deploy.md and .claude/skills/deploy/SKILL.md both create /deploy and behave identically. If a skill and a command share a name, the SKILL takes precedence. Existing commands keep working; skills add: a supporting-file directory, richer frontmatter (arguments, user-invocable, disallowed-tools, effort, context, agent, hooks, paths, shell). +- A command/skill is ONLY recognized at the START of a user message. Text after the name is arguments. /plan [description] both enters plan mode AND immediately starts on the task; /plan with no arg just enters plan mode. +- String substitution runs ONCE over the original file. !`cmd` output is plain text and is NOT re-scanned for further placeholders, so a command cannot emit a placeholder for a later pass. Inline ! is only recognized at line start or after whitespace; 'KEY=!`cmd`' is left literal. +- $ARGUMENTS: if the placeholder is absent from the body but args were provided, the harness APPENDS 'ARGUMENTS: ' to the end. Indexed args use shell-style quoting: /my-skill "hello world" second → $0='hello world', $1='second'. Escape literal $ with a single backslash directly before the token (\$1.00); doubled backslash (\\$1) leaves both backslashes and still expands $1. 
+- Skill descriptions load into context so the model knows what is available, but full content loads only on invocation. The listing budget = 1% of the model's context window (configurable via skillListingBudgetFraction or SLASH_COMMAND_TOOL_CHAR_BUDGET); on overflow, least-invoked skills lose descriptions first. Per-entry combined description+when_to_use is capped at 1,536 chars (configurable via maxSkillDescriptionChars). +- Read-only enforcement in plan mode is PROMPT-BASED, not a hard tool toggle. The plan-mode system message explicitly forbids edits/commits/non-readonly tools, but the Edit/Write tools themselves remain available — the harness permits Edit specifically against the plan file path. Other mutating tools (Bash that writes, MCP mutators) are blocked by the plan permission mode (mode==='plan' auto-denies writes like default mode, EXCEPT the plan file). +- ExitPlanMode does NOT take plan content as a parameter — it reads the plan from the file the model wrote. The plan is loaded from disk in call() via getPlan(agentId). If the file is missing/empty, the approval dialog can still be presented and tool_result says 'User has approved exiting plan mode. You can now proceed.' (This is why the dialog can appear with 'no plan' unprompted.) +- planWasEdited is tracked separately: when CCR web UI (or Ctrl+G) sends an edited plan via permissionResult.updatedInput, the edited plan is written back to disk (writeFile) and re-snapshotted (persistFileSnapshotIfRemote), and tool_result labels it 'Approved Plan (edited by user)' so the model knows the user changed something. +- ExitPlanMode has a circuit-breaker fallback: if prePlanMode was 'auto' but the auto-mode gate is now off (circuit breaker or settings disable), restoreMode falls back to 'default' instead of calling setAutoModeActive(true) directly — prevents ExitPlanMode from bypassing the auto-mode gate. 
+- ExitPlanMode.validateInput rejects with errorCode 1 if called when mode !== 'plan' ('You are not in plan mode. This tool is only for exiting plan mode...'). This happens because the tool is announced in the deferred-tool list regardless of mode so the model can call it after plan approval (fresh delta on compact/clear). +- Teammates bypass the local approval dialog entirely (checkPermissions returns behavior:'allow'; requiresUserInteraction() returns false). If isPlanModeRequired() is true, a plan_approval_request is written to the team-lead mailbox and the teammate blocks on an inbox response; if voluntary plan mode, it exits locally without approval. +- plansDirectory in settings.json is resolved relative to cwd and validated to stay within project root; a path-traversal attempt falls back to ~/.claude/plans. The new (V2) plan mode FORCES using ~/.claude/plans unless plansDirectory is set, which breaks workflows using plan files elsewhere (known issue #12707). +- Plan slug is a random word slug (generateWordSlug) with up to 10 collision retries; main session file is `<slug>.md`, subagent plan is `<slug>-agent-<agentId>.md`. /clear clears the slug; resume restores it from transcript; fork generates a NEW slug (copyPlanForFork) to avoid clobbering. +- Protected paths (`.git`, `.vscode`, `.claude` except `.claude/worktrees`, shell rc files, etc.) are NEVER auto-approved in plan/default/acceptEdits modes — they prompt. Even in plan mode, editing the plan file is allowed because it lives in the plans directory (not a protected path). +- Live change detection: adding/editing/removing a skill under ~/.claude/skills/ or project .claude/skills/ takes effect mid-session without restart; but creating a top-level skills dir that didn't exist at startup needs a restart, and plugin folder changes (hooks/, agents/, .mcp.json, output-styles/) need /reload-plugins.
+ +## External interfaces +- File paths: `.claude/commands/<name>.md`, `~/.claude/commands/<name>.md`, `.claude/skills/<name>/SKILL.md`, `~/.claude/skills/<name>/SKILL.md`, `<plugin-root>/skills/<name>/SKILL.md`, `~/.claude/plans/<slug>.md`, `~/.claude/plans/<slug>-agent-<agentId>.md` +- CLI flags: --permission-mode plan, `--add-dir <path>`, -p (non-interactive), --dangerously-skip-permissions, --allow-dangerously-skip-permissions, `--name <name>` +- Interactive: type / for command menu, Shift+Tab to cycle modes (default→acceptEdits→plan), Ctrl+G to edit the plan file in $EDITOR +- settings.json keys: permissions.defaultMode, permissions.disableAutoMode, permissions.disableBypassPermissionsMode, plansDirectory, showClearContextOnPlanAccept, disableBundledSkills, disableSkillShellExecution, skillOverrides (values: on|name-only|user-invocable-only|off), skillListingBudgetFraction, maxSkillDescriptionChars +- Env vars: SLASH_COMMAND_TOOL_CHAR_BUDGET, CLAUDE_CODE_USE_POWERSHELL_TOOL=1, CLAUDE_CODE_ENABLE_AUTO_MODE, CLAUDE_CODE_NEW_INIT=1, CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1 +- Tool names: Skill (model-invoked), ExitPlanMode (a.k.a. EXIT_PLAN_MODE_V2_TOOL_NAME), EnterPlanMode, Agent (Task), TeamCreate, AskUserQuestion +- Substitution vars in command/skill bodies: $ARGUMENTS, $ARGUMENTS[N], $N, `$<name>` (named argument), ${CLAUDE_SESSION_ID}, ${CLAUDE_EFFORT}, ${CLAUDE_SKILL_DIR}, ${CLAUDE_PLUGIN_ROOT} +- MCP prompts as commands: `/mcp__<server>__<prompt>` + +## Open questions +- Exact contents of the EnterPlanMode tool's prompt and the FULL verbatim plan-mode system message (the 4-phase workflow text) — only paraphrased excerpts are publicly documented; the exact strings live in the bundled CLI. +- Whether there is a distinct EnterPlanMode tool definition beyond the permission-mode transition handler, or whether entering plan mode is purely a /plan + Shift+Tab + mode-transition mechanism (sources suggest EnterPlanMode exists as a callable tool that the model can invoke itself, equivalent to Shift+Tab).
+- Exact behavior of `allowedPrompts` in the ExitPlanMode inputSchema (the Ant-internal prompt-based permission section is stubbed out in the public leaf-kit repo) — whether/how it pre-approves Bash categories post-approval. +- Whether /plan with a description arg bypasses the EnterPlanMode tool call entirely (UI-level mode switch) or still routes through the tool. + +## Sources +- [Commands reference — Claude Code Docs (code.claude.com/docs/en/commands)](https://code.claude.com/docs/en/commands) — Official authoritative table of ALL built-in slash commands (/help, /clear, /init, /agents, /mcp, /memory, /model, /plan, /compact, etc.) with purposes, aliases, arguments, version gates, and Skill/Workflow markers. +- [Extend Claude with skills — Claude Code Docs (code.claude.com/docs/en/slash-commands)](https://code.claude.com/docs/en/slash-commands) — Official doc confirming commands↔skills merge, file locations, the full frontmatter reference table (name/description/when_to_use/argument-hint/arguments/disable-model-invocation/user-invocable/allowed-tools/disallowed-tools/model/effort/context/agent/hooks/paths/shell), string substitutions ($ARGUMENTS/$N/${CLAUDE_*}), !`cmd` rules, skillOverrides states, skillListingBudgetFraction, disableSkillShellExecution. +- [Command Frontmatter Reference (anthropics/claude-plugins-official)](https://github.com/anthropics/claude-plugins-official/blob/main/plugins/plugin-dev/skills/command-development/references/frontmatter-reference.md) — Official Anthropic plugin repo's full field specs: description (~60 chars), allowed-tools (string|array|Bash(git:*)), model (sonnet/opus/haiku), argument-hint, disable-model-invocation, with validation rules and complete examples. 
+- [Command Development Skill README (anthropics/claude-code)](https://github.com/anthropics/claude-code/blob/main/plugins/plugin-dev/skills/command-development/README.md) — Official Anthropic command-development skill: file format, locations (project/personal/plugin), $ARGUMENTS/$1/$2 positional args, @file refs, !`bash` execution, ${CLAUDE_PLUGIN_ROOT}. +- [ExitPlanModeV2Tool.ts (leaf-kit/claude-analysis)](https://github.com/leaf-kit/claude-analysis/blob/main/src/tools/ExitPlanModeTool/ExitPlanModeV2Tool.ts) — Reverse-engineered source: exact tool name 'ExitPlanMode', input/output zod schemas, validateInput/checkPermissions/call logic, plan-read-from-disk, teammate mailbox approval, circuit-breaker fallback, tool_result formats. +- [ExitPlanModeTool/prompt.ts (leaf-kit/claude-analysis)](https://github.com/leaf-kit/claude-analysis/blob/main/src/tools/ExitPlanModeTool/prompt.ts) — Verbatim EXIT_PLAN_MODE_V2_TOOL_PROMPT: 'does NOT take plan content as a parameter', 'read from file', 'Only use when task requires planning implementation steps... not for research', AskUserQuestion separation. +- [utils/plans.ts (leaf-kit/claude-analysis)](https://github.com/leaf-kit/claude-analysis/blob/main/src/utils/plans.ts) — Exact plan file path logic: getPlansDirectory (plansDirectory setting, cwd-relative, path-traversal guard, default ~/.claude/plans), getPlanSlug (generateWordSlug, MAX_SLUG_RETRIES=10), getPlanFilePath (main .md, subagent -agent-.md), copyPlanForResume/copyPlanForFork, recoverPlanFromMessages (3 recovery sources). +- [Choose a permission mode — Claude Code Docs](https://code.claude.com/docs/en/permission-modes) — Official: plan mode is read-only, Shift+Tab cycle, /plan prefix, --permission-mode plan, the 5 approval options, Ctrl+G plan editing, defaultMode:'plan' setting, protected paths list. +- [What Actually Is Claude Code's Plan Mode? 
(Armin Ronacher / lucumr.pocoo.org)](https://lucumr.pocoo.org/2025/12/17/what-is-plan-mode/) — Deep independent analysis confirming read-only enforcement is prompt-based (not tool removal), plan file edited via Edit tool, EnterPlanMode/ExitPlanMode tool pair, and paraphrased 4-phase plan-mode system prompt. +- [[Feature Request] Plan mode should support plan files outside ~/.claude/plans (anthropics/claude-code#12707)](https://github.com/anthropics/claude-code/issues/12707) — Confirms the new/V2 plan mode FORCES using ~/.claude/plans unless plansDirectory is configured, and references env vars for the V2 plan mode. diff --git a/docs/claude-code-architecture/research/streaming-protocol.md b/docs/claude-code-architecture/research/streaming-protocol.md new file mode 100644 index 0000000..6f984d4 --- /dev/null +++ b/docs/claude-code-architecture/research/streaming-protocol.md @@ -0,0 +1,104 @@ +# Research: streaming-protocol + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code's streaming protocol is layered across five distinct surfaces that a Go reimplementation must reproduce. (1) The Anthropic Messages API emits server-sent events (SSE) over an HTTP stream: a strict sequence of message_start -> [per content block: content_block_start -> content_block_delta(s) -> content_block_stop] -> message_delta (cumulative usage + stop_reason) -> message_stop, with interspersed ping/error events. (2) tool_use inputs stream as partial-JSON fragments via input_json_delta deltas whose partial_json strings must be concatenated and parsed once at content_block_stop; the content_block_start.input placeholder is an empty object {} by deliberate design, and the deltas are strings (a type mismatch re-implementors must handle). Fine-grained eager_input_streaming can deliver invalid/truncated JSON. 
(3) The Claude Agent SDK (Python/TypeScript) wraps the bundled CLI as a subprocess and communicates via newline-delimited JSON (NDJSON) over stdin/stdout; raw API SSE events are wrapped into a StreamEvent message (type "stream_event" / SDKPartialAssistantMessage) only when include_partial_messages/includePartialMessages is enabled, interleaved with semantic AssistantMessage/UserMessage/SystemMessage/ResultMessage objects. (4) Headless `claude -p --output-format stream-json --verbose --include-partial-messages` emits NDJSON on stdout where each line is one event; event types include system (with subtypes init/api_retry/compact_boundary/plugin_install), stream_event, assistant, user, result (terminal). (5) The SDK<->CLI control protocol is a bidirectional NDJSON stream over stdin/stdout with control_request/control_response messages for permission (can_use_tool), hooks, and in-process SDK MCP tool calls, multiplexed by request_id. The terminal sentinel of a stream-json run is a ResultMessage (type "result"), which is the single load-bearing contract for consumers. + +## Components +### Anthropic Messages API SSE streaming +**Purpose:** The lowest transport layer: the raw server-sent events streamed back from POST /v1/messages with stream:true. Everything Claude Code / Agent SDK streams up to the user is derived from accumulating these events. + +**Mechanism:** Sequence is STRICTLY ordered: (1) ONE message_start carrying the Message skeleton with empty content[]; (2) for each content block: ONE content_block_start (carries index + the content_block stub), zero or more content_block_delta events (each carries index + a typed delta), ONE content_block_stop (carries index only); (3) one or more message_delta events (top-level Message mutations — primarily stop_reason and cumulative usage); (4) ONE terminal message_stop. ping events may appear anywhere. Each content block's index maps to its final position in Message.content[]. 
Exception: server-side fallback emits a content_block_start/content_block_stop pair with NO deltas between. SSE wire format is `event: <event_type>\ndata: <json>\n\n`. Unknown event types may be added — clients must handle gracefully. + +**Data model:** Each SSE frame: two lines — `event: <event_type>` and `data: {"type":"<event_type>", ...}` (the data.type MATCHES the SSE event name), blank line terminates. message_start.message has full Message skeleton {id, type:"message", role:"assistant", content:[], model, stop_reason:null, stop_sequence:null, usage:{input_tokens, output_tokens}}. content_block_start has {type:"content_block_start", index:int, content_block:{type:"text"|"tool_use"|"thinking"|"server_tool_use"|"web_search_tool_result", ...}}. For text: content_block={type:"text", text:""}. For tool_use: content_block={type:"tool_use", id:"toolu_...", name:"<tool_name>", input:{}} (input is EMPTY OBJECT placeholder). For thinking: {type:"thinking", thinking:"", signature:""}. Deltas: text_delta {text}, input_json_delta {partial_json: "<json_fragment>"}, thinking_delta {thinking}, signature_delta {signature}. message_delta: {delta:{stop_reason, stop_sequence}, usage:{output_tokens (cumulative)}}. message_stop: {type:"message_stop"} (no other fields). ping: {type:"ping"}. error: {type:"error", error:{type:"overloaded_error", message:...}}. + +**Config:** HTTP request: POST /v1/messages with body {"stream": true, ...}. Response Content-Type: text/event-stream. Headers: anthropic-version (e.g. 2023-06-01), x-api-key or `Authorization: Bearer <token>`. + +### Fine-grained tool_use input streaming (partial JSON) +**Purpose:** How the `input` field of a tool_use block is delivered incrementally so a client can render/act on partial args before the block closes.
+ +**Mechanism:** The accumulation contract (verbatim from docs): (1) On content_block_start with type=="tool_use", initialize `input_json = ""`; (2) for each content_block_delta with delta.type=="input_json_delta", append `input_json += event.delta.partial_json`; (3) on content_block_stop, parse `json.loads(input_json)`. The deliberate type mismatch — content_block_start.input is an empty OBJECT {}, but the deltas carry STRING partial_json — is by design: the object marks the slot, the deltas build the real value. A block can emit MANY deltas (sometimes dozens). Without eager_input_streaming the server buffers+validates whole values; current models emit at most one complete key+value per delta chunk, so there are visible pauses. With eager streaming, chunks arrive sooner, are longer, may straddle tokens, and the final string is NOT guaranteed valid JSON (max_tokens can truncate mid-value — must handle that and e.g. wrap in `{"INVALID_JSON": "<the invalid JSON string>"}` when feeding back as a tool error). + +**Data model:** Per-block accumulator state keyed by content-block index: map[int]string of concatenated partial_json. Final parsed value: tool_use.input is always an OBJECT (map), built by applying json.loads to the accumulated string at content_block_stop. + +### Agent SDK message model + StreamEvent +**Purpose:** The Python/TypeScript Agent SDK's typed message classes that wrap the raw SSE events and the conversation lifecycle. + +**Mechanism:** The SDK wraps the bundled `claude` CLI as a subprocess and communicates via NDJSON over stdin/stdout (NOT a direct HTTP API call). With partial messages ENABLED, the SDK additionally yields a StreamEvent for every raw API SSE event, interleaved with the semantic messages. The flow: StreamEvent(message_start) -> StreamEvent(content_block_start/delta/stop) for each block -> StreamEvent(message_delta) -> StreamEvent(message_stop) -> AssistantMessage (the ACCUMULATED complete message) -> [tool executes] -> next turn's StreamEvents -> ... -> ResultMessage.
To extract streaming text: check isinstance StreamEvent -> event.type=="content_block_delta" -> delta.type=="text_delta" -> delta.text. To track tool calls: content_block_start with content_block.type=="tool_use" gives .name; accumulate input_json_delta.partial_json; content_block_stop finalizes. To consume from the CLI directly: `claude -p ... --output-format stream-json --verbose --include-partial-messages` then each stdout line is a JSON object; the streaming lines have type=="stream_event" and an `event` field mirroring the raw SSE event. + +**Data model:** @dataclass StreamEvent: { uuid: str; session_id: str; event: dict[str,Any] (the RAW Anthropic SSE event); parent_tool_use_id: str|None }. AssistantMessage: { content: list[ContentBlock]; model: str; parent_tool_use_id; error: AssistantMessageError|None }. SystemMessage: { subtype: str; data: dict }. ResultMessage: { subtype, duration_ms, duration_api_ms, is_error, num_turns, session_id, stop_reason, total_cost_usd, usage:dict, result:str, structured_output }. ContentBlock variants: TextBlock{text}, ToolUseBlock{id,name,input}, ThinkingBlock{thinking,signature}. + +**Config:** ClaudeAgentOptions(include_partial_messages=True) (Python) / includePartialMessages:true (TypeScript). Required to receive any token-level data. Default False. + +### Headless CLI --output-format stream-json +**Purpose:** The CLI surface for headless / CI / scripted streaming consumption of an agent run. + +**Mechanism:** `--output-format stream-json` makes `claude -p` emit NDJSON (one JSON object per line) on stdout as events occur, instead of a single batch payload. The FIRST event in the stream is system/init (unless CLAUDE_CODE_SYNC_PLUGIN_INSTALL is set, in which case system/plugin_install events precede it). Token-level deltas only appear if BOTH --verbose AND --include-partial-messages are passed; otherwise only complete assistant/user/result/system messages are emitted. 
When an API request fails with a retryable error, a system/api_retry event is emitted BEFORE the retry (use to surface retry progress / custom backoff). The LAST event is expected to be a result message (type:"result") with the full cost/usage/turns metadata. Consumers MUST buffer bytes and split on newline because events can straddle chunk boundaries. The result event is the terminal sentinel — but a known bug (issue #1920) is that the CLI sometimes fails to emit it, so consumers that wait on it unconditionally can hang. + +**Data model:** Every line: JSON object with `type` field. assistant: {type:"assistant", message:{content:[ContentBlock], model, ...}, uuid, session_id, parent_tool_use_id}. user: {type:"user", message:{role:"user", content:...}, uuid, session_id, parent_tool_use_id, tool_use_result}. stream_event: {type:"stream_event", event:{...raw SSE...}, uuid, session_id, parent_tool_use_id}. system/init: {type:"system", subtype:"init", session_id, model, tools, mcpServers, plugins, plugin_errors}. system/api_retry: {type:"system", subtype:"api_retry", attempt:int(>=1), max_retries:int, retry_delay_ms:int, error_status:int|null, error:"<error_category>", uuid, session_id}. system/compact_boundary (Python: SystemMessage subtype "compact_boundary"; TS: SDKCompactBoundaryMessage). result: {type:"result", subtype:"result"|"success"|"error", result:str, session_id, is_error:bool, duration_ms, duration_api_ms, num_turns, total_cost_usd, usage:{...}, stop_reason, structured_output}. + +### stdin/stdout NDJSON control protocol (SDK <-> CLI) +**Purpose:** The bidirectional wire protocol between an SDK host process and the Claude Code CLI subprocess — used for permission callbacks, hooks, in-process SDK MCP tools, and streaming multi-turn input. + +**Mechanism:** The SDK spawns the CLI with BOTH --input-format stream-json AND --output-format stream-json, so stdin AND stdout are NDJSON.
stdin carries: (a) user turns — `{"type":"user","message":{"role":"user","content":...}}` one per line, generator-yielded for multi-turn; (b) control_response messages replying to CLI requests; (c) on connect (client mode) an initialize control_request registering hooks (PreToolUse/PostToolUse/UserPromptSubmit/Stop/SubagentStop/PreCompact with matcher globs) and sdk_mcp_servers. stdout carries assistant/user/result/stream_event/system messages PLUS control_request messages from the CLI: can_use_tool (permission), hook_callback, and mcp_message (invoke an in-process @tool / SDK MCP server tool). The CLI issues a JSON-RPC handshake against each SDK MCP server (initialize -> capabilities -> tools/list) before calling tools. SDK responses to mcp_message MUST wrap the JSON-RPC result in an `mcp_response` field (undocumented but required — missing it causes a 60s timeout). request_id multiplexes concurrent control requests. Writes must be newline-terminated + flushed; each JSON object on exactly one line. Close stdin for graceful shutdown; send SIGTERM if the process doesn't exit. + +**Data model:** control_request: {type:"control_request" (or "sdk_control_request"), request:{subtype, request_id, ...}}. initialize: {request:{subtype:"initialize", request_id, hooks:{"<hook_event_name>":[{matcher, hook_callback_ids:[...]}]}, sdk_mcp_servers:["name",...]}}. permission: {request:{subtype:"permission", request_id, tool_name, tool_input:dict}}. mcp_message: {request:{subtype:"mcp_message", request_id, server_name, message:{jsonrpc:"2.0", id, method, params}}}. control_response success: {type:"control_response", response:{subtype:"success", request_id, response:{...}}}. perm allow: response:{behavior:"allow"}. perm deny: response:{behavior:"deny", message}. mcp result: response:{mcp_response:{jsonrpc, id, result:{content:[{type:"text",text}], isError:bool}}}. control_response error: {response:{subtype:"error", request_id, error}}.
SDK MCP handshake: initialize method -> {protocolVersion:"2025-11-25", capabilities:{tools:{listChanged:false}}, serverInfo:{name,version}}, then notifications/initialized, then tools/list. + +**Config:** CLI flags for SDK subprocess: `--output-format stream-json --input-format stream-json --verbose` (required trio). Plus optionally: --permission-prompt-tool stdio (route perms via control protocol, NOT interactive), --setting-sources user,project,local, --system-prompt / --append-system-prompt, --permission-mode acceptEdits|dontAsk|..., --model, --no-session-persistence. Env: ANTHROPIC_API_KEY, CLAUDE_CODE_OAUTH_TOKEN, CLAUDE_CONFIG_DIR (default ~/.claude), CLAUDE_CODE_ENTRYPOINT (e.g. sdk-go), CLAUDE_AGENT_SDK_VERSION. + +## Key behaviors +- stream-json output requires THREE flags together for token streaming: --output-format stream-json --verbose --include-partial-messages. Omit --include-partial-messages and you get only complete assistant/user/result/system lines (no per-token deltas). Omit --verbose and stream-json does not work. +- DELIBERATE type mismatch in tool_use streaming: content_block_start.input is an empty OBJECT {}, but each delta carries a STRING (partial_json). Do not assign deltas to .input; concatenate strings and parse once at content_block_stop. The empty object is just a slot marker. +- The `index` field on content_block_* events is the authoritative key into the final Message.content[] array. Multiple blocks (text, then tool_use, then text again) are distinguished by index, and the order of start/stop events preserves final array order. +- Usage in message_delta is CUMULATIVE (output_tokens grows), not incremental. message_start.usage has input_tokens + output_tokens:1 (placeholder). Final usage is read from the LAST message_delta before message_stop. +- The CLI emits a `result` (type:"result") message as the terminal event of a stream-json run — that is the sentinel a consumer waits on. 
Known bug (issue #1920): it is sometimes missing, hanging naive consumers. +- system/init is the first event (model, tools, mcpServers, plugins, plugin_errors). With CLAUDE_CODE_SYNC_PLUGIN_INSTALL set, system/plugin_install events (status: started/installed/failed/completed) precede system/init. Use plugins/plugin_errors fields to fail CI on a plugin that failed to load. +- system/api_retry carries: attempt (starts at 1), max_retries, retry_delay_ms, error_status (int OR null for connection errors with no HTTP response), and an error category enum: authentication_failed, oauth_org_not_allowed, billing_error, rate_limit, overloaded, invalid_request, model_not_found, server_error, max_output_tokens, unknown. +- Extended thinking: thinking_delta events build the .thinking text; a single signature_delta arrives JUST BEFORE content_block_stop carrying the signature used to verify block integrity. With thinking.display:"omitted", NO thinking_delta is sent — the block opens, gets one signature_delta, and closes. display:"summarized" streams a condensed summary. +- Fine-grained streaming (eager_input_streaming:true on a tool) can yield INVALID or partial JSON (especially if stop_reason is max_tokens, truncating mid-parameter). A robust consumer must tolerate parse failure and, when echoing the bad input back as a tool_result error, wrap it as {"INVALID_JSON":""}. +- Error recovery differs by model family: Claude 4.5 and earlier — re-feed the partial response as an assistant message and resume. Claude 4.6 and later — instead send a USER message instructing the model to continue from where it left off (e.g. `Your previous response was interrupted and ended with X. Continue.`). Tool-use and thinking blocks CANNOT be partially recovered; resume from the most recent text block. +- server_tool_use / web_search_tool_result blocks are emitted inline in the SAME stream (index increments across them) for built-in tools like web_search_20250305. 
The web_search_tool_result block arrives as a content_block_start already containing the full content array (no deltas), then a content_block_stop. +- Piped stdin to `claude -p` is capped at 10MB (since v2.1.128) — over the cap the process exits non-zero. Background Bash tasks spawned during a -p run are terminated ~5s after the final result and stdin close (behavior since v2.1.163; before that a non-exiting bg process held the run open forever). +- Agent SDK message ordering with partials ON: StreamEvents for one assistant turn -> AssistantMessage (complete) -> [tool runs] -> next turn's StreamEvents -> ... -> ResultMessage. Without partials, the StreamEvents are suppressed but AssistantMessage/UserMessage/SystemMessage/ResultMessage still arrive. +- SDK subprocess control protocol: every control_response must echo the request_id; SDK MCP tool responses must wrap JSON-RPC result in `mcp_response` (undocumented, omission = 60s timeout). Each JSON message on stdin must be one line, newline-terminated, flushed. Close stdin to shut down gracefully. +- Compact boundary: when history is auto-compacted, Python emits a SystemMessage with subtype "compact_boundary"; TypeScript emits SDKCompactBoundaryMessage. A Go reimplementation must produce this boundary to keep SDK consumers in sync. 
+ +## External interfaces +- CLI flag: --output-format stream-json|json|text +- CLI flag: --input-format stream-json (enables stdin NDJSON control protocol) +- CLI flag: --include-partial-messages (enables token-level stream_event deltas) +- CLI flag: --verbose (REQUIRED with stream-json) +- CLI flag: --permission-prompt-tool stdio (route permissions over control protocol) +- CLI flag: --bare (skip hooks/skills/plugins/MCP/CLAUDE.md auto-load; recommended for SDK/CI; future default for -p) +- CLI flag: --json-schema + --output-format json (structured output -> result.structured_output) +- CLI flag: --setting-sources user,project,local +- CLI flag: --system-prompt / --append-system-prompt / --append-system-prompt-file +- CLI flag: --permission-mode acceptEdits|dontAsk|default|plan|bypassPermissions +- HTTP: POST https://api.anthropic.com/v1/messages body {"stream": true} -> Content-Type: text/event-stream +- Env: ANTHROPIC_API_KEY, CLAUDE_CODE_OAUTH_TOKEN, CLAUDE_CONFIG_DIR (default ~/.claude), CLAUDE_CODE_ENTRYPOINT, CLAUDE_AGENT_SDK_VERSION, CLAUDE_CODE_SYNC_PLUGIN_INSTALL +- Python SDK: query(prompt, options) async generator; ClaudeAgentOptions(include_partial_messages=True); ClaudeSDKClient.connect() +- Python types: from claude_agent_sdk.types import StreamEvent, UserMessage, AssistantMessage, SystemMessage, ResultMessage +- TypeScript SDK: @anthropic-ai/claude-agent-sdk; SDKPartialAssistantMessage {type:'stream_event'}; SDKMessage union; SDKUserMessage generator + +## Open questions +- Exact TS field names for the result envelope emitted by `--output-format json` (result, session_id, is_error, total_cost_usd, usage, num_turns, duration_ms, duration_api_ms, stop_reason, structured_output) — confirm against current TS SDKMessage definitions in @anthropic-ai/claude-agent-sdk rather than the Python dataclass shapes. 
+- Whether `claude -p --output-format stream-json` still REQUIRES --verbose in the latest 2.x (docs and the Go community doc both say yes, but exact current version gate unverified). +- Exact set and ordering of system/init fields emitted in stream-json (model, cwd, tools, mcpServers, plugins, plugin_errors, permissionMode, version) for a faithful Go replica — the docs only enumerate plugins/plugin_errors explicitly. +- The precise CLI exit codes for the 10MB stdin cap error and for the missing-result-event hang (not documented; only behavior described). + +## Sources +- [Stream responses in real-time — Claude Code Docs (Agent SDK streaming-output)](https://code.claude.com/docs/en/agent-sdk/streaming-output) — Authoritative: defines StreamEvent dataclass, include_partial_messages flag, message flow ordering, text_delta + input_json_delta accumulation examples. +- [Streaming messages — Claude API Docs (platform.claude.com)](https://platform.claude.com/docs/en/build-with-claude/streaming) — Authoritative source for the raw SSE event flow: message_start, content_block_start/delta/stop, message_delta (cumulative usage), message_stop, ping, error; full text/tool/thinking/web_search wire examples; Claude 4.5 vs 4.6 error recovery. +- [Run Claude Code programmatically — Claude Code Docs (headless)](https://code.claude.com/docs/en/headless) — Authoritative: --output-format text|json|stream-json, the --verbose + --include-partial-messages requirement, system/init, system/api_retry field table, system/plugin_install, the jq text-delta one-liner, --bare mode, 10MB stdin cap (v2.1.128), background-task exit (v2.1.163). +- [Fine-grained tool streaming — Claude API Docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/fine-grained-tool-streaming) — Authoritative: eager_input_streaming:true per-tool flag, the input:{} placeholder vs partial_json string contract, invalid-JSON handling and INVALID_JSON wrapper, max_tokens truncation behavior. 
+- [Message Types — Claude Agent SDK for Python](https://anthropics-claude-agent-sdk-python-82.mintlify.app/api/types/messages) — Authoritative dataclass shapes for UserMessage, AssistantMessage (error enum), SystemMessage (subtype), ResultMessage (full field list: subtype, duration_ms, duration_api_ms, is_error, num_turns, session_id, stop_reason, total_cost_usd, usage, result, structured_output), StreamEvent (uuid/session_id/event/parent_tool_use_id), Task* messages. +- [Streaming Input — Claude Code Docs (streaming-vs-single-mode)](https://code.claude.com/docs/en/agent-sdk/streaming-vs-single-mode) — Authoritative: SDKUserMessage generator shape for stdin stream-json, image content blocks, continue/resume, single-vs-streaming input mode limits. +- [Inside the Claude Agent SDK: From stdin/stdout Communication to Production](https://buildwithaws.substack.com/p/inside-the-claude-agent-sdk-from) — Detailed (SDK v0.1.19) reverse-engineering of the subprocess NDJSON control protocol: can_use_tool / hook_callback control_request/response shapes, request_id multiplexing, the CLI invocation flags, and the initialize handshake. +- [claude-agent-sdk-go/docs/cli-protocol.md (GitHub)](https://github.com/Roasbeef/claude-agent-sdk-go/blob/main/docs/cli-protocol.md) — Most precise wire-format reference for a Go reimplementation: exact control_request/control_response JSON for initialize, permission, mcp_message, the required mcp_response wrapper (undocumented), MCP handshake, error envelope, env vars, and shutdown semantics. +- [Claude Code stream-json: the output format that changes everything — Background Claude](https://backgroundclaude.com/blog/stream-json) — Concrete confirmation of the three-flag rule, the system/api_retry shape, and a correct NDJSON line-buffering Node consumer (events straddle chunk boundaries). 
+- [Missing Final Result Event in Streaming JSON Output — anthropics/claude-code #1920](https://github.com/anthropics/claude-code/issues/1920) — Documents the known gotcha that the terminal {"type":"result",...} event is sometimes missing in stream-json, which any consumer must tolerate. +- [[BUG] stdout under --output-format stream-json stops — anthropics/claude-code #17248](https://github.com/anthropics/claude-code/issues/17248) — Evidence of stream-json stdout stalls affecting automated consumers; relevant for a replica's reliability guarantees. +- [Handling invalid JSON in Anthropic's fine-grained tool streaming](https://andyjakubowski.com/engineering/handling-invalid-json-in-anthropic-fine-grained-tool-streaming) — Reinforces that Anthropic (unlike OpenAI Structured Outputs) does NOT guarantee valid partial/final JSON under eager streaming, with concrete recovery patterns. diff --git a/docs/claude-code-architecture/research/subagents-task.md b/docs/claude-code-architecture/research/subagents-task.md new file mode 100644 index 0000000..87ae6e1 --- /dev/null +++ b/docs/claude-code-architecture/research/subagents-task.md @@ -0,0 +1,141 @@ +# Research: subagents-task + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code's subagent system is orchestrated by a single model-facing meta-tool: the "Agent" tool (legacy alias "Task", renamed in v2.1.63). When the parent model calls Agent with {subagent_type, prompt, description, model, run_in_background}, it spawns a child agent that runs its own full conversation loop in an isolated context window with its own system prompt, tool pool, permission boundary, and abort controller. The child does its work and returns ONLY its final message verbatim as the tool result — the parent never sees intermediate tool calls or reasoning. 
Subagents are defined as Markdown files with YAML frontmatter at .claude/agents/ (project), ~/.claude/agents/ (user), via --agents CLI JSON, in plugins, or via managed settings, with a fixed 5-level precedence. Each subagent's "description" field drives automatic delegation, but users can force invocation via natural-language naming, @-mention, or --agent (run whole session as that agent). Parallel spawning happens naturally when the model emits multiple Agent tool calls in one turn; background subagents (run_in_background:true or background:true frontmatter or Ctrl+B) run concurrently and auto-deny any prompt. As of v2.1.172, subagents can spawn nested subagents (foreground at any depth, background capped at depth 5). Communication beyond prompt/result uses the "SendMessage" tool (only with CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1), which routes by recipient name/ID/UDS-socket/bridge-session and auto-resumes dead agents from their disk transcript. + +## Components +### AgentTool (a.k.a. Task tool) +**Purpose:** The model-facing meta-tool that spawns a child subagent. The ONLY tool the parent model calls to delegate work; everything below flows from it. + +**Mechanism:** Registered via buildTool() factory under name "Agent" with legacy alias "Task". call() runs a 10-step decision tree BEFORE runAgent(): (1) teammate? 
(team_name+name set) -> spawnTeammate(); (2) resolve effective agent type: subagent_type provided -> use it; omitted+fork enabled -> undefined (fork path); omitted+fork disabled -> "general-purpose" default; (3) fork guard check; (4) resolve definition from activeAgents, filtering by permission deny rules + allowedAgentTypes, throw if not found/denied; (5) wait up to 30s for required MCP servers; (6) resolve isolation (param overrides def): remote->teleportToRemote(), worktree->createAgentWorktree(), null->normal; (7) sync-vs-async decision: shouldRunAsync = run_in_background || selectedAgent.background || isCoordinator || forceAsync || isProactiveActive; (8) assemble worker tool pool; (9) build system prompt + prompt messages; (10) execute (async -> registerAsyncAgent + void lifecycle; sync -> iterate runAgent inline). The dynamic prompt from getPrompt() is context-sensitive (lists available agents as an attachment message to avoid busting prompt cache, NOT inline in tool description). + +**Data model:** TaskInput (zod, feature-gated): +Base (always present): description (string, required, 3-5 word summary), prompt (string, required, full task instructions), subagent_type (string, optional), model (enum sonnet|opus|haiku, optional), run_in_background (boolean, optional). +Full schema additions (when swarm/isolation features active): name (string, makes agent addressable via SendMessage({to:name})), team_name (string), mode (PermissionMode), isolation (enum worktree|remote), cwd (string, absolute path override). +Feature-gated omissions: when fork active OR CLAUDE_CODE_DISABLE_BACKGROUND_TASKS set, run_in_background is stripped; when KAIROS flag off, cwd is omitted. The model never sees fields it cannot use. + +**Config:** type: Agent; name 'Agent'; legacy alias 'Task' for backward compat with older transcripts/permission rules/hook configs. 
+ +### AgentDefinition file format (.claude/agents/*.md) +**Purpose:** Declarative definition of a subagent: identity, capabilities, system prompt, and lifecycle config. Single source reused across subagent invocation, @-mention, --agent main-thread mode, and agent-team teammates. + +**Mechanism:** Loaded at session START only (restart required for disk edits; /agents UI edits take effect immediately). Five scope locations with priority: (1) Managed settings org-wide [highest], (2) --agents CLI flag JSON [session], (3) .claude/agents/ [project], (4) ~/.claude/agents/ [user], (5) plugin agents/ dir [lowest]. Project & user scanned RECURSIVELY (subfolders OK, identity from name field only — keep names unique within a scope or one is silently discarded). Plugin subfolders BECOME part of the scoped id (agents/review/security.md in plugin my-plugin -> my-plugin:review:security). --agents JSON uses same fields, with `prompt` field = markdown body. Programmatic SDK agents take precedence over filesystem agents with the same name. + +**Data model:** --- +name: <agent-name> # REQUIRED +description: <text> # REQUIRED (when to delegate) +tools: Read, Glob, Grep # optional comma-list or YAML array; '*' = all +disallowedTools: Write, Edit # denylist; applied BEFORE tools allowlist resolves +model: sonnet|opus|haiku|fable|<full-model-id>|inherit # default: inherit +permissionMode: default|acceptEdits|auto|dontAsk|bypassPermissions|plan +maxTurns: <int> +skills: [skill-name, ...] # full content injected, not just description +mcpServers: [{<server-name>: {type,command,args}}, "<existing-server-name>"] +hooks: {PreToolUse|PostToolUse|Stop: [{matcher, hooks:[{type:command,command}]}]} +memory: user|project|local # dir at ~/.claude/agent-memory/<agent-name>/ etc. 
+background: true|false # default false +effort: low|medium|high|xhigh|max| +isolation: worktree # temp git worktree branched from default branch +color: red|blue|green|yellow|purple|orange|pink|cyan +initialPrompt: # auto-submitted as first user turn when agent runs as MAIN session (--agent) +--- + + +**Config:** name format: lowercase + hyphens (filename need not match name). model resolution precedence: CLAUDE_CODE_SUBAGENT_MODEL env -> per-invocation model param -> frontmatter model -> main model. plugins IGNORE hooks, mcpServers, permissionMode fields (security). + +### Built-in subagent registry (6 types) +**Purpose:** The always-available agents Claude delegates to automatically. Cover exploration, planning, general work, verification, and UI helpers. + +**Mechanism:** General-purpose: full tools (minus Agent), no CLAUDE.md omission, model=getDefaultSubagentModel(). Explore: Haiku, read-only (FileEdit/FileWrite/NotebookEdit/Agent removed), CRITICAL: READ-ONLY MODE in prompt, one-shot — most spawned (~34M/week). Plan: 'inherit' model, read-only, 4-step structured process ending with Critical Files list, one-shot. Verification: read-only, 'inherit', background:true always, red, ~130-line anti-avoidance prompt, criticalSystemReminder_EXPERIMENTAL guardrail. statusline-setup: Sonnet, Read+Edit only, orange. claude-code-guide: Haiku, dontAsk mode, excluded when entrypoint=SDK. Disable all built-ins via CLAUDE_AGENT_SDK_DISABLE_BUILTIN_AGENTS=1; deny specific via permissions.deny=["Agent(Explore)"] or --disallowedTools. + +**Data model:** Type registry built dynamically by getBuiltInAgents() gated by feature flags + GrowthBook experiments (BUILTIN_EXPLORE_PLAN_AGENTS + tengu_amber_stoat for Explore/Plan; VERIFICATION_AGENT + tengu_hive_evidence for Verification). + +**Config:** Explore & Plan have omitClaudeMd:true (strip CLAUDE.md + git status, saves tokens; only these two skip them, NO frontmatter field to change). 
Explore/Plan are ONE_SHOT (no agentId returned, no SendMessage instructions, no usage trailer). Agent tool is in default disallowedTools for general-purpose to prevent exponential fan-out. + +### runAgent() 15-step lifecycle +**Purpose:** The single async-generator function that creates and drives a subagent's entire execution context. Every subagent type (fork/built-in/custom/coordinator-worker) flows through it. + +**Mechanism:** 15 steps: (1) Model resolution chain caller-override > agent-def > parent-model > default (getAgentModel handles 'inherit'); (2) agentId creation (override.agentId or createAgentId() -> agent-); (3) context prep — fork clones parent history via filterIncompleteToolCalls() (strips tool_use blocks lacking matching tool_result, else API rejects); fresh agents start empty; file-state cache fork=clone, fresh=createWithSizeLimit; (4) CLAUDE.md stripping for read-only agents; (5) permission isolation — custom getAppState() overlays agent mode unless parent is bypassPermissions/acceptEdits/auto (parent wins); async agents get shouldAvoidPermissionPrompts:true; allowedTools replaces session allow rules but preserves SDK --allowedTools; (6) tool resolution (fork: useExactTools passthrough for byte-identical cache prefix; else resolveAgentTools applies tools/disallowedTools/ASYNC_AGENT_ALLOWED_TOOLS); (7) system prompt (fork uses override.systemPrompt = parent's exact rendered bytes; else getAgentSystemPrompt + env details); (8) abort controller isolation (async=new unlinked controller; sync=parent's shared controller); (9) register frontmatter hooks scoped to agentId, Stop->SubagentStop conversion, strictPluginOnlyCustomization skips user agent hooks; (10) preload skills (3-strategy name resolution) as user messages; (11) MCP init (name refs shared/memoized, inline created+cleaned up); (12) createSubagentContext (sync shares setAppState, async isolates it; both share setAppStateForTasks + setResponseLength; messages own array); (13) 
onCacheSafeParams callback for background summarization; (14) query() loop drives child conversation, yields Messages, each recorded to sidechain transcript JSONL O(1); (15) finally{} cleanup: mcpCleanup, clearSessionHooks, cleanupAgentTracking, readFileState.clear(), initialMessages.length=0, unregisterPerfettoAgent, clearAgentTranscriptSubdir, remove agent's todos, killShellTasksForAgent. + +**Data model:** runAgent signature: {agentDefinition, promptMessages, toolUseContext, canUseTool, isAsync, canShowPermissionPrompts, forkContextMessages, querySource, override, model, maxTurns, availableTools, allowedTools, onCacheSafeParams, useExactTools, worktreePath, description}. agentId branded type AgentId = `agent-`. + +**Config:** Thinking disabled for normal agents ({type:'disabled'}) to control cost; fork agents inherit thinkingConfig for cache identity. Explore/Plan skip CLAUDE.md & git status (gate tengu_slim_subagent_claudemd defaults true). + +### Task state machine + async communication +**Purpose:** Unified state model for all background operations (shell, subagent, teammate, remote, workflow, mcp-monitor, dream). Backbone of background agent tracking, progress, and result delivery. + +**Mechanism:** Three comms channels: (1) Disk output files (outputFile symlink to JSONL transcript, read incrementally via outputOffset; TaskOutputTool polls, block:true polls until terminal/timeout); (2) Task notifications ( XML injected as user-role message in parent conversation, deduped via notified flag); (3) Command queue pendingMessages[] drained at tool-round boundaries by drainPendingMessages() (messages arrive BETWEEN tool rounds, never mid-execution). ProgressTracker tracks toolUseCount, latestInputTokens (cumulative-latest), cumulativeOutputTokens (summed), recentActivities (cap 5). Backgrounding mid-execution: Promise.race between next-message and background-signal; foreground iterator.return() triggers cleanup, re-spawn as async with same ID, flip isBackgrounded. 
+ +**Data model:** TaskStateBase: {id (prefixed random, ~2.8T combos), type, status, description, toolUseId, startTime, endTime?, totalPausedMs?, outputFile (disk path), outputOffset (read cursor), notified (dedup flag)}. LocalAgentTaskState adds: agentId, prompt, selectedAgent, agentType, model?, abortController?, pendingMessages[], isBackgrounded, retain, diskLoaded, evictAfter?, progress?, lastReportedToolCount, lastReportedTokenCount. AppState.tasks is flat Record (no parent-child tree). + +**Config:** 7 types: local_bash(b), local_agent(a), remote_agent(r), in_process_teammate(t), local_workflow(w), monitor_mcp(m), dream(d). 5 statuses: pending->running->{completed|failed|killed}. isTerminalTaskStatus() guards message injection. + +### SendMessage + agent teams (inter-agent messaging) +**Purpose:** Universal communication primitive across subagents, coordinator workers, swarm teammates, and remote/UDS peers. Single tool, 4 routing modes by shape of `to` field. + +**Mechanism:** Leader spawns teammates (in-process via AsyncLocalStorage, or split-pane via tmux/iTerm2). SendMessage routes by `to`: bridge: (remote relay, needs consent) > uds: (local IPC) > agentNameRegistry lookup (running->queuePendingMessage; terminal->resumeAgentBackground; not in AppState->resume from disk transcript) > team mailbox fallback. Mailbox = writeToMailbox() file per recipient; to:"*" broadcasts to all members except sender (no fan-out opt). Structured protocols: shutdown_request/response (cooperative, teammate may reject), plan_approval_response (only lead approves). Auto-resume: SendMessage to dead agent reads sidechain JSONL, filters orphaned thinking/tool blocks, rebuilds content-replacement state, re-registers as background task, runs runAgent() with restored history + new message. Workers cannot spawn sub-teams (INTERNAL_WORKER_TOOLS deny set). Known bug: SendMessage by agent NAME for completed/resumed agents may silently fail — agent ID is reliable (GitHub issue #42999). 
+ +**Data model:** InProcessTeammateTaskState: type 'in_process_teammate', identity, prompt, messages? (UI cap 50), pendingUserMessages[], isIdle, shutdownRequested, awaitingPlanApproval, permissionMode, onIdleCallbacks?, currentWorkAbortController (distinct from main kill controller — cancels current turn only, redirect pattern). TeamContext: {teamName, teammates:{[id]:{name,color}}}. agentNameRegistry: Map. + +**Config:** Requires CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1 (experimental). Stored on disk: team config ~/.claude/teams/{team-name}/config.json (members array with name, agentId, agentType), task list ~/.claude/tasks/{team-name}/. Both removed on cleanup. NO project-level teams.json recognized. + +### Termination & resume contract +**Purpose:** How subagents end, how their result returns to parent, and how they can be continued. + +**Mechanism:** When subagent completes, Agent tool result includes text block 'agentId: '. Explore/Plan are one-shot (no agentId, cannot resume). To resume: parent uses SendMessage({to: agentId}) (only available with CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1) OR SDK resumes by passing resume: + naming agentId in prompt. Transcripts at ~/.claude/projects/{project}/{sessionId}/subagents/agent-{agentId}.jsonl — persist independently of main conversation (main compaction doesn't touch them); cleaned up via cleanupPeriodDays (default 30). Stopped subagent receiving SendMessage auto-resumes in background without new Agent invocation. + +**Data model:** Agent tool output discriminated union: {status:'completed', prompt, ...AgentToolResult} | {status:'async_launched', agentId, description, prompt, outputFile}. (Internal-only TeammateSpawnedOutput & RemoteLaunchedOutput excluded from exported schema for dead-code-elimination.) + +**Config:** builtIn always registered in interactive sessions; disable specific via permissions.deny=["Agent()"] or --disallowedTools. 
Resume requires non-one-shot agent (general-purpose/custom); Explore/Plan cannot resume. CLAUDE_CODE_DISABLE_BACKGROUND_TASKS=1 disables all background; CLAUDE_CODE_FORK_SUBAGENT=1 forces all spawns to background. + +## Key behaviors +- The Task->Agent rename (v2.1.63) is a BREAKING CHANGE for hook scripts: PreToolUse/PostToolUse hooks that string-match the tool name must now check BOTH 'Task' and 'Agent' for cross-version compatibility. The SDK still emits 'Agent' in tool_use blocks but 'Task' in system:init tools list and result.permission_denials[].tool_name. +- Model resolution order is FIXED and non-obvious: CLAUDE_CODE_SUBAGENT_MODEL env > per-invocation model param > frontmatter model > main conversation model. 'inherit' resolves to parent's model. Explore defaults to Haiku for external users via GrowthBook gating. +- Subagent receives ONLY: its own system prompt + Agent tool prompt + project CLAUDE.md (except Explore/Plan) + git status snapshot (except Explore/Plan) + preloaded skills. It does NOT receive parent conversation history, parent system prompt, or preloaded skill content unless in AgentDefinition.skills. The parent->child channel is ONLY the prompt string. +- The parent receives the subagent's FINAL message VERBATIM as the Agent tool_result (may be summarized by parent in its own response). To preserve verbatim subagent output in user-facing response, instruct the main query() to do so — the contract is not automatic. +- Foreground subagents share the parent's abort controller (Escape kills both); background subagents get an independent controller (Escape on parent does NOT kill them). Backgrounding mid-execution re-spawns with same ID and flips isBackgrounded. +- Background subagents auto-deny ANY tool call that would prompt (no terminal attached); foreground passes prompts through to user. Named/background subagents auto-deny prompting tools; 'bubble' mode is the exception that surfaces prompts to parent terminal. 
+- If 'Agent' is omitted from a subagent's tools list, it CANNOT spawn nested subagents. 'Agent(worker, researcher)' allowlist syntax ONLY applies when running as main thread via --agent; in a subagent definition, any type list in parens is IGNORED (bare Agent enables nesting). +- Nested subagent depth limit (v2.1.172): foreground can spawn at any depth (self-limiting via blocking); background subagent at depth 5 gets NO Agent tool and cannot spawn further. The limit is fixed and NOT configurable. Fork still cannot spawn another fork (querySource==='agent:builtin:fork' guard + isInForkChild scan for the fork-child marker tag; the literal tag name was stripped from this note — TODO restore). +- Permission mode cascade: if parent is bypassPermissions, acceptEdits, or auto mode, the PARENT'S mode always wins — the subagent's permissionMode frontmatter is IGNORED. Otherwise the agent's mode applies. This prevents a custom agent from downgrading security the user explicitly set. +- Auto-resume via SendMessage: sending a message to a completed/killed agent transparently resurrects it from its disk JSONL transcript (filters orphaned thinking/tool blocks, rebuilds content-replacement state for cache stability). Coordinators do not need to track agent liveness. CAVEAT: GitHub issue #42999 reports SendMessage by agent NAME silently fails for some resume paths — agent ID is the reliable target. +- Transcripts persist separately from main conversation: main-conversation compaction does NOT touch subagent transcripts. They survive session restart and are cleaned up via cleanupPeriodDays (default 30 days). Sidechain recording is O(1) per message (append-only, previous-UUID reference). +- Plugin subagents CANNOT use hooks, mcpServers, or permissionMode frontmatter fields (silently ignored for security). Copy into .claude/agents/ if you need them. 
As of v2.1.153, main-session MCP restrictions (--strict-mcp-config, --bare, managed MCP, allowedMcpServers/deniedMcpServers) also cover servers declared in subagent frontmatter (but --strict-mcp-config does NOT filter inline --agents/SDK agents servers — those are explicit caller input). +- Filesystem-based agents load at SESSION START only. Editing a .claude/agents/*.md on disk requires a session restart. /agents UI edits take effect immediately. Windows: very long subagent prompts may fail (>8191 char command-line limit) — use filesystem agents. +- Explore/Plan are the ONLY agents that skip CLAUDE.md and git status, and there is NO frontmatter field to change which agents skip them. If a rule must reach Explore/Plan, restate it in the delegation prompt. +- In agent teams: subagent definitions used as teammates apply ONLY tools + model; the body is APPENDED to teammate system prompt (not replacing). skills and mcpServers fields are NOT applied on the teammate path (teammates load those from project/user settings like a regular session). Team coordination tools (SendMessage, task tools) are ALWAYS available even when tools restricts others. + +## External interfaces +- Tool name: 'Agent' (primary), 'Task' (legacy alias) — emitted in tool_use blocks; system:init tools list & result.permission_denials[].tool_name still use 'Task' in some SDK versions +- Agent tool input: {description, prompt, subagent_type?, model?, run_in_background?, name?, team_name?, mode?, isolation?, cwd?} +- Agent tool output: {status:'completed', prompt, ...result} | {status:'async_launched', agentId, description, prompt, outputFile} +- SendMessage tool input: {to: name|'*'|'uds:'|'bridge:'|agentId, summary?, message: string | {type:'shutdown_request'|'shutdown_response'|'plan_approval_response', ...}} +- TaskStop tool input: {task_id?, shell_id? 
(deprecated)} — legacy alias 'KillShell' +- TaskOutput tool input: {task_id, block=true, timeout=30000} +- File formats: .claude/agents/*.md & ~/.claude/agents/*.md (YAML frontmatter + markdown body); --agents JSON (prompt field = body); subagent transcripts ~/.claude/projects/{project}/{sessionId}/subagents/agent-{agentId}.jsonl +- CLI flags: --agent <name>, --agents '<json>', --disallowedTools 'Agent(Explore)', --teammate-mode in-process|tmux|auto, settings 'agent' & 'teammateMode' +- Env vars: CLAUDE_CODE_SUBAGENT_MODEL, CLAUDE_CODE_DISABLE_BACKGROUND_TASKS, CLAUDE_CODE_FORK_SUBAGENT, CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS, CLAUDE_AGENT_SDK_DISABLE_BUILTIN_AGENTS, CLAUDE_CODE_COORDINATOR_MODE +- Permission rule forms: 'Agent', 'Agent(worker, researcher)' (allowlist only when main --agent), 'Agent(Explore)' in permissions.deny + +## Open questions +- Exact content/wording of the Explore agent's 'CRITICAL: READ-ONLY MODE' system prompt section and the general-purpose system prompt (described but not quoted verbatim in sources) +- Full list and exact gating conditions of the ~12 feature flags + GrowthBook experiments (FORK_SUBAGENT, BUILTIN_EXPLORE_PLAN_AGENTS, VERIFICATION_AGENT, KAIROS, TRANSCRIPT_CLASSIFIER, PROACTIVE, tengu_amber_stoat, tengu_hive_evidence, tengu_slim_subagent_claudemd, tengu_scratch) — which are compile-time vs runtime A/B +- Exact AgentProgress type fields and the ASYNC_AGENT_ALLOWED_TOOLS allowlist contents +- Whether the 'dream' task type (speculative background thinking) and 'local_workflow' Workflow tool are GA or still feature-gated as of v2.1.175 +- Whether coordinator mode (CLAUDE_CODE_COORDINATOR_MODE) is GA or still behind COORDINATOR_MODE feature flag for general users + +## Sources +- [Create custom subagents — Claude Code Docs (official)](https://code.claude.com/docs/en/sub-agents) — PRIMARY source. 
Full frontmatter field table, 5 scope priorities, built-in subagent details (Explore/Plan/general-purpose), isolation:worktree, what-loads-at-startup matrix, resume contract, nested depth rules. +- [Subagents in the SDK — Claude Code Docs (official)](https://code.claude.com/docs/en/agent-sdk/subagents) — AgentDefinition field table (description/prompt/tools/disallowedTools/model/skills/memory/mcpServers/initialPrompt/maxTurns/background/effort/permissionMode), what-subagents-inherit matrix, v2.1.63 Task->Agent rename + dual-name detection guidance, resume via agentId, v2.1.172 nested depth rule. +- [Orchestrate teams of Claude Code sessions — Claude Code Docs (official)](https://code.claude.com/docs/en/agent-teams) — Agent teams architecture (lead/teammates/task list/mailbox), team+task disk paths, subagent-definitions-for-teammates (tools+model honored, body appended, skills/mcpServers ignored), mailbox messaging, plan approval protocol, v2.1.32 minimum. +- [Ch 8. Spawning Sub-Agents — Claude Code from Source](https://claude-code-from-source.com/ch08-sub-agents/) — Authoritative internals: AgentTool base+full input schema with feature-gated field omissions, 10-step call() decision tree, full 15-step runAgent() lifecycle, 6 built-in agent types with feature gates, fork guard mechanics, output schema discriminated union. +- [Ch 10. Tasks, Coordination, and Swarms — Claude Code from Source](https://claude-code-from-source.com/ch10-coordination/) — Task state machine (7 types, 5 statuses, TaskStateBase/LocalAgentTaskState fields), 3 background comms channels (disk/notifications/queue), SendMessage 4-mode routing + auto-resume, TaskStop kill switch, coordinator mode internals, swarm mailbox. +- [Claude Code changelog — Claude Code Docs (official)](https://code.claude.com/docs/en/changelog) — Confirms version-specific facts: v2.1.172 'Sub-agents can now spawn sub-agents up to 5 levels deep'; Workflow tool agent() attribution. 
+- [v2.1.63 Task->Agent tool rename breaking hooks — GitHub Issue #29677](https://github.com/anthropics/claude-code/issues/29677) — Confirms the v2.1.63 Task->Agent rename is a breaking change for PreToolUse/PostToolUse hook scripts that check the tool name. +- [SendMessage silently fails when using agent name — GitHub Issue #42999](https://github.com/anthropics/claude-code/issues/42999) — Documents the gotcha that SendMessage with agent NAME may silently fail for resuming completed agents; only agent ID works reliably. +- [Claude Code v2.1.172 Release Notes — claudeupdates.dev](https://www.claudeupdates.dev/version/2.1.172) — Independent corroboration of v2.1.172 nested subagent (5-level) release and the agent-lifecycle stability fixes (stuck-active panel, fixed background agent project-settings isolation). +- [Task tool input schema (TaskArgs) — letta-ai/letta-code Task.ts](https://github.com/letta-ai/letta-code/blob/32e042d5/src/tools/impl/Task.ts) — Third-party reimplementation confirming exact Task tool args: command/subagent_type/prompt/description/model/agent_id/conversation_id/run_in_background, validating the schema shape from primary sources. diff --git a/docs/claude-code-architecture/research/system-prompt-assembly.md b/docs/claude-code-architecture/research/system-prompt-assembly.md new file mode 100644 index 0000000..689eb68 --- /dev/null +++ b/docs/claude-code-architecture/research/system-prompt-assembly.md @@ -0,0 +1,134 @@ +# Research: system-prompt-assembly + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code's system prompt is not a static string but a per-turn assembled array of blocks (branded `SystemPrompt` type) built by `getSystemPrompt()` in `src/constants/prompts.ts` and resolved by `buildEffectiveSystemPrompt()`. 
It is split into a STATIC, globally-cacheable zone (~12 sections: identity, intro, system rules, doing-tasks, actions, using-tools, tone/style, output-efficiency, token-budget, proactive) and a DYNAMIC, per-session zone (env info, scratchpad, function-result-clearing, MCP instructions, memory, CLAUDE.md, output-style, git-status, append-prompt) divided by a `__SYSTEM_PROMPT_DYNAMIC_BOUNDARY__` marker that is stripped before the API call. Each section is either memoized via `systemPromptSection()` (cached until `/clear` or `/compact`) or recomputed every turn via `DANGEROUS_uncachedSystemPromptSection()` (used for MCP instructions and env info). CLAUDE.md content is injected as a USER message (project context), NOT into the system prompt in the SDK; in the interactive CLI it appears in the prompt assembly. Hooks inject `<system-reminder>` tags via `additionalContext`/`systemMessage` at event-appropriate positions. The Agent SDK exposes preset/custom/append options and `excludeDynamicSections` (v0.2.98+) to move per-session context into the first user message for cross-session cache reuse. + +## Components +### Effective Prompt Resolution (priority system) +**Purpose:** Decides the final prompt base before per-turn assembly. + +**Mechanism:** buildEffectiveSystemPrompt() resolves which prompt base is used via a strict priority ladder: (0) overrideSystemPrompt non-empty replaces everything; (1) COORDINATOR_MODE feature => dedicated coordinator prompt (strips toolset to Agent + TaskStop + SendMessage); (2) mainThreadAgentDefinition exists => proactive mode appends to default, else replaces; (3) --system-prompt CLI arg replaces default; (4) default = full getSystemPrompt() output. The SDK exposes three starting points: minimal default (omitted systemPrompt), claude_code preset (object {type:'preset',preset:'claude_code', append?:string, excludeDynamicSections?:boolean}), or a custom string. 
+ +**Data model:** Priority tiers: 0 Override, 1 Coordinator (feature active => toolset stripped to Agent+TaskStop+SendMessage), 2 mainThreadAgentDefinition (proactive: append; else replace), 3 --system-prompt CLI (replace), 4 Default = getSystemPrompt(). The branded SystemPrompt type prevents passing raw string[] to the API. + +**Config:** systemPrompt: { type:'preset', preset:'claude_code', append?:string, excludeDynamicSections?:boolean } (TS); system_prompt={'type':'preset','preset':'claude_code','append':...} (Python). Custom: systemPrompt: string. None => minimal default. excludeDynamicSections added v0.2.98 (TS) / v0.1.58 (Python). CLI flags: --append-system-prompt, --exclude-dynamic-system-prompt-sections, --system-prompt. Env: CLAUDE_CODE_SIMPLE truthy => single-line minimal prompt. + +### getSystemPrompt() — section factory +**Purpose:** The core factory that concatenates ~18 ordered sections split by a cache boundary. + +**Mechanism:** Static zone (cacheable, scope 'global'): 1 CLI System Prefix ('You are Claude Code, Anthropic's official CLI for Claude.'), 2 Intro (interactive vs headless swaps 'assist' for 'complete'), 3 Cyber Risk Instruction, 4 URL Safety ('NEVER generate or guess URLs'), 5 System Rules (output format, prompt-injection defense, system-reminder handling, compaction), 6 Doing Tasks (anti-YAGNI; conditional on output_style keepCodingInstructions), 7 Executing Actions (LOW/MEDIUM/HIGH blast-radius taxonomy; always-confirm set: rm -rf/DROP TABLE, git push/publish, migrations/force-push), 8 Using Your Tools (prefer dedicated tools Read/Edit/Glob/Grep over Bash; varies by repl_mode/embedded_search/task_tool_enabled), 9 Tone & Style (no emojis; varies user_type_external), 10 Output Efficiency (internal 'between-tool calls ≤25 words' vs external 'go straight to the point'), 11 Token Budget (GATED on feature('TOKEN_BUDGET')), 12 Proactive/KAIROS (GATED on feature('PROACTIVE')). 
Then the cache boundary marker, then the Dynamic zone (scope 'org' or uncached): 13 Env Info (cwd, isGit, platform, shell, osVersion, model name, knowledge cutoff; varies undercover/worktree), 14 Scratchpad, 15 Function Result Clearing (microcompact_enabled; '5 most recent results always kept'), 16 Summarize Tool Results, 17 MCP Server Instructions (DANGEROUS_uncached — recomputed every turn), 18 Memory, plus Language, Output Style, Git Status Snapshot (current branch / recent commits / working tree — snapshot in time), Numeric Length Anchors (user_type_ant), Brief (kairos_brief), and Append System Prompt at the very end. + +**Data model:** Sections registered via systemPromptSection(name, compute) [cached, invalidated only on /clear or /compact] or DANGEROUS_uncachedSystemPromptSection(name, compute, reason) [recomputed every turn — used for getMcpInstructionsSection, Env Info]. clearSystemPromptSections() invalidates the memo AND clears beta-header latches. + +**Config:** Gates: ask_user_enabled, non_interactive (omits shell-shortcut section in SDK/headless), agent_tool_enabled (+ fork_subagent + explore_plan_agents), skills_enabled (+ experimental_skill_search), verification_agent, memory_configured, user_type_ant, language_set, output_style, mcp_connected (+ mcp_delta_mode), scratchpad_enabled, microcompact_enabled, token_budget, kairos_brief, is_git_repo & !remote & git_instructions_enabled, append_system_prompt. + +### Environment / System Context section +**Purpose:** Inject cwd, platform, shell, model, OS version, git status so the model knows its execution environment. + +**Mechanism:** Env Info is a DANGEROUS_uncachedSystemPromptSection recomputed per turn. It reads osType/osVersion/osRelease, getCwd(), getIsGit(). A separate 'Git Status Snapshot' block (gated is_git_repo && not remote && git_instructions_enabled) injects current branch, default (main) branch, git user, and a working-tree status with recent commits. 
The whole env block is what breaks the prefix cache for the static zone — excludeDynamicSections moves it into the first user message instead. + +**Data model:** Env fields read: osType, osVersion, osRelease, getCwd(), getIsGit(). The gitStatus block carries currentBranch, mainBranch (default branch for PRs), gitUser, and a working-tree status string + recent commits list. + +**Config:** Env var sources: osType, osVersion, osRelease (platform runtime), getCwd(), getIsGit(). CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1 loads CLAUDE.md/rules from --add-dir paths. + +### CLAUDE.md cascade (memory) +**Purpose:** Persistent project/user/org instructions, loaded per session and lazily. + +**Mechanism:** IMPORTANT asymmetry: in the Agent SDK CLAUDE.md is NOT injected into the system prompt — the SDK reads it and injects it as a USER message (project context) alongside the conversation. Per the memory docs: 'CLAUDE.md content is delivered as a user message after the system prompt, not as part of the system prompt itself.' Resolution walks up the directory tree from cwd collecting CLAUDE.md and CLAUDE.local.md, concatenating root-down with .local appended after .md at each level. Managed policy CLAUDE.md (/Library/Application Support/ClaudeCode/CLAUDE.md on macOS, /etc/claude-code/ on Linux, C:\Program Files\ClaudeCode\ on Windows) loads first and cannot be excluded. @path imports resolve relative to the importing file with max depth 4 hops. Subdirectory CLAUDE.md files load lazily when Claude reads files there. Project-root CLAUDE.md is re-injected after /compact. + +**Data model:** Discovery order: managed policy (cannot be excluded) -> ~/.claude/CLAUDE.md -> ancestor dirs root-down (CLAUDE.md then CLAUDE.local.md at each level) -> ./CLAUDE.md or ./.claude/CLAUDE.md -> ./CLAUDE.local.md. .claude/rules/*.md (no paths frontmatter) join at CLAUDE.md priority; path-scoped rules (paths: glob YAML) load on file read. 
HTML block comments stripped (code-block comments preserved). Imports expanded recursively up to 4 hops. Auto-memory MEMORY.md first 200 lines or 25KB loaded; topic files on demand only. + +**Config:** settingSources / setting_sources controls whether 'project' and 'user' files load (default both enabled). CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD=1 loads memory from --add-dir paths. claudeMdExcludes (glob, arrays merge across layers) skips files. --setting-sources may exclude 'local'. + +### MCP Server Instructions injection +**Purpose:** Inject per-server 'how to use this server' guidance into the dynamic prompt zone. + +**Mechanism:** When MCP servers are connected, each server's instructions field (returned in InitializeResult during the initialize handshake) is injected as a '# MCP Server Instructions' section, one subsection per server, in the dynamic/uncached zone (DANGEROUS_uncachedSystemPromptSection => recomputed every turn). If mcp_delta_mode is enabled, instructions are delivered as a per-turn attachment instead of inline in the system prompt. Empty/missing instructions are omitted. + +**Data model:** instructions: string from InitializeResult. Per-server section header `## <server-name>`. Composite prompt text assembled under '# MCP Server Instructions'. + +**Config:** mcp_connected gate; mcp_delta_mode toggles per-turn attachment vs inline. Instructions are re-fetched because tools/list can change (MCP list_changed). + +### Hook injection (system-reminder wrapping) +**Purpose:** Run user-defined shell/HTTP/MCP/prompt/agent interceptors at lifecycle events and inject their output as model-visible reminders. + +**Mechanism:** Five handler types: command (stdin JSON / stdout+exit), http (POST body / 2xx response JSON), mcp_tool (calls a tool on a connected server; text output treated as command stdout), prompt (single-turn Claude yes/no), agent (spawns a tool-using subagent).
The additionalContext field in hookSpecificOutput is wrapped by Claude Code in a `<system-reminder>` tag and inserted at a position determined by the firing event: SessionStart/Setup/SubagentStart => start of conversation before first prompt; UserPromptSubmit/UserPromptExpansion => alongside submitted prompt; PreToolUse/PostToolUse/PostToolUseFailure/PostToolBatch => next to the tool result; Stop/SubagentStop => end of turn. Matches: 'Claude Code wraps the string in a system reminder and inserts it into the conversation at the point where the hook fired.' Exit 0 with stdout on UserPromptSubmit/UserPromptExpansion/SessionStart also adds the text as Claude-visible context (these three events only). Exit 2 blocks per the per-event blocking table. + +**Data model:** Output schema: { continue?:bool, stopReason?:string, suppressOutput?:bool, systemMessage?:string, terminalSequence?:string(allowlist OSC 0/1/2/9/99/777 + BEL), decision?:'block', reason?:string, hookSpecificOutput:{ hookEventName, permissionDecision?:'allow'|'deny'|'ask', permissionDecisionReason?, additionalContext?, retry?:bool } }. additionalContext/systemMessage/plain stdout capped 10,000 chars; overflow => file + preview. Exit codes: 0 success (JSON parsed), 2 blocking error (stderr fed to Claude), other = non-blocking. HTTP: 2xx+body=JSON, non-2xx=non-blocking. + +**Config:** Boundaries: UserPromptSubmit default timeout lowered to 30s; MessageDisplay 10s. Tokens/effort injected as $CLAUDE_EFFORT env and effort:{level} in hook JSON. Managed hooks survive disableAllHooks from lower layers. + +### Hook event matchers & tool-name namespacing +**Purpose:** Filter which hooks fire for which tool/event. + +**Mechanism:** Tool-event hooks (PreToolUse, PostToolUse, PostToolUseFailure, PermissionRequest, PermissionDenied) match by tool_name. matcher rules: '*' / '' / omitted => all; only [A-Za-z0-9_|] => exact or |-separated exact list; any other char => JS regex.
MCP tools are named `mcp__<server>__<tool>`; match-all-from-server needs `mcp__<server>__.*` (the .* makes it a regex; bare mcp__memory is treated as exact string and matches nothing). Optional per-handler 'if' uses permission-rule syntax (e.g. Bash(rm *), Edit(*.ts)) and only evaluates on tool events. SessionStart matches startup|resume|clear|compact; InstructionsLoaded matches session_start|nested_traversal|path_glob_match|include|compact. + +**Data model:** Input: { session_id, transcript_path, cwd, permission_mode:'default'|'plan'|'acceptEdits'|'auto'|'dontAsk'|'bypassPermissions', effort:{level}, hook_event_name, plus event-specific (tool_name, tool_input) }. agent_id/agent_type added in subagents. Output: permissionDecision allow/deny/ask + reason (PreToolUse), retry:bool (PermissionDenied), additionalContext (model-facing), systemMessage (user-facing warning), suppressOutput, terminalSequence, continue:false + stopReason. + +**Config:** Matched by tool name. Settings keys: hooks.{Event}[].matcher, hooks[].if (permission-rule syntax), disableAllHooks, allowManagedHooksOnly, once (skill-frontmatter only). Hook sources: ~/.claude/settings.json, .claude/settings.json, .claude/settings.local.json, managed policy, plugin hooks/hooks.json, skill/agent frontmatter. + +### Dynamic reminders: todo / plan mode / skill surfacing +**Purpose:** Steer the model mid-conversation without rebuilding the system prompt. + +**Mechanism:** These are NOT part of the system prompt. They are injected as attachments appended to user messages each turn: (a) todo/task state ('The task tools haven't been used recently... consider using TaskCreate'), (b) active plan-mode ('plan only, do not code yet'), (c) auto-surfaced relevant skills ('Skills relevant to your task:'), (d) hook-produced additionalContext, (e) git/file-change diff reminders after tool edits. They are wrapped in `<system-reminder>` tags and the model is instructed (via System Rules section) to read and apply them.
+ +**Data model:** Reminders are `<system-reminder>` blocks attached as attachments to user messages (not stored in the system prompt array). + +**Config:** Todo tracking built into Agent SDK (TaskCreate/TaskUpdate/TaskList). Plan mode is permission_mode:'plan'. Reminders are non-system-prompt context — they appear as `<system-reminder>` tags in the message stream. + +## Key behaviors +- CLAUDE.md lives in the CONVERSATION (user message), not the system prompt, in the Agent SDK — it does not affect the system-prompt cache entry. The env-info block (cwd/platform/git/shell/model) DOES live in the system prompt and is what normally prevents cache reuse across directories. +- excludeDynamicSections moves the env-info block into the FIRST USER MESSAGE so the system prompt (preset + append) becomes byte-identical across users/machines and shares a cache entry. Tradeoff: text in a user message carries marginally less weight than in the system prompt. Requires claude-agent-sdk TS v0.2.98 / Python v0.1.58. +- Three caching modes in splitSysPromptPrefix(): Mode 1 (MCP present) => no global cache, whole prompt scope 'org' because MCP tool defs change; Mode 2 (1P default, no MCP) => split at boundary, static=scope 'global' (cross-org cacheable), dynamic=uncached; Mode 3 (3P providers Bedrock/Vertex/OpenAI) => whole prefix scope 'org'. +- The boundary marker __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__ is inserted into the prompt array but REMOVED before sending to the API — the model never sees it. It exists only so splitSysPromptPrefix can find the split point. +- systemPromptSection() memoizes compute results and is only cleared by /clear or /compact (clearSystemPromptSections also clears beta-header latches). DANGEROUS_uncachedSystemPromptSection forces per-turn recompute and is deliberately named to discourage use — reserved for genuinely per-turn content (MCP instructions, env info).
+- Output styles: a custom output style by DEFAULT REPLACES the preset's software-engineering instructions; set keep-coding-instructions: true in frontmatter to layer on top instead. Stored in ~/.claude/output-styles/ (user) or .claude/output-styles/ (project). Loaded via settingSources user/project. Python SDK has no programmatic outputStyle selector. +- CLAUDE.md loading is gated by settingSources — an empty array disables CLAUDE.md entirely even though the claude_code preset is active. 'project' loads ./CLAUDE.md or ./.claude/CLAUDE.md; 'user' loads ~/.claude/CLAUDE.md. +- CLAUDE.md import depth is capped at 4 hops; relative @paths resolve against the importing file, not cwd. Block HTML comments are stripped before injection (code-block comments preserved). Subdirectory CLAUDE.md files load lazily on file reads, not at launch. +- Auto-memory MEMORY.md: only first 200 lines OR 25KB (whichever first) loaded at session start; topic files loaded on demand. Storage at `~/.claude/projects/<project>/memory/`, shared across worktrees of one git repo. Requires Claude Code v2.1.59+. Toggle: autoMemoryEnabled setting, CLAUDE_CODE_DISABLE_AUTO_MEMORY=1, or /memory UI. +- managed-policy CLAUDE.md cannot be excluded by claudeMdExcludes and cannot be disabled — it always applies. The claudeMd key in managed-settings.json is an alternative to deploying a managed CLAUDE.md file (only honored in managed/policy settings). +- Git Status Snapshot injected only when is_git_repo && not remote && git_instructions_enabled. It is explicitly a 'snapshot in time' and the prompt warns it will not update during the conversation. +- MCP server instructions come from the instructions field of the MCP InitializeResult; Claude Code injects them as a per-server subsection. If mcp_delta_mode is on, they are attached per-turn instead. Because MCP tool lists can change (list_changed), the MCP instructions section is DANGEROUS_uncached.
+- Hook additionalContext/systemMessage/plain stdout are CAPPED at 10,000 chars; overflow is written to a file and replaced with a preview + path. additionalContext is wrapped in a `<system-reminder>` tag and inserted at the event-appropriate position (start of convo / alongside prompt / next to tool result / end of turn) — it is model-visible but not shown as a chat message. +- Exit code 2 is the ONLY blocking signal for most hook events (exit 1 = non-blocking error, action proceeds). UserPromptSubmit exit 2 erases the prompt; PreToolUse exit 2 blocks the tool; Stop exit 2 keeps Claude going. JSON output is only parsed on exit 0. +- As of v2.1.139 command hooks run without a controlling terminal on macOS/Linux (/dev/tty unavailable); use terminalSequence JSON field (allowlisted OSC 0/1/2/9/99/777 + BEL, v2.1.141+) for notifications instead. +- For OpenAI-compatible providers, normalizeMessagesForAPI() flattens the SystemPrompt[] by joining with \n\n into a single 'system' role message and strips cache_control / Anthropic beta headers. +- Plan mode injects an attachment to user messages ('plan only, do not code yet') and is reflected as permission_mode:'plan' in hook input. Plan mode actually writes plan markdown files then wipes the planning context before execution.
+ +## External interfaces +- SDK (TS): systemPrompt: {type:'preset',preset:'claude_code',append?,excludeDynamicSections?} +- SDK (Python): system_prompt={'type':'preset','preset':'claude_code','append':...,'exclude_dynamic_sections':bool} +- SDK: settingSources=['user','project'] / setting_sources=['user','project'] (empty array disables CLAUDE.md) +- SDK: settings.outputStyle (string) selects `~/.claude/output-styles/<name>.md` +- CLI flags: --append-system-prompt, --system-prompt, --exclude-dynamic-system-prompt-sections, --add-dir, --setting-sources +- Env: CLAUDE_CODE_SIMPLE, CLAUDE_CODE_USE_BEDROCK/VERTEX/OPENAI, CLAUDE_CODE_ADDITIONAL_DIRECTORIES_CLAUDE_MD, CLAUDE_CODE_DISABLE_AUTO_MEMORY +- Managed CLAUDE.md paths: /Library/Application Support/ClaudeCode/CLAUDE.md (macOS), /etc/claude-code/CLAUDE.md (Linux/WSL), C:\Program Files\ClaudeCode\CLAUDE.md (Windows) +- settings.json keys: claudeMd, claudeMdExcludes (glob array), autoMemoryEnabled, autoMemoryDirectory, outputStyle, hooks.{Event}[] +- Output styles: ~/.claude/output-styles/*.md and .claude/output-styles/*.md with frontmatter name/description/keep-coding-instructions +- Hook config JSON: hooks.{Event}[].matcher + [].hooks[].{type,command/args|url|server+tool|prompt,if,timeout,async,asyncRewake,statusMessage,once} +- Internal TS functions: getSystemPrompt(), buildEffectiveSystemPrompt(), systemPromptSection(), DANGEROUS_uncachedSystemPromptSection(), clearSystemPromptSections(), splitSysPromptPrefix(), normalizeMessagesForAPI() +- Type: branded SystemPrompt = string[] & {__brand:'SystemPrompt'} +- Cache-control scopes: 'global' (cross-org) and 'org' (per-org) + +## Open questions +- Exact byte content / wording of the 12 static sections in the CURRENT (2026) public build — Piebald-AI repo tracks this per version; should be sampled directly from the target version for a 1:1 replica.
+- Full current set of feature-flag gates (TOKEN_BUDGET, CACHED_MICROCOMPACT, PROACTIVE/KAIROS, COORDINATOR_MODE, experimental_skill_search, verification_agent, fork_subagent, explore_plan_agents, undercover) and their default on/off state per build. +- Precise wording of the env-info template line (Working directory / Is a git repository / Platform / Shell / OS Version / model name / knowledge cutoff) and whether 'date' is still injected in 2026 builds. +- Whether managed-policy and ~/.claude/CLAUDE.md are injected into the SYSTEM PROMPT (as the CLI does) or only the user message (as the SDK does) — the two surfaces diverge; the Go replica must pick per surface. +- Exact implementation of mcp_delta_mode (per-turn attachment format) and scratchpad path scheme. + +## Sources +- [Modifying system prompts — Claude Code Docs (official)](https://code.claude.com/docs/en/agent-sdk/modifying-system-prompts) — Authoritative: preset/append/custom/excludeDynamicSections, CLAUDE.md goes to conversation not system prompt, excludeDynamicSections min versions (TS v0.2.98 / Python v0.1.58), what env fields embed in the prompt and break cache. +- [How Claude remembers your project — Claude Code Docs (official)](https://code.claude.com/docs/en/memory) — Authoritative CLAUDE.md cascade: 4 scopes + load order, ancestor walk, CLAUDE.local.md appended per level, @import max depth 4, HTML comment stripping, /compact re-injection of project root, claudeMdExcludes, managed CLAUDE.md paths, auto-memory first-200-lines/25KB cap. +- [Hooks reference — Claude Code Docs (official)](https://code.claude.com/docs/en/hooks) — Authoritative hook lifecycle, all 30 events, matcher semantics (exact vs regex), `mcp__<server>__<tool>` namespacing, 5 handler types, JSON output schema (additionalContext/systemMessage/permissionDecision/decision block/terminalSequence), exit-2 blocking, 10k char cap, wrapping and insertion-point rules.
+- [System Prompt Assembly — DeepWiki (claude-code-best, indexed 2026-06-12)](https://deepwiki.com/claude-code-best/claude-code/2.3-system-prompt-assembly) — Reverse-engineered from leaked source: getSystemPrompt() in src/constants/prompts.ts, branded SystemPrompt type, SYSTEM_PROMPT_DYNAMIC_BOUNDARY marker removed pre-send, systemPromptSection vs DANGEROUS_uncachedSystemPromptSection, buildEffectiveSystemPrompt priority ladder, splitSysPromptPrefix 3 cache modes, CLAUDE_CODE_SIMPLE fast path. +- [How Claude Code Builds Its System Prompt — 18 Layers (Cadences)](https://codex.cadences.app/en/blog/claude-code-system-prompt/) — Independent corroboration of the 18 ordered sections, static/dynamic boundary placement at section 12-13, anti-YAGNI section content, risk taxonomy LOW/MED/HIGH, conditional feature-flag gates (TOKEN_BUDGET, PROACTIVE/KAIROS, CACHED_MICROCOMPACT, COORDINATOR_MODE). +- [How Claude Code Builds a System Prompt — dbreunig (2026-04-04)](https://www.dbreunig.com/2026/04/04/how-claude-code-builds-a-system-prompt.html) — Most granular per-section inventory with conditional gates and variation triggers (output_style, user_type_ant, repl_mode, embedded_search, task_tool_enabled, agent_tool_enabled+fork_subagent, skills_enabled, experimental_skill_search, verification_agent, memory_configured, undercover, is_worktree, language_set, microcompact_enabled, token_budget, kairos_brief, is_git_repo&&!remote&&git_instructions_enabled, append_system_prompt), plus env-info template text and git snapshot block. +- [Server Instructions: Giving LLMs a user manual — MCP Blog](https://blog.modelcontextprotocol.io/posts/2025-11-03-using-server-instructions/) — Confirms MCP servers return instructions in InitializeResult and hosts (including Claude Code) inject them into the system prompt; basis for the DANGEROUS_uncached MCP instructions section. 
+- [Piebald-AI/claude-code-system-prompts (GitHub)](https://github.com/Piebald-AI/claude-code-system-prompts) — Version-tracked dump of the actual assembled system prompt text, 27 builtin tool descriptions, and sub-agent prompts (Explore/Plan/Task) — ground truth for exact wording per version. +- [Server instructions issue — anthropics/claude-code #43749](https://github.com/anthropics/claude-code/issues/43749) — Documents the instructions field consumption from InitializeResult into session context. +- [Inside Claude Code's System Prompt — claudecodecamp](https://www.claudecodecamp.com/p/inside-claude-code-s-system-prompt) — Community corroboration of 110+ conditionally assembled instructions and section ordering. diff --git a/docs/claude-code-architecture/research/tool-exec-engine.md b/docs/claude-code-architecture/research/tool-exec-engine.md new file mode 100644 index 0000000..62bd256 --- /dev/null +++ b/docs/claude-code-architecture/research/tool-exec-engine.md @@ -0,0 +1,104 @@ +# Research: tool-exec-engine + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code's tool-exec engine sits between the model's `tool_use` content blocks and the `tool_result` blocks returned to the API. Every tool call — built-in (Read/Edit/Bash/Grep/Agent) or MCP — flows through one uniform 14-step pipeline (`checkPermissionsAndCallTool`): lookup → abort-check → Zod input validation → semantic `validateInput` → speculative classifier start → input backfill → PreToolUse hooks → permission resolution (deny→ask→allow rules + tool.checkPermissions + mode + interactive prompt) → deny hooks → `call()` execution → result budgeting (persist oversize to `~/.claude/tool-results/{hash}.txt`) → PostToolUse hooks → append newMessages → classifyToolError. 
Concurrency runs two layers: a greedy `partitionToolCalls()` groups consecutive concurrency-safe calls into parallel batches (isolating unsafe calls into serial singletons), and a `StreamingToolExecutor` starts tools speculatively *while the model is still streaming* its response. Results are buffered and yielded in submission order (not completion order) so conversation history stays coherent. Permission gating is layered: PreToolUse hooks can short-circuit, then static allow/ask/deny rules (`Tool` or `Tool(specifier)` format), then tool-specific checks, then one of 7 modes (default/acceptEdits/plan/auto/dontAsk/bypassPermissions/bubble). MCP tools are registered as `mcp__<server>__<tool>` and are indistinguishable to the agent loop. + +## Components +### Tool-call lifecycle (API + in-process) +**Purpose:** Translate a model tool_use block into a validated, permission-gated, executed tool_result content block, preserving message-history invariants. + +**Mechanism:** 1) Stream assistant response, parse each tool_use block. 2) For each: look up tool def (alias-fallback to getAllBaseTools for renamed tools in old transcripts), abort-check, Zod safeParse input (on failure append hint to call ToolSearch for deferred tools), semantic validateInput (e.g. FileEdit rejects no-ops, Bash blocks standalone sleep when MonitorTool present). 3) Speculatively start auto-mode classifier for Bash. 4) Backfill derived fields (expand ~/foo) into a CLONED input (original kept for transcript). 5) Run PreToolUse hooks — can allow/deny/modify/stop; hook allow does NOT bypass deny/ask rules; exit code 2 blocks before rule eval. 6) canUseTool(): if hook decided, final; else deny→ask→allow rule match → tool.checkPermissions() → mode default → interactive prompt or classifier. 7) On deny build error msg + run PermissionDenied hooks. 8) call(input=original). 9) Result budget. 10) PostToolUse hooks (can modify MCP output / block). 11) Append newMessages. 12) classifyToolError for telemetry.
+ +**Data model:** API contract (Anthropic Messages): assistant turn with stop_reason='tool_use' contains 1+ tool_use blocks {id:'toolu_...', name, input}. Client must reply with ONE user message whose content array begins with tool_result blocks {tool_use_id, content?, is_error?} — text blocks MUST come AFTER all tool_results, else HTTP 400. Multiple tool_result blocks for one turn MUST be batched in a single user message (separate messages break future parallel-tool-use prompting). Server tools (web_search, code_execution) execute inside Claude and need no tool_result. + +**Config:** settings.json: permissions.{allow,ask,deny} string arrays; permissions.defaultMode; --permission-mode / --dangerously-skip-permissions CLI flags. ENABLE_TOOL_SEARCH unset|true|auto|auto:N|false controls MCP deferral. MAX_MCP_OUTPUT_TOKENS, MCP_TOOL_TIMEOUT. + +### Permission resolution chain +**Purpose:** Decide allow/deny/ask per tool invocation using deny→ask→allow precedence layered over 7 modes. + +**Mechanism:** Rule string format 'Tool' or 'Tool(specifier)'. Bare deny removes tool from context entirely; scoped deny (Bash(rm *)) leaves tool visible and blocks the matching call. Bash rules: glob '*' (space before * = word boundary; ls* matches lsof, ls * does not); ':*' suffix == trailing ' *'; separators && || ; | |& & newline split compound commands and EACH subcommand must match (max 5 rules saved per compound approval); process wrappers timeout/time/nice/nohup/stdbuf and bare xargs are stripped; read-only set (ls cat echo pwd head tail grep find wc which diff stat du cd + read-only git) never prompts. Read/Edit use gitignore patterns with 4 anchors: //abs, ~/home, /project-rel, ./cwd-rel. WebFetch uses domain: prefix (* matches within a label except leading *. or whole-pattern). MCP rules: `mcp__<server>`, `mcp__<server>__*`, `mcp__<server>__<tool>` (allow globs only after literal mcp__server__ prefix; unanchored allow globs are warned+skipped).
Protected paths (.git, .claude except worktrees, .vscode, .idea, .husky, etc + named rc/config files) never auto-approved except in bypassPermissions. + +**Data model:** PermissionRule = { source, ruleBehavior: 'allow'|'deny'|'ask', ruleValue: 'Tool' | 'Tool(specifier)' }. Settings precedence (highest wins): Managed > CLI args > .claude/settings.local.json > .claude/settings.json > ~/.claude/settings.json. A deny at ANY level cannot be overridden. + +**Config:** Seven modes — the six from the public permission-modes table, detailed here, plus bubble (named in the Summary's mode list but absent from the public six-mode table): default, acceptEdits (auto-allows edits + mkdir/touch/rm/rmdir/mv/cp/sed in-scope), plan (read-only, denies writes), dontAsk (auto-deny prompts, CI), bypassPermissions (allow all; since v2.1.126 includes protected paths; rm -rf / and rm -rf ~ STILL prompt as circuit breaker; refuses root/sudo outside sandbox), auto (classifier model; v2.1.83+; consecutive 3 or total 20 blocks → fall back to prompting). Shift+Tab cycles default→acceptEdits→plan. disableBypassPermissionsMode / disableAutoMode = 'disable' locks them. + +### Concurrency: partition + streaming executor +**Purpose:** Run independent read-only tools in parallel; serialize writes; overlap tool execution with model response streaming. + +**Mechanism:** partitionToolCalls() walks calls L→R, safeParse input, calls isConcurrencySafe(parsedInput) in try-catch (failure→serial), merges consecutive-safe calls into one concurrent batch, isolates unsafe calls into single-tool serial batches. Concurrent: runToolsConcurrently via bounded async-generator all() with limit. Serial: apply contextModifier immediately. TWO OPTIMIZATIONS: (a) speculative execution — StreamingToolExecutor.addTool() is fire-and-forget called per parsed tool_use during streaming; processQueue() admits a tool iff noToolsRunning || (newToolSafe && allRunningSafe); (b) batch dispatch after stream completes. RESULTS YIELDED IN SUBMISSION ORDER not completion order — getCompletedResults() breaks the walk at any executing serial tool (order preservation via buffering).
Context modifiers only applied for serial tools; concurrent-batch modifiers queued by tool_use_id and applied in submission order after batch. discard() escape hatch sets discarded=true so retry stream starts fresh. + +**Data model:** Partition = []Group{ parallel:bool, calls:[]ToolCall }. TrackedTool states: queued|executing|completed|yielded. ToolResult={ data, newMessages?, contextModifier? }. AbortController hierarchy: query-level (Ctrl+C) → sibling-level (Bash-error cascade) → per-tool. + +**Config:** CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY (default 10) bounds concurrent batch size. Tools declare interruptBehavior() 'cancel'|'block' (block is default). + +### Result budgeting +**Purpose:** Bound tool output size per-call and per-conversation to avoid context overflow. + +**Mechanism:** Per-tool maxResultSizeChars threshold → oversize output persisted to ~/.claude/tool-results/{hash}.txt and replaced with preview block (model re-Reads full content). ContentReplacementState tracks an aggregate conversation budget (death-by-a-thousand-cuts guard). BashTool detects image output by magic bytes → emits image content block; FileReadTool emits base64 image blocks, handles PDFs/notebooks/dirs, blocks /dev/zero /dev/random /dev/stdin. + +**Data model:** Persisted file path ~/.claude/tool-results/{hash}.txt; wrapper replaces in-content. + +**Config:** maxResultSizeChars per tool (Bash 30000, FileEdit 100000, Grep 100000, FileRead Infinity). MCP: MAX_MCP_OUTPUT_TOKENS default 25000, warning at 10000; per-server .mcp.json timeout overrides MCP_TOOL_TIMEOUT; tool can raise limit to 500000 via _meta['anthropic/maxResultSizeChars']. + +### MCP tool routing & registry +**Purpose:** Expose external MCP server tools as first-class tools indistinguishable from built-ins to the agent loop. + +**Mechanism:** Spawn server (stdio/SSE/HTTP) → JSON-RPC 2.0 initialize → tools/list discovers → register with mcp__ prefix → route tools/call transparently. 
assembleToolPool(): built-ins (deny-filtered, REPL-hidden, isEnabled-checked) sorted alphabetically THEN MCP tools sorted alphabetically, concatenated (built-ins prefix) so a prompt-cache breakpoint sits after the last built-in — flat-sorted interleaving would bust cache on MCP add/remove. MCP tools go through the SAME 14-step pipeline. Tool search/deferred loading (ENABLE_TOOL_SEARCH default-on for MCP): tools sent with defer_loading=true (name+desc only, no schema); model calls ToolSearchTool to load schema; calling a deferred tool without loading → Zod string-coercion failure + targeted recovery hint. + +**Data model:** Tool name `mcp__<server>__<tool>` (chars outside [A-Za-z0-9_-] → _, capped 64). Plugin form `mcp__plugin_<plugin>_<server>__<tool>`. MCP tool schema = JSON Schema; input validated same as built-ins. + +**Config:** MAX_MCP_OUTPUT_TOKENS, MCP_TOOL_TIMEOUT, ENABLE_TOOL_SEARCH, .mcp.json (project root, checked into VCS), .claude.json (user scope). + +### Error classification & recovery +**Purpose:** Convert execution failures into model-actionable tool_result(is_error) without leaking internals, and keep conversation history coherent. + +**Mechanism:** classifyToolError() extracts telemetry-safe string (errno, stable name) — never logs raw msg (minified builds mangle constructor.name). Parallel batch: only Bash non-zero-exit errors cascade (cancel sibling controller → synthetic 'Cancelled: parallel tool call errored'); Read/Grep/Fetch errors are isolated (no sibling cancel). Dependencies across parallel calls (create-then-update) are NOT pre-detected: dispatch all, if one fails return is_error:true with natural message, model reissues next turn. Orphaned tool_use (interrupted parallel call) must still get a placeholder tool_result or API 400s. MaxTokens stop_reason with partial tool_use: still emit tool_result blocks for the partial calls. + +**Data model:** tool_result.is_error=true with natural stderr-style content.
Stop reasons: tool_use (run tools), end_turn, max_tokens, pause_turn, refusal, model_context_window_exceeded, etc. + +**Config:** CLAUDE_CODE_MAX_OUTPUT_TOKENS bounds model output; MaxTokens stop surfaces that error. + +## Key behaviors +- RESULTS ARE YIELDED IN SUBMISSION (tool_use arrival) ORDER, NOT COMPLETION ORDER. Buffer completed results; getCompletedResults() BREAKS the walk at any still-executing serial tool so nothing after it yields early. This is the single hardest correctness invariant to preserve in a reimpl. +- Concurrency safety is PER-INVOCATION, not per-tool. isConcurrencySafe(parsedInput) is called after safeParse; any parse failure or thrown exception → serial (fail-closed). BashTool parses compound commands via splitCommandWithOperators and returns true only if EVERY non-neutral subcommand is in search/read/list sets. +- Mutual exclusion contract in the streaming executor: a tool can start iff noToolsRunning OR (newToolSafe AND allRunningAreSafe). A single non-concurrent tool in flight blocks everyone. +- Bash errors are the ONLY errors that cascade to sibling cancellation in a parallel batch (synthesize 'Cancelled: parallel tool call errored'). This is confirmed production behavior (v2.1.158, issue #64247) and a known bug source — Opus 4.8 spirals on the synthetic cancel messages. Read/Grep errors do NOT cancel siblings. +- tool_result blocks for a parallel turn MUST be batched in a single user message and MUST come before any text blocks. Splitting results across messages or putting text first 'teaches' the model to stop using parallel tools and can cause HTTP 400. +- Permission rule precedence is deny → ask → allow (first match), REGARDLESS of specificity. A matching ask rule prompts even if a more specific allow matches. A deny at ANY settings level is absolute. Hook decisions do not bypass deny/ask rules; hook exit-code-2 blocks before rule eval. +- Bare deny rule (e.g. 
'Bash') REMOVES the tool from model context entirely; scoped deny ('Bash(rm *)') keeps the tool visible and blocks only matching calls. Bash wildcard space sensitivity: 'Bash(ls *)' matches 'ls -la' not 'lsof'; 'Bash(ls*)' matches both. ':*' suffix == trailing ' *' but only at pattern end. +- Speculative execution during streaming: StreamingToolExecutor.addTool() is fire-and-forget (does not await processQueue) so response parsing never stalls; tools can finish before the model response completes. Abort-controller hierarchy is 3 levels (query→sibling→per-tool); per-tool abort bubbles to query controller unless reason is a sibling error (so permission denial ends the whole turn). +- FileReadTool is the ONLY built-in with maxResultSizeChars=Infinity (persisting Read output would loop). It self-bounds via token estimation. MCP default output token limit is 25000 (warn at 10000); a tool can raise to hard ceiling 500000 via _meta['anthropic/maxResultSizeChars']. +- assembleToolPool sorts built-ins and MCP tools alphabetically SEPARATELY then concatenates (built-ins prefix) to keep a stable prompt-cache breakpoint after the last built-in — flat-sorting all tools would invalidate cache when MCP servers change. +- Tool search/defer_loading (default-on for MCP): sends name+description only; model calls ToolSearch to load schema. Disabled by default on Vertex AI and when ANTHROPIC_BASE_URL is non-first-party. Requires tool_reference support (no Haiku). Calling a deferred tool un-triggered → Zod string-coercion failure + recovery hint. +- bypassPermissions (v2.1.126+) includes protected-path writes but rm -rf / and rm -rf ~ still prompt as a circuit breaker; refuses to start as root/sudo outside recognized sandboxes. auto mode classifier thresholds (consecutive 3 / total 20 blocks) are NOT configurable. 
+ +## External interfaces +- Anthropic Messages API: stop_reason='tool_use' with tool_use{id,name,input} blocks; reply user message with tool_result{tool_use_id,content,is_error} blocks (all results in ONE user message, no text before tool_results) +- Internal: checkPermissionsAndCallTool() 14-step pipeline; partitionToolCalls() in toolOrchestration.ts; StreamingToolExecutor{addTool,processQueue,executeTool,getCompletedResults,getRemainingResults,discard}; canUseTool() +- Tool interface: call(input)→ToolResult{data,newMessages,contextModifier}; inputSchema (Zod→JSON Schema); isConcurrencySafe(input); isReadOnly(input); checkPermissions(input); validateInput(); isEnabled(); interruptBehavior(); maxResultSizeChars +- Config files: ~/.claude/settings.json, .claude/settings.json, .claude/settings.local.json (permissions.{allow,ask,deny,defaultMode}); .mcp.json (project MCP), .claude.json (user MCP); ~/.claude/tool-results/{hash}.txt (persisted oversize output) +- MCP JSON-RPC 2.0: initialize, tools/list (supports _meta anthropic/maxResultSizeChars up to 500000), tools/call +- CLI flags: --permission-mode, --dangerously-skip-permissions, --allow-dangerously-skip-permissions, --add-dir, --allowedTools, --disallowedTools +- Env vars: CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY(10), MAX_MCP_OUTPUT_TOKENS(25000), MCP_TOOL_TIMEOUT, ENABLE_TOOL_SEARCH, CLAUDE_CODE_MAX_OUTPUT_TOKENS, CLAUDE_CODE_ENABLE_AUTO_MODE + +## Open questions +- Exact set and order of fields in the Zod input backfill / _simulatedSedEdit injection (only approximate from secondary source) +- Whether contextModifier queuing for concurrent batches is actually exercised by any current built-in (source comment says none are) +- Precise mapping of the auto-mode classifier's decision order vs the in-process 14-step pipeline (two slightly different orderings are described) +- Exact behavior when an orphaned tool_use from an interrupted parallel turn is repaired (placeholder tool_result content text) + +## Sources +- 
[Handle tool calls — Claude API Docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/handle-tool-calls) — Authoritative API contract: tool_use/tool_result block shapes, is_error, ordering rules (tool_result must immediately follow, must be first in user content, HTTP 400 cases). +- [Parallel tool use — Claude API Docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/parallel-tool-use) — disable_parallel_tool_use semantics, unordered execution, dependency recovery via is_error, single-user-message batching rule. +- [Ch 6. Tools — From Definition to Execution (Claude Code from Source)](https://claude-code-from-source.com/ch06-tools/) — Best secondary source: 14-step checkPermissionsAndCallTool pipeline, buildTool fail-closed defaults, Tool interface (5 key members), ToolResult/ToolUseContext, registry assembleToolPool, deferred loading, per-tool maxResultSizeChars table. +- [Ch 7. Concurrent Tool Execution (Claude Code from Source)](https://claude-code-from-source.com/ch07-concurrency/) — partitionToolCalls algorithm, streaming executor lifecycle (queued/executing/completed/yielded), mutual-exclusion admission, order-preservation, Bash-only sibling cascade, discard() escape hatch, per-tool concurrency table. +- [Configure permissions — Claude Code Docs](https://code.claude.com/docs/en/permissions) — Official rule syntax: deny→ask→allow precedence, Bash wildcards (space-before-*, :* suffix), compound command splitting, process-wrapper stripping, Read/Edit gitignore anchors, WebFetch domain:, MCP mcp__server__tool rules, protected paths, settings precedence. +- [Choose a permission mode — Claude Code Docs](https://code.claude.com/docs/en/permission-modes) — Six modes table (default/acceptEdits/plan/auto/dontAsk/bypassPermissions), what each auto-approves, auto-mode classifier thresholds (3 consecutive / 20 total), v2.1.126 protected-path change, rm -rf / circuit breaker, auto-mode model requirements. 
+- [Connect Claude Code to tools via MCP — Claude Code Docs](https://code.claude.com/docs/en/mcp) — MCP tool naming mcp__server__tool (64-char cap, char substitution), plugin form mcp__plugin_X_Y__Z, MAX_MCP_OUTPUT_TOKENS=25000 default (warn 10000), _meta anthropic/maxResultSizeChars ceiling 500000, tool search/defer_loading (ENABLE_TOOL_SEARCH), JSON-RPC 2.0 tools/list + tools/call. +- [[Bug] Parallel tool calls cancel all siblings on single error (#64247)](https://github.com/anthropics/claude-code/issues/64247) — Confirms exact behavior + version (v2.1.158): 'Cancelled: parallel tool call ... errored', isConcurrencySafe→annotations.readOnlyHint, Bash-error sibling cascade. +- [Environment variables — Claude Code Docs](https://code.claude.com/docs/en/env-vars) — Confirms CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY default 10 governs read-only tool + subagent parallelism. +- [toolOrchestration.ts (openonion/claude-code mirror)](https://github.com/openonion/claude-code/blob/main/src/services/tools/toolOrchestration.ts) — Source confirmation of getMaxToolUseConcurrency() = parseInt(env.CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY)||10 and runToolsConcurrently signature. diff --git a/docs/claude-code-architecture/research/tools-canonical.md b/docs/claude-code-architecture/research/tools-canonical.md new file mode 100644 index 0000000..b359519 --- /dev/null +++ b/docs/claude-code-architecture/research/tools-canonical.md @@ -0,0 +1,184 @@ +# Research: tools-canonical + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code (as of v2.1.x, mid-2026) exposes a fixed canonical set of built-in tools to the model. The core file/exec/agent tools are Read, Write, Edit, Glob, Grep, Bash, NotebookEdit, Task (a.k.a. Agent), TodoWrite, WebFetch, WebSearch, AskUserQuestion, ExitPlanMode, Skill. 
The official docs table now lists ~50 tools including newer ones: TaskCreate/TaskGet/TaskList/TaskUpdate (which REPLACE TodoWrite as of v2.1.142), NotebookEdit, LSP, Monitor, PowerShell, EnterPlanMode/ExitPlanMode, EnterWorktree/ExitWorktree, CronCreate/CronList/CronDelete, ScheduleWakeup, SendMessage, TeamCreate/TeamDelete, Workflow, ShareOnboardingGuide, RemoteTrigger, PushNotification, ListMcpResourcesTool/ReadMcpResourceTool, WaitForMcpServers, ToolSearch, plus deprecated BashOutput/KillShell/TaskOutput. CRITICAL VERSION FACT: MultiEdit was REMOVED in Claude Code v2.0 (it existed in v1.x for batch atomic edits in a single file) and is NOT in the current tool set; the model achieves the same via multiple parallel Edit calls. TodoWrite is DISABLED BY DEFAULT as of v2.1.142 in favor of the Task* quartet (re-enable via CLAUDE_CODE_ENABLE_TASKS=0). Each tool has a strict JSON-schema parameter contract; file tools require absolute paths and enforce a read-before-edit/read-before-write session state check; permission rules use the exact tool name as the matcher string. + +## Components +### Read +**Purpose:** Read file contents with line numbers; multimodal (text, images, PDFs, .ipynb). + +**Mechanism:** Returns file contents with 1-indexed line numbers in `cat -n` format. Line-number prefix format: `spaces + line_number + tab + content`. Default reads first 2000 lines from the start; each line truncated at 2000 chars. If a whole-file read exceeds token limit, returns first page + a `PARTIAL view` notice telling the model how to read more with offset/limit. A read that explicitly passes offset/limit and STILL exceeds the limit returns an error. Multimodal: images (PNG/JPG) returned as visual content (resized/recompressed to model limits); PDFs read whole if <=10 pages, else paged via `pages` param like "1-5" up to 20 pages; .ipynb returns all cells with outputs. Reads files only, NOT directories (use Bash `ls`). Absolute paths enforced. 
+ +**Data model:** Params: {file_path: string (required), offset?: number, limit?: number, pages?: string}. additionalProperties:false. Result: tool_result with text content. For >10-page PDFs the `pages` param is required. + +**Config:** Required: file_path. Optional: offset (1-indexed line number to start), limit (line count, default 2000), pages (PDF page range such as "1-5", up to 20 pages). No path = error. + +### Write +**Purpose:** Create new file or fully overwrite existing file. + +**Mechanism:** Creates a new file or fully overwrites an existing one. Does NOT append or merge — atomically writes the complete content. Enforces READ-BEFORE-WRITE: if target exists, the model must have read it in the current conversation at least once or the call FAILS with an error. New files are exempt. Same Bash-read satisfaction rules as Edit (cat/head/tail/sed -n X,Yp/grep/egrep/fgrep on a single file, no pipes). For partial changes, the model is instructed to use Edit instead. Absolute paths only. + +**Data model:** Params: {file_path: string (required), content: string (required)}. additionalProperties:false. + +**Config:** Required: file_path, content. No optional fields. + +### Edit +**Purpose:** Precise surgical string replacement in a file via exact matching. + +**Mechanism:** EXACT string replacement — no regex, no fuzzy matching. Three checks run in order: (1) READ-BEFORE-EDIT (must have read file this conversation AND file unchanged on disk since) — runs FIRST before matching; (2) MATCH (old_string must appear exactly, including indentation/whitespace); (3) UNIQUENESS — old_string must appear EXACTLY ONCE, otherwise the edit fails; to disambiguate, supply more surrounding context, or set replace_all:true to replace all occurrences. Absolute paths. Read-before-edit is ALSO satisfied when Bash ran cat/head/tail/sed -n 'X,Yp'/grep/egrep/fgrep on a SINGLE file with no pipes/redirects — piped output and other commands do NOT count.
NOTE: read-before-edit satisfaction set != deny-rule-checked set (egrep/fgrep count for read-before-edit but not Read deny rules). + +**Data model:** Params: {file_path, old_string, new_string (all required); replace_all?: boolean (default false)}. additionalProperties:false. + +**Config:** Required: file_path, old_string, new_string. Optional: replace_all (default false). new_string MUST differ from old_string. + +### Glob +**Purpose:** Fast file-by-name pattern matching. + +**Mechanism:** Finds files by NAME pattern using standard glob syntax: `*` (single dir level), `**` (recursive), `?`, `{a,b}` alternation, `[abc]`/`[a-z]`/`[!abc]`. Examples: `**/*.js`, `src/**/*.ts`, `*.{json,yaml}`. Results sorted by modification time (most recent first), capped at 100 files; hitting the cap returns a truncation flag so the model can narrow. Does NOT respect .gitignore by default (finds gitignored files) — DIFFERS from Grep which does respect .gitignore. Set CLAUDE_CODE_GLOB_NO_IGNORE=false to make it respect .gitignore. + +**Data model:** Params: {pattern: string (required), path?: string}. additionalProperties:false. Result: list of file paths + truncation flag. + +**Config:** CLAUDE_CODE_GLOB_NO_IGNORE=false makes Glob respect .gitignore (default ignores the ignore file). + +### Grep +**Purpose:** Search file contents using ripgrep regex. + +**Mechanism:** Searches file CONTENTS. Built on ripgrep (uses ripgrep regex, NOT POSIX grep — literal braces need escaping: `interface\{\}` to find Go `interface{}`). Three output modes: files_with_matches (paths only, DEFAULT), content (matching lines + file + line number, supports -A/-B/-C context and -n), count (per-file match count). Scope by `glob` (e.g. `**/*.tsx`) or `type` (e.g. `py`, `rust`). Default single-line match; multiline:true spans lines (rg -U --multiline-dotall). head_limit caps first N entries across all modes. Respects .gitignore (skips gitignored files); to search a gitignored file pass its path directly. 
The literal JSON keys `-i`, `-n`, `-A`, `-B`, `-C`, `multiline`, `head_limit` mirror rg flags. + +**Data model:** Params: {pattern (required), path?, output_mode?: 'content'|'files_with_matches'|'count' (default files_with_matches), glob?, type?, '-i'?, '-n'?, '-A'?, '-B'?, '-C'?, multiline?: boolean (default false), head_limit?: number}. additionalProperties:false. Note the literal flag names -i/-n/-A/-B/-C as JSON keys. + +**Config:** output_mode default files_with_matches. -A/-B/-C/-n only honored with output_mode=content. multiline default false. head_limit works in all modes. + +### NotebookEdit +**Purpose:** Modify Jupyter notebook cells by cell_id. + +**Mechanism:** Edits ONE cell at a time, targeted by `cell_id` (NOT string replacement across the notebook like Edit). Modes: replace (overwrite cell source, DEFAULT), insert (add new cell AFTER target; with no cell_id goes at the START; requires cell_type=code|markdown), delete (remove target cell). notebook_path must be ABSOLUTE. Permission rules use the Edit(...) path format — e.g. `Edit(notebooks/**)` covers NotebookEdit in that dir. + +**Data model:** Params: {notebook_path (required, absolute), new_source (required), cell_id?, cell_type?: 'code'|'markdown', edit_mode?: 'replace'|'insert'|'delete' (default replace)}. additionalProperties:false. + +**Config:** Required: notebook_path, new_source. Optional: cell_id, cell_type (required for insert), edit_mode (default replace). + +### Bash +**Purpose:** Execute shell commands; general-purpose escape hatch. + +**Mechanism:** Runs each command in a SEPARATE process (not one persistent shell) but emulates persistence: `cd` carries to later commands ONLY if it stays in the project dir or an added working dir (else resets to project dir + appends `Shell cwd was reset to {dir}`). Env vars do NOT persist across commands (export in one is gone in the next).
Aliases/functions/options DO persist — at session start Claude Code sources ~/.zshrc/~/.bashrc/~/.profile, captures aliases/functions/options, applies to every command. Subagent sessions never carry cwd changes. Limits: default timeout 120000ms (2 min), model can request up to 600000ms (10 min) via timeout param; output truncated at 30000 chars by default — when exceeded, full output saved to a file in the session dir and the model gets the file path + short preview (raise via BASH_MAX_OUTPUT_LENGTH up to hard 150000). run_in_background:true detaches; never use it for `sleep` (returns immediately). Model is told to avoid Bash for cat/head/tail/grep/find/sed/awk/echo and to prefer Read/Grep/Glob; independent commands go as parallel Bash calls, dependent ones chained with && (not newlines). Background task output files have no size limit and are not auto-cleaned. Git safety: never update git config, never destructive git ops unless explicit, never skip hooks, never force-push main/master. + +**Data model:** Params: {command: string (required), description?: string, timeout?: number (max 600000), run_in_background?: boolean (default false)}. additionalProperties:false. Result text includes stdout, stderr, and `Exit code N`. + +**Config:** timeout default 120000 (BASH_DEFAULT_TIMEOUT_MS overrides default, BASH_MAX_TIMEOUT_MS overrides ceiling). Output cap 30000 (BASH_MAX_OUTPUT_LENGTH raises it, hard ceiling 150000). CLAUDE_BASH_MAINTAIN_PROJECT_WORKING_DIR=1 disables cwd carry-over. CLAUDE_ENV_FILE for env var persistence. Sources ~/.zshrc/~/.bashrc/~/.profile. + +### Skill +**Purpose:** Execute a skill within the main conversation. + +**Mechanism:** Loads a skill by name. Skill names without leading slash. Plugin-namespaced skills use `plugin:skill` form. When invoked, shows `{name} skill is loading` then expands the skill prompt. Only skills in the available list may be invoked; cannot invoke a skill already running; not for built-in CLI commands (/help, /clear). 
Runs through the existing Skill tool rather than adding a new tool entry. Note: the separate SlashCommand tool handles user-authored `/commands`. + +**Data model:** Params: {command: string (required) — skill name only, no args}. additionalProperties:false. + +**Config:** Required: command. No args passed (args go in the skill itself). + +### ExitPlanMode +**Purpose:** Present a plan for approval and exit plan mode. + +**Mechanism:** Called only while in plan mode, after the model has presented its plan and is ready to code. Presents the plan to the user for approval and exits plan mode. ONLY for implementation/code-writing tasks — explicitly NOT for research/exploration. If ambiguous, the model is told to resolve via AskUserQuestion first. Permission: Yes (entering/exiting plan mode is gated). + +**Data model:** Params: {plan: string (required, supports markdown)}. additionalProperties:false. + +**Config:** Required: plan. Use only for implementation tasks, not research. + +### AskUserQuestion +**Purpose:** Ask multiple-choice clarifying questions. + +**Mechanism:** Structured multiple-choice prompt. 1-4 questions per call, 2-4 options per question, header is a very short label (max 12 chars), each option has label (1-5 words) + description. Users can always select 'Other' for custom text (auto-added — model must NOT include an 'Other' option). multiSelect must be specified. Used for gathering preferences, clarifying ambiguity, deciding implementation direction. + +**Data model:** Params: {questions: array (minItems 1, maxItems 4) of {question, header (max 12 chars), multiSelect: boolean (required), options: array (minItems 2, maxItems 4) of {label, description}}; answers?: object (populated by permission component)}. additionalProperties:false. + +**Config:** 1-4 questions; 2-4 options each; header max 12 chars; label 1-5 words; multiSelect required field. + +### WebSearch +**Purpose:** Server-side web search returning titles+URLs. 
+ +**Mechanism:** Runs query against Anthropic's server-side web search backend, returns result TITLES and URLs only (does NOT fetch pages — follow up with WebFetch). May issue up to EIGHT backend searches per call, refining internally before returning. Scope with allowed_domains (include only) or blocked_domains (exclude) — the two lists CANNOT be combined in one call. Backend not configurable (use MCP for other providers). Permission rules take NO specifier — bare `WebSearch` in allow/deny only. US-only. Availability varies by provider (works on Claude API + MS Foundry; on Vertex AI with Claude 4 models; NOT on Bedrock). + +**Data model:** Params: {query: string (required, minLength 2), allowed_domains?: string[], blocked_domains?: string[]}. additionalProperties:false. + +**Config:** Required: query (min 2 chars). allowed_domains XOR blocked_domains (not both). No specifier in permission rules. + +### WebFetch +**Purpose:** Fetch a URL, convert to Markdown, extract per prompt via small model. + +**Mechanism:** Fetches URL, converts HTML to Markdown (not configurable), runs the prompt against content using a SMALL FAST model, returns that model's answer (NOT raw page) — lossy by design. HTTP auto-upgraded to HTTPS. Large pages truncated to a fixed char limit before processing. 15-minute self-cleaning cache. On cross-host redirect, returns a text result naming original + redirect target (does NOT follow); model issues a second WebFetch. User-Agent begins with `Claude-User`; Accept header prefers Markdown over HTML. In default/acceptEdits modes, prompts on first reach of a new domain EXCEPT a built-in preapproved docs-domain set; add `WebFetch(domain:example.com)` to pre-allow. An explicit WebFetch(domain:...) in deny/ask/allow OVERRIDES the preapproved set. auto/bypassPermissions modes skip the prompt. + +**Data model:** Params: {url: string (required, format: uri), prompt: string (required)}. additionalProperties:false. + +**Config:** Required: url, prompt. 
15-min cache. HTTP auto->HTTPS. User-Agent: Claude-User*. + +### Task (a.k.a. Agent) +**Purpose:** Spawn a subagent with its own context to handle a task autonomously. + +**Mechanism:** Spawns a subagent in a SEPARATE context window that works autonomously and returns ONE final text result; parent never sees intermediate tool calls/outputs. Named types: general-purpose (all tools), Explore (Glob/Grep/Read/Bash, with thoroughness quick|medium|very thorough), plus setup agents. `tools`/`disallowedTools` frontmatter on the subagent definition controls tool set: neither=inherit all; tools only=just those; disallowedTools only=all except those; both set=disallowedTools wins. Foreground subagents show live permission prompts; background subagents auto-deny any prompting call and continue. Launching itself needs no permission. maxTurns caps turn count. Fork mode: a fork inherits the full parent conversation, always runs in background, surfaces prompts in terminal. Note: docs table lists the tool as `Agent`; older schema/system-prompt name is `Task` — same tool. deprecated TaskOutput is replaced by Read on the task's output file path. + +**Data model:** Params: {description: string (3-5 words, required in older schema), prompt: string (required), subagent_type: string (required), model?: 'haiku'|'sonnet'|'opus', resume?: string (agent id)}. additionalProperties:false. + +**Config:** Required: prompt. Optional: description, subagent_type, model, resume. + +### TodoWrite (LEGACY / disabled by default) +**Purpose:** Manage the session checklist (whole-list replace). + +**Mechanism:** Replaces the ENTIRE todo list each call (not incremental). Exactly ONE item should be in_progress at a time. Item shape: {content: imperative-form string, status: 'pending'|'in_progress'|'completed', activeForm: present-continuous string}. Use for 3+ step complex tasks; skip for trivial/conversational. 
VERSION CHANGE: TodoWrite is DISABLED BY DEFAULT as of v2.1.142 in favor of the granular TaskCreate/TaskGet/TaskList/TaskUpdate quartet. To re-enable the legacy TodoWrite tool, set CLAUDE_CODE_ENABLE_TASKS=0. (Note: the Tasks feature itself was gated behind CLAUDE_CODE_ENABLE_TASKS=1 during its earlier opt-in rollout.) A 2026 system-prompt change swaps the hardcoded TodoWrite reference for one that resolves to TaskCreate or TodoWrite depending on whether tasks are enabled. + +**Data model:** TodoWrite params: {todos: array of {content (minLength 1), status: 'pending'|'in_progress'|'completed', activeForm (minLength 1)}}. additionalProperties:false on items. + +**Config:** Disabled by default since v2.1.142. Set CLAUDE_CODE_ENABLE_TASKS=0 to re-enable TodoWrite. + +### TaskCreate / TaskGet / TaskList / TaskUpdate +**Purpose:** Granular ID-based task management (replaces TodoWrite). + +**Mechanism:** The modern replacement (introduced ~v2.1.16, became default in v2.1.142). Granular CRUD: TaskCreate (new pending task, auto-assigned ID), TaskGet (full details by ID), TaskList (all tasks summary), TaskUpdate (status pending->in_progress->completed, owner assignment, blockedBy/blocks dependencies, or deleted). Replaces the whole-list-replace TodoWrite with ID-based per-task updates and dependency graphs. State persists in ~/.claude/tasks// for team contexts. + +**Data model:** TaskCreate: {subject, description, activeForm?, metadata?}. TaskUpdate: {taskId, status?, subject?, description?, activeForm?, owner?, addBlockedBy?, addBlocks?, metadata?}. TaskGet: {taskId}. TaskList: {} (returns summary). + +**Config:** No permission required. New ID-based (vs old positional). + +### Monitor / LSP / PowerShell / plan-mode / worktree / cron / agent-team / workflow / MCP / background-task tools +**Purpose:** Extended built-in tools beyond the core file/exec/agent set. 
+ +**Mechanism:** These are real, current tools but secondary to the core file/exec/agent set: Monitor (v2.1.98+, runs a watcher in background, reuses Bash permission rules, not on Bedrock/Vertex/Foundry); LSP (code intelligence, inactive until a code-intelligence plugin is installed; operations goToDefinition/findReferences/hover/documentSymbol/workspaceSymbol/goToImplementation/prepareCallHierarchy/incomingCalls/outgoingCalls); PowerShell (native, CLAUDE_CODE_USE_POWERSHELL_TOOL=1, spawns pwsh with -ExecutionPolicy Bypass process-scope); EnterPlanMode/ExitPlanMode (plan mode lifecycle); EnterWorktree/ExitWorktree (git worktree sessions under .claude/worktrees/); CronCreate/CronList/CronDelete (session-scoped scheduled prompts); ScheduleWakeup (reschedules a /loop iteration, 1min-1hr out); PushNotification (desktop + phone via Remote Control); SendMessage/TeamCreate/TeamDelete (agent teams, CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1); Workflow (dynamic multi-subagent orchestration); ShareOnboardingGuide; RemoteTrigger (claude.ai Routines behind /schedule); ListMcpResourcesTool/ReadMcpResourceTool/WaitForMcpServers/ToolSearch (MCP integration + deferred tool loading); TaskOutput (DEPRECATED — prefer Read on the task output file path); TaskStop (kill background task). Older/internal-only tools NOT in current v2 docs: BashOutput (read background shell output by bash_id, only NEW output since last check, optional regex filter that permanently drops non-matching lines) and KillShell (kill by shell_id) — these predate the run_in_background/task-id model. + +**Data model:** Various; see docs table. + +**Config:** Conditions: SendMessage/TeamCreate/TeamDelete need CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1. Monitor/RemoteTrigger/ScheduleWakeup/PushNotification unavailable on Bedrock/Vertex/Foundry. PowerShell needs CLAUDE_CODE_USE_POWERSHELL_TOOL=1 (off-C Windows). LSP needs a code-intelligence plugin. ToolSearch only when tool-search enabled. 
+ +## Key behaviors +- Read output uses `cat -n` 1-indexed line numbers with prefix `spaces + line_number + tab + content`; default first 2000 lines, each line truncated at 2000 chars; a whole-file read that exceeds the token limit returns a `PARTIAL view` notice (NOT an error), but a read that explicitly passes offset/limit and still exceeds returns an ERROR. +- Edit's THREE ordered checks: (1) read-before-edit (file read this conversation + unchanged on disk since) runs FIRST, (2) exact match, (3) uniqueness — old_string must appear EXACTLY ONCE or the edit FAILS (use replace_all:true or more context). Whitespace/indentation must match exactly. +- Read-before-edit / read-before-write is ALSO satisfied by Bash `cat`/`head`/`tail`/`sed -n 'X,Yp'`/`grep`/`egrep`/`fgrep` on a SINGLE file with NO pipes/redirects — but the deny-rule-checked command set differs (egrep/fgrep count for read-before-edit but NOT for Read deny rules). Piped output does NOT satisfy read-before-edit. +- Bash: 30,000 char output truncation default; when exceeded, FULL output is saved to a file in the session dir and the model receives the file path + a short preview from the start (raise cap via BASH_MAX_OUTPUT_LENGTH up to hard 150,000). Background task `.output` files have NO size limit and are never auto-cleaned. +- Bash `cd` carries to later commands ONLY within the project dir / added working dirs; landing outside resets to project dir and appends `Shell cwd was reset to {dir}`. Env vars do NOT persist across commands (export is gone next call); aliases/functions/options DO persist (sourced from ~/.zshrc/~/.bashrc/~/.profile at session start). CLAUDE_BASH_MAINTAIN_PROJECT_WORKING_DIR=1 disables carry-over; CLAUDE_ENV_FILE enables env persistence. +- Glob does NOT respect .gitignore by default (finds gitignored files) — DIFFERS from Grep which DOES respect .gitignore. Glob results sorted by mtime (recent first), capped at 100 files with a truncation flag.
Set CLAUDE_CODE_GLOB_NO_IGNORE=false to make Glob respect .gitignore. +- Grep uses RIPGREP regex not POSIX grep (literal braces need escaping: `interface\{\}`); output_mode default is `files_with_matches` (paths only); -A/-B/-C/-n context flags only honored when output_mode=content; multiline default false; literal JSON keys `-i`/`-n`/`-A`/`-B`/`-C` mirror rg flags. +- TodoWrite is DISABLED BY DEFAULT as of v2.1.142 — replaced by TaskCreate/TaskGet/TaskList/TaskUpdate. Re-enable legacy TodoWrite with CLAUDE_CODE_ENABLE_TASKS=0. TodoWrite replaces the WHOLE list each call; Task* tools are ID-based and granular with dependency graphs. +- MultiEdit (batch edits, one file, `edits: [{old_string,new_string,replace_all}]`) was REMOVED in Claude Code v2.0 and is NOT in the current built-in tool set — replicas should implement parallel Edit calls instead of a MultiEdit tool. +- WebFetch is LOSSY by design: HTML->Markdown (not configurable), processed by a small fast model per the prompt (model gets the answer, not raw page), 15-min cache, HTTP auto->HTTPS, cross-host redirect returns original+target (no follow) requiring a second call. User-Agent starts with `Claude-User`. +- WebSearch returns TITLES + URLs only (no page fetch — follow up with WebFetch); may issue up to 8 backend searches per call; allowed_domains and blocked_domains CANNOT be combined in one call; permission rule takes NO specifier (bare `WebSearch` only); US-only; NOT on Bedrock. +- Agent/Task subagents: parent sees ONLY the final result, never intermediate tool calls; launching needs no permission but each subagent tool call is checked against session permission rules (background subagents auto-deny any prompting call); disallowedTools takes precedence over tools when both frontmatter fields set. 
+- All file tools require ABSOLUTE paths (relative rejected); NotebookEdit targets cells by cell_id not by index and not by string replacement; permission rules: Read/Grep/Glob/LSP use `Read(path)` format, Edit/Write/NotebookEdit use `Edit(path)` format (an Edit allow also grants read to same path), Bash/Monitor use `Bash(cmd pattern)`, WebFetch uses `WebFetch(domain:...)`, Agent uses `Agent(type)`, Skill uses `Skill(name)`. + +## Open questions +- Exact current schema of the Task/Agent tool's optional `model` and `resume` fields and whether `description`/`subagent_type` remain strictly required in the latest v2.1.16x prompt (community schemas conflict slightly on required-ness). +- Whether TaskOutput is fully removed or merely deprecated in the very latest version (docs mark it deprecated, prefer Read on output file path). +- Exact composition of the built-in preapproved WebFetch documentation-domain set that skips the first-time domain prompt. +- Exact internal JSON result envelope shape for each tool (the model-facing text content is well documented, but the structured tool_result field names Claude Code itself emits for the API differ slightly and are not officially published). + +## Sources +- [Tools reference - Claude Code Docs (official)](https://code.claude.com/docs/en/tools-reference) — PRIMARY source. Full official table of every built-in tool name + permission requirement + per-tool behavior sections (Read cat -n, Edit unique-match, Bash persistence/limits, Glob/Grep, NotebookEdit, WebFetch/WebSearch, Write, Agent, TodoWrite v2.1.142 deprecation, Task tools, Monitor/LSP/PowerShell/worktree/cron/workflow).
+- [Internal claude code tools implementation (gist by bgauryy)](https://gist.github.com/bgauryy/0cdb9aa337d01ae5bd0c803943aa36bd) — Reverse-engineered EXACT JSON schemas (draft-07) and parameter interfaces for Read/Write/Edit/Glob/Grep/NotebookEdit/Bash/BashOutput/KillShell/Task/Skill/SlashCommand/TodoWrite/ExitPlanMode/AskUserQuestion/WebFetch/WebSearch/getDiagnostics/executeCode — the load-bearing field names and types for a replica. +- [Claude Code Tool Input Schemas (kaidhar/claude-code-permissions-hook)](https://github.com/kaidhar/claude-code-permissions-hook/blob/main/docs/tool-input-schemas.md) — Cross-referenced tool_input JSON shapes (verified against actual hook inputs) used by PreToolUse hooks — confirms MultiEdit schema (edits[] array), Task model/resume fields, LS tool (path+ignore), and MCP naming mcp__server__tool. +- [Claude Code 2.0 System Prompt Changes (Mikhail Shilkov)](https://mikhail.io/2025/09/sonnet-4-5-system-prompt-changes/) — Authoritative confirmation that MultiEdit was REMOVED in Claude Code v2.0 (existed as a ~70-line tool in v1.x), driving the decision NOT to reimplement a MultiEdit tool. +- [Tasks API vs TodoWrite (DeepWiki) + Reddit r/ClaudeAI](https://deepwiki.com/FlorianBruniaux/claude-code-ultimate-guide/8.1-tasks-api-vs-todowrite) — Confirms the v2.1.16 Tasks API introduction and the v2.1.142 default-disable of TodoWrite, plus the CLAUDE_CODE_ENABLE_TASKS env var semantics during rollout. +- [anthropics/claude-code Issue #19901 (Bash output limits)](https://github.com/anthropics/claude-code/issues/19901) — Official-tracked confirmation that Bash captures max 30,000 chars by default and spills full output to a session file with path+preview when exceeded. +- [Claude Code changelog (official)](https://code.claude.com/docs/en/changelog) — Version-specific Bash behavior changes (background shell stopped ~5s after result when stdin closes; $()/$VAR subshell pattern matching) and the CLAUDE_CODE_ENABLE_TASKS gating timeline.
+- [Piebald-AI claude-code-system-prompts CHANGELOG](https://github.com/Piebald-AI/claude-code-system-prompts/blob/main/CHANGELOG.md) — Tracks the system-prompt swap that resolves the TodoWrite tool reference to TaskCreate or TodoWrite depending on whether tasks are enabled — confirms the dual-resolution mechanism. diff --git a/docs/claude-code-architecture/research/tui-ide-config.md b/docs/claude-code-architecture/research/tui-ide-config.md new file mode 100644 index 0000000..ef64e47 --- /dev/null +++ b/docs/claude-code-architecture/research/tui-ide-config.md @@ -0,0 +1,92 @@ +# Research: tui-ide-config + +**Confidence:** high +**As-of:** 2026-06 + +## Summary + +Claude Code's "terminal UI" is NOT a Bubble Tea-style Model/Update/View loop. It is a TypeScript React (ConcurrentRoot) application rendered to the terminal via Ink + a heavily customized react-reconciler host config and Yoga flexbox layout engine, writing ANSI to stdout through a packed-cell Screen buffer with dirty-tracking, double-buffering, and atomic BSU/ESU frame updates. Two renderers exist: 'fullscreen' (alt-screen, virtualized scrollback, flicker-free — the modern default) and 'default' (classic main-screen). IDE integration is local-only: VS Code/Cursor/Windsurf/JetBrains extensions run a WebSocket-or-SSE MCP server on localhost, write a lockfile to `~/.claude/ide/<port>.lock`, set CLAUDE_CODE_SSE_PORT + ENABLE_IDE_INTEGRATION, and the CLI auto-connects (auth via x-claude-code-ide-authorization header); VS Code bundles its own CLI binary, JetBrains runs the PATH `claude`. Configuration is a 4-scope hierarchy (User < Project < Local < Managed) where managed settings (server-managed / MDM plist / Windows registry / system managed-settings.json) cannot be overridden and merge first as a base with systemd-style drop-in directory support. Environment variables (CLAUDE_CODE_*, ANTHROPIC_*) generally override settings keys, and CLI flags override for a single session. 
+ +## Components +### Custom React+Ink Terminal Renderer +**Purpose:** Render the whole TUI: streaming markdown, permission dialogs, spinners, scrollback, diff, vim-mode editor. NOT a Bubble Tea loop — it is a browser-grade retained-mode renderer. + +**Mechanism:** react-reconciler host config creates a custom in-memory DOM (7 element types: ink-root, ink-box, ink-text, ink-virtual-text, ink-link, ink-progress, ink-raw-ansi) reconciled in ConcurrentRoot mode. resetAfterCommit() triggers Yoga calculateLayout() then onRender(). Each frame: Stage1 React commit + Yoga layout -> Stage2 DOM-to-screen (walk tree into packed-cell Screen buffer) -> Stage3 overlay (selection/search highlight mutate buffer in-place, set prevFrameContaminated) -> Stage4 diff vs front frame (2 Int32 compares per cell, walks only damage rectangle) -> Stage5 optimize (merge adjacent row patches, cache style transitions) -> Stage6 write stdout as a SINGLE write() wrapped in BSU/ESU (ESC[?2026h ... ESC[?2026l) atomic updates. Blit optimization: clean unchanged-position nodes copy cells straight from prevScreen. Double buffer: front/back Frame swapped by pointer; pools shared across frames so IDs valid across swap. + +**Data model:** DOMElement { yogaNode, style, attributes, childNodes, dirty, _eventHandlers, scrollTop, pendingScrollDelta, stickyScroll }. Frame { screen:Screen, viewport:Size, cursor:{x,y,visible}, scrollHint, scrollDrainPending }. Packed cell (2x Int32): word0=charId; word1=styleId[31:17]|hyperlinkId[16:2]|width[1:0]. Parallel arrays: noSelect(Uint8Array), softWrap(Int32Array), damage(Rectangle). + +**Config:** FRAME_INTERVAL_MS=16; scroll frame=4ms; CLAUDE_CODE_DEBUG_REPAINTS to attribute full repaints; CLAUDE_CODE_ALT_SCREEN_FULL_REPAINT=1 forces full repaint each frame + +### TUI Modes & Status Line +**Purpose:** User-facing controls over rendering mode, themes, editor bindings, and the custom status line. 
+ +**Mechanism:** tui setting: 'fullscreen' = flicker-free alt-screen (DEC 1049) with virtualized scrollback and BSU/ESU atomic paints; 'default' = classic main-screen renderer. CLAUDE_CODE_NO_FLICKER env selects fullscreen; CLAUDE_CODE_DISABLE_ALTERNATE_SCREEN=1 forces default (and wins over the setting and CLAUDE_CODE_NO_FLICKER). Background sessions from agent view ALWAYS use fullscreen regardless. editorMode 'vim' adds a vim-mode editor in the prompt box (normal/insert). The /config tabbed Settings UI exposes status (model, account), and toggles like Auto-scroll, Editor mode, Show turn duration, Notifications, Terminal progress bar. statusLine: {type:'command', command:'~/.claude/statusline.sh'} runs a user script whose stdout is shown as the status line; disableAllHooks:true also kills the custom status line. Slash menu opens on '/' showing commands like /model, /usage, /compact, /remote-control, plus a Customize group (MCP, hooks, memory, permissions, plugins). IDE diff: when a connected IDE exists and diff tool is 'auto', edits open in the IDE diff viewer (openDiff blocks for user accept/reject); 'terminal' keeps them in-TUI. 
+ +**Data model:** Settings keys: tui, editorMode, statusLine, viewMode, autoScrollEnabled, spinnerTipsEnabled, spinnerTipsOverride, spinnerVerbs, prefersReducedMotion, terminalProgressBarEnabled, syntaxHighlightingDisabled, autoMode {environment,allow,soft_deny,hard_deny arrays with literal "$defaults" inheritance} + +**Config:** tui: 'fullscreen' | 'default' (set via /tui or CLAUDE_CODE_NO_FLICKER); editorMode: 'normal'|'vim' (default normal); statusLine: {type:'command', command:''}; viewMode: 'default'|'verbose'|'focus'; autoScrollEnabled (default true); spinnerTipsEnabled; spinnerVerbs; prefersReducedMotion; terminalProgressBarEnabled (ConEmu/Ghostty 1.2.0+/iTerm2 3.6.6+) + +### IDE Integration (VS Code / JetBrains bridge) +**Purpose:** Connect the CLI TUI to a graphical IDE for diff viewing, selection sharing, file opening, diagnostics. + +**Mechanism:** On IDE launch: (1) extension starts a localhost WebSocket (or SSE) MCP server on a random port 10000-65535; (2) writes a lock file to `~/.claude/ide/<port>.lock` (also documented as -.lock) containing {pid, workspaceFolders, ideName, transport:'ws', authToken (32-char lowercase hex, 128-bit from OS CSPRNG)}; (3) sets env vars `CLAUDE_CODE_SSE_PORT=<port>` and ENABLE_IDE_INTEGRATION=true when spawning claude. Claude reads the lockfile, matches the port, connects, and authenticates with HTTP header `x-claude-code-ide-authorization: <authToken>`. Protocol = MCP spec 2025-03-26 over WS (JSON-RPC 2.0). Internal transport types are 'sse-ide' (url http://localhost:PORT/sse) and 'ws-ide' (url ws://localhost:PORT/ws). VS Code: extension BUNDLES its own CLI copy (run via bundled binary or claudeProcessWrapper); JetBrains plugin does NOT bundle — runs the `claude` command from PATH in the IDE terminal. From external terminal run /ide to connect. autoInstallIdeExtension (default true) auto-installs VS Code ext when launched inside a VS Code/JetBrains terminal; autoConnectIde (default false) connects when launched from an external terminal. 
--ide flag auto-connects if exactly one IDE is available. WSL2 NAT/firewall can block the localhost socket (WSL1 unaffected); wslInheritsWindowsSettings lets WSL read Windows managed settings. + +**Data model:** Lock file JSON: {pid:int, workspaceFolders:[path], ideName:string, transport:'ws', authToken:32-hex-string}. Internal transport type tags: {type:'sse-ide'|'ws-ide', url, ideName, authToken?}. Messages: JSON-RPC 2.0 {jsonrpc:'2.0', method, params, id}. Methods IDE->Claude: selection_changed {text,filePath,fileUrl,selection{start{line,character},end{line,character},isEmpty}}, at_mentioned {filePath,lineStart,lineEnd}. Claude->IDE tools (12): openFile, openDiff, getCurrentSelection, getLatestSelection, getOpenEditors, getWorkspaceFolders, getDiagnostics, checkDocumentDirty, saveDocument, close_tab, closeAllDiffTabs, executeCode. + +**Config:** Plugin settings: Claude command path, suppress not-found, Option+Enter multiline, auto-update. Diff tool setting: auto|terminal (via /config). VS Code ext settings include claudeCode.useTerminal, claudeCode.initialPermissionMode {default,plan,acceptEdits,bypassPermissions}, claudeCode.preferredLocation {panel|sidebar}, claudeCode.autosave, claudeCode.claudeProcessWrapper. + +### settings.json Config Hierarchy +**Purpose:** Merge 4 scopes + managed layer into one effective config; cannot be overridden by user/project once set in managed. + +**Mechanism:** Merged at session start. Precedence (low->high): User(~/.claude/settings.json) < Project(.claude/settings.json) < Local(.claude/settings.local.json) < Managed(server-managed / MDM plist / registry / managed-settings.json). Managed CANNOT be overridden. 
Managed delivery: (a) server-managed from Claude.ai Admin; (b) MDM — macOS com.anthropic.claudecode plist domain, Windows HKLM\SOFTWARE\Policies\ClaudeCode (Settings REG_SZ/REG_EXPAND_SZ containing JSON), Windows user-level HKCU\SOFTWARE\Policies\ClaudeCode (lowest policy priority); (c) file-based managed-settings.json (+ managed-mcp.json) in /Library/Application Support/ClaudeCode/ (mac), /etc/claude-code/ (linux/WSL), C:\Program Files\ClaudeCode\ (win). Legacy Windows path C:\ProgramData\ClaudeCode dropped in v2.1.75. Most keys hot-reload (file watcher + ConfigChange hook); model & outputStyle read once at start. Managed settings parse tolerantly (strip+warn invalid entries, enforce rest; v2.1.169+). A few keys are stored in ~/.claude.json (OAuth, MCP user/local servers, per-project state, caches) NOT settings.json; before v2.1.119 autoScrollEnabled/editorMode/showTurnDuration/teammateMode/terminalProgressBarEnabled lived in ~/.claude.json. ~5 timestamped backups retained. Schema: $schema https://json.schemastore.org/claude-code-settings.json. + +**Data model:** managed-settings.json schema keys include: allowedMcpServers, deniedMcpServers, allowManagedMcpServersOnly, availableModels, enforceAvailableModels, forceLoginMethod (claudeai|console), forceLoginOrgUUID, requiredMinimumVersion, requiredMaximumVersion, allowManagedPermissionRulesOnly, allowManagedHooksOnly, claudeMd, strictKnownMarketplaces, blockedMarketplaces, allowedChannelPlugins, channelsEnabled, companyAnnouncements, policyHelper, parentSettingsBehavior, wslInheritsWindowsSettings, allowAllClaudeAiMcps. permissions object: {allow:[rule], ask:[rule], deny:[rule], additionalDirectories:[path], defaultMode:default|acceptEdits|plan|auto|dontAsk|bypassPermissions, disableBypassPermissionsMode:'disable', skipDangerousModePermissionPrompt}. Permission rule = `Tool` or `Tool(specifier)` e.g. Bash(npm run test *), Read(./.env), mcp__github__get_*. 
+ +**Config:** Drop-in dir managed-settings.d/ (systemd convention: base merged first, then *.json sorted alphabetically, scalars override, arrays concat+dedupe, objects deep-merge, dotfiles ignored; numeric prefixes control order). policyHelper {path} computes managed settings dynamically. requiredMinimumVersion/requiredMaximumVersion (fail open if invalid). forceRemoteSettingsRefresh blocks startup until remote settings fetched (fail closed). + +### Env Vars (CLAUDE_CODE_* / ANTHROPIC_*) +**Purpose:** Per-process overrides; higher precedence than settings.json keys for the same feature. + +**Mechanism:** Env vars generally take precedence over settings fields (e.g. ANTHROPIC_MODEL > model setting; CLAUDE_CODE_AUTO_CONNECT_IDE > autoConnectIde). Exceptions: --model and /model override ANTHROPIC_MODEL; CLAUDE_CODE_EFFORT_LEVEL overrides /effort and effortLevel. NO_COLOR/FORCE_COLOR in settings.env (v2.1.143+) pass to subprocesses but do NOT change CC's own colors (set them in shell pre-launch instead). settings.env injects vars into every session + spawned subprocess. Many feature flags are env-only (no settings.json equivalent). + +**Data model:** Key env vars: ANTHROPIC_API_KEY, ANTHROPIC_AUTH_TOKEN (-> Authorization: Bearer), ANTHROPIC_BASE_URL, ANTHROPIC_MODEL, MAX_THINKING_TOKENS=0 (disable thinking, except Fable 5), DISABLE_AUTOUPDATER, CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC (= DISABLE_AUTOUPDATER+DISABLE_FEEDBACK_COMMAND+DISABLE_ERROR_REPORTING+DISABLE_TELEMETRY), BASH_DEFAULT_TIMEOUT_MS (120000), BASH_MAX_TIMEOUT_MS (600000), API_TIMEOUT_MS (600000), CLAUDE_CODE_SSE_PORT+ENABLE_IDE_INTEGRATION (IDE bridge), CLAUDE_CODE_AUTO_CONNECT_IDE, CLAUDE_CODE_IDE_SKIP_AUTO_INSTALL, CLAUDE_CODE_NO_FLICKER / CLAUDE_CODE_DISABLE_ALTERNATE_SCREEN, CLAUDE_CODE_DISABLE_VIRTUAL_SCROLL, CLAUDE_CODE_DISABLE_MOUSE, CLAUDE_CODE_FORCE_SYNC_OUTPUT, CLAUDE_CODE_SAFE_MODE, CLAUDE_CODE_EFFORT_LEVEL, CLAUDE_CODE_AUTO_COMPACT_WINDOW, CLAUDE_AUTOCOMPACT_PCT_OVERRIDE. 
+ +**Config:** Not a settings.json key (CLI-time); also spawned into subprocesses via CLAUDECODE=1 (all spawned procs incl MCP/IDE terminals) and CLAUDE_CODE_CHILD_SESSION=1 (only Claude's own Bash/PowerShell/hook/statusline spawns, NOT IDE/stdio-MCP, v2.1.172+) which excludes nested interactive TUIs from --resume/--continue/history. + +### Key CLI Flags +**Purpose:** Per-invocation overrides of model, permissions, system prompt, output format, IDE connection, and customization scope. + +**Mechanism:** CLI flags override settings + env for ONE session. Headless/print mode (-p) uses --output-format text|json|stream-json, --input-format, --max-turns, --max-budget-usd, --session-id (UUID), --include-partial-messages, --include-hook-events, --json-schema, --permission-prompt-tool (MCP tool for non-interactive perms). --bare strips auto-discovery (hooks/skills/plugins/MCP/CLAUDE.md) sets CLAUDE_CODE_SIMPLE. --safe-mode disables all customizations (CLAUDE_CODE_SAFE_MODE) but keeps auth/model/built-in tools/permissions AND managed policy. --dangerously-skip-permissions == --permission-mode bypassPermissions. --ide auto-connects if exactly one IDE. --setting-sources picks which of user/project/local to load. + +**Data model:** Modes/payloads: --output-format text|json|stream-json; --input-format text|stream-json; --permission-mode default|acceptEdits|plan|auto|dontAsk|bypassPermissions; --setting-sources user,project,local. + +**Config:** Flags map 1:1 to many settings keys for one session: --model->model, --permission-mode->defaultMode, --effort->effortLevel, --fallback-model->fallbackModel, --teammate-mode->teammateMode, --verbose->viewMode, --settings (inline override), --setting-sources (which scopes to load), --add-dir->permissions.additionalDirectories. + +## Key behaviors +- RENDERER IS REACT+INK, NOT BUBBLE TEA. 
Claude Code's TUI is a TypeScript React app (ConcurrentRoot) with a custom react-reconciler host config and a Yoga flexbox layout engine, writing to stdout via a packed-cell Screen buffer with BSU/ESU (DEC mode 2026, ESC[?2026h/l) atomic frame updates. A Go replica must NOT model a Bubble-Tea Model/Update/View loop — it needs a retained-mode renderer with dirty-tracking, double buffering, and a diff/blit pipeline. +- Fullscreen (alt-screen) is the modern default; 'default' main-screen is legacy. Background/agent-view sessions ALWAYS use fullscreen regardless of the setting. Selection overlay and search highlight mutate the screen buffer in-place (set prevFrameContaminated), forcing a full-damage next frame — a deliberate tradeoff to avoid a separate overlay buffer. +- settings.json hot-reloads on file change (permissions/hooks/apiKeyHelper/statusLine reload live); only model and outputStyle require restart. ConfigChange hook fires per detected change. Files are watched across all 4 scopes. +- Managed settings are un-overridable and parse tolerantly (strip invalid entry, warn, enforce rest; v2.1.169+). User/project/local are strict (whole-file reject on validation error). requiredMinimumVersion/requiredMaximumVersion FAIL OPEN (invalid value stripped, not enforced) so a bad policy push can't brick startup; forceRemoteSettingsRefresh makes startup BLOCK and fail-closed on fetch failure. +- As of v2.1.142, defaultMode:'auto' set in project or local settings (.claude/settings.json, .claude/settings.local.json) is IGNORED — only ~/.claude/settings.json can grant auto mode. A repository cannot self-grant auto. Also skipDangerousModePermissionPrompt is ignored in project settings to block untrusted repos from auto-bypassing the bypass prompt. +- IDE bridge: IDE extension owns the WebSocket MCP server on localhost; CLI is the client. VS Code ext bundles its own CLI copy; JetBrains plugin runs PATH `claude` (no bundle). 
WSL2 NAT/firewall commonly blocks the localhost socket (WSL1 fine). JetBrains Remote Dev: install plugin on the REMOTE host not local client. +- Auto-discovery: when claude is launched inside a VS Code/JetBrains integrated terminal, autoInstallIdeExtension (default true) installs the ext and the CLI auto-connects. From an external terminal, autoConnectIde (default false) is off — run /ide or pass --ide. The lock file (`~/.claude/ide/<port>.lock`) is the discovery mechanism. +- Env precedence nuance: env vars generally beat settings, BUT --model and /model beat ANTHROPIC_MODEL, and CLAUDE_CODE_EFFORT_LEVEL beats /effort. NO_COLOR/FORCE_COLOR in settings.env affect subprocesses only (v2.1.143+), not CC's own colors — set in shell pre-launch to change CC UI colors. +- Per-cell packed format is 2x Int32 (word0=charId, word1=styleId[31:17]|hyperlinkId[16:2]|width[1:0]); CharPool/StylePool/HyperlinkPool are interned and SHARED across front+back frames so blit can copy cells without re-interning. StylePool bit-0 encodes whether a style is visible on spaces (odd=visible) so invisible-space cells are skipped with one bitmask. Pools reset every 5 min with a migration pass to bound growth. +- Render scheduling: lodash throttle at 16ms (leading+trailing) via queueMicrotask after layout effects but same event-loop tick; scroll uses a separate 4ms setTimeout and bypasses React entirely (mutates DOM node scrollTop directly + markDirty). Resize is synchronous, not debounced. + +## Open questions +- Exact keystroke-level behavior of the vim-mode input editor (modes, registers, motions) — only confirmed it exists via editorMode:'vim'; the vim implementation file/grammar not located in public sources. +- Custom theme file format and discovery path (customThemes is referenced only in the --safe-mode description, which says it disables 'custom themes', but the theme JSON schema and load path are not documented in the fetched sources — likely ~/.claude/themes/ but unverified). 
+- Precise multi-source merge semantics for every array vs scalar setting (the docs specify 'arrays merge across sources' generally and explicit exceptions like fallbackModel does NOT merge); a per-key merge table would be needed for an exact replica. +- Whether the SSE transport (sse-ide) is still actively used by current VS Code ext or if WS is now the only transport — sources describe both as internal types but don't pin which is default in v2.1.17x. + +## Sources +- [Claude Code settings (official docs)](https://code.claude.com/docs/en/settings) — Authoritative settings.json hierarchy, all setting keys, managed-settings delivery (plist/registry/file paths), drop-in dir merge rules, hot-reload + ConfigChange hook, invalid-entry tolerance, permission rule syntax, legacy ~/.claude.json storage. +- [Use Claude Code in VS Code (official docs)](https://code.claude.com/docs/en/vs-code) — VS Code extension: bundles own CLI, all extension settings (useTerminal/initialPermissionMode/preferredLocation/claudeProcessWrapper), shortcuts, vscode://anthropic.claude-code/open URI handler with prompt/session params, IDE diff accept/reject semantics. +- [Claude Code JetBrains IDEs (official docs)](https://code.claude.com/docs/en/jetbrains) — JetBrains plugin runs PATH claude (no bundle), /ide connects from external terminal, diff tool auto|terminal, diagnostic + selection sharing, supported IDEs, WSL2 firewall/NAT workaround, Remote Dev host install. +- [Environment variables (official docs)](https://code.claude.com/docs/en/env-vars) — Definitive env var reference: env>settings precedence rule with exceptions, CLAUDECODE vs CLAUDE_CODE_CHILD_SESSION distinction, ANTHROPIC_*/CLAUDE_CODE_* full table, NO_COLOR/FORCE_COLOR v2.1.143 behavior, IDE bridge vars. 
+- [CLI reference (official docs)](https://code.claude.com/docs/en/cli-reference) — Complete CLI command + flag table including --bare, --safe-mode, --setting-sources, --settings, --permission-mode, --ide, --output-format, --session-id, --mcp-config, model/prompt/permission flags and their settings mappings. +- [PROTOCOL.md - claudecode.nvim (reverse-engineered IDE protocol)](https://github.com/coder/claudecode.nvim/blob/main/PROTOCOL.md) — Definitive IDE bridge protocol: lock file JSON shape + `~/.claude/ide/<port>.lock` path, CLAUDE_CODE_SSE_PORT + ENABLE_IDE_INTEGRATION env vars, x-claude-code-ide-authorization header, MCP-over-WS JSON-RPC 2.0, all 12 IDE MCP tools (openFile/openDiff/getCurrentSelection/...). +- [Ch 13. The Terminal UI - Claude Code from Source](https://claude-code-from-source.com/ch13-terminal-ui/) — Deep technical write-up of the React+Ink renderer: custom DOM element types, Yoga host config, ConcurrentRoot, 7-stage render pipeline, double buffering, packed-cell Int32 format, CharPool/StylePool/HyperlinkPool interning, blit fast-path, BSU/ESU atomic updates, 16ms throttle, REPL.tsx structure. +- [Bridge & IDE Integration - Claude Code Internals](https://claude-code-explain.helmcode.com/bridge-ide/) — Internal transport types sse-ide/ws-ide, lockfile naming ~/.claude/ide/-.lock, distinction between local IDE integration (MCP localhost) vs remote Bridge (claude.ai), claude-vscode bidirectional channel, 15 JetBrains IDEs, VS Code auto-install command. +- [Configure server-managed settings (official docs)](https://code.claude.com/docs/en/server-managed-settings) — Server-managed settings delivery via Claude.ai Admin > Claude Code > Managed settings, all settings.json keys supported except OS-policy-restricted list. 
diff --git a/docs/claude-code-architecture/verify-verdicts.md b/docs/claude-code-architecture/verify-verdicts.md new file mode 100644 index 0000000..76e9017 --- /dev/null +++ b/docs/claude-code-architecture/verify-verdicts.md @@ -0,0 +1,59 @@ +## memory-claudemd +- [confirmed] Managed-policy CLAUDE.md precedence: managed (highest) → CLI args → local → project → user (lowest); the managed CLAUDE.md (file or the managed-only `claudeMd` settings key) cannot be excluded by claudeMdExcludes, and the Windows legacy path C:\ProgramData\ClaudeCode\managed-settings.json was removed in v2.1.75 (now C:\Program Files\ClaudeCode\). + evidence: All four sub-claims are confirmed verbatim by primary sources. + +(1) Precedence ordering — docs.claude.com/docs/en/settings, section "How scopes interact": "1. Managed (highest) - can't be overridden b +## streaming-protocol +- [confirmed] The headless final event is type=="result" with subtype "result" (or "success"/"error" variants) — NOT "message_stop". message_stop is the Messages-API SSE terminal event inside a stream_event, distinct from the ResultMessage that ends stream-json. Known bug #1920: missing result event hangs consumers. + evidence: Three authoritative sources confirm the core claim. (1) Headless docs (https://code.claude.com/docs/en/headless) document `--output-format stream-json` and the headless/SDK spec (quoted in issue #1920 + CORRECTION: The headless (stream-json / Agent SDK) conversation is terminated by a top-level event of type=="result" with subtype "success" (or an error variant such as "error") — NOT "message_stop". `message_stop` is a Messages-API SSE event that marks the end of a single message; in stream-json it arrives inside a StreamEvent (top-level type: "stream_event") and precedes the AssistantMessage and ultimately the final ResultMessage, which is what actually ends the stream. 
Known bug anthropics/claude-code#1920: Claude Code intermittently fails to emit the final {"type":"result",...} event in stream-json mode, which hangs SDK consumers indefinitely. +## system-prompt-assembly +- [confirmed] CLAUDE.md IS NOT IN THE SYSTEM PROMPT: official docs state CLAUDE.md/CLAUDE.local.md content is injected into the conversation as a USER message (project context), not into the system prompt; it therefore does NOT affect system-prompt cache entries. The exception is excludeDynamicSections (TS) / exclude_dynamic_sections (Python), added claude-agent-sdk v0.2.98 / v0.1.58, which moves the env-info block from the system prompt into the first user message. + evidence: The official Claude Code Agent SDK docs (code.claude.com/docs/en/agent-sdk/modifying-system-prompts) state verbatim: "CLAUDE.md takes a different path: the SDK reads it and injects its content into th +## agent-loop +- [confirmed] Token-budget auto-continue: COMPLETION_THRESHOLD=0.9 (stop at >=90% used) and DIMINISHING_THRESHOLD=500 tokens — early stop requires >=3 continuations AND both current+previous deltas <500. Subagents ALWAYS stop (budget is top-level only). The nudge is an isMeta user message. Source: claude-code-from-source.com ch05 + inematds/claudecode-manual 04-query-engine.md. + evidence: Confirmed against three independent primary sources that all trace back to the same upstream file (openclaudecode/src/query/tokenBudget.ts). + +(1) openonion/claude-code TS rewrite (https://github.com/o +## context-compaction +- [confirmed] API microcompact uses clear_tool_uses_20250919 with DEFAULT_MAX_INPUT_TOKENS=180,000 trigger and DEFAULT_TARGET_INPUT_TOKENS=40,000 (clear_at_least = 140,000); clear_thinking_20251015 with keep:'all' is emitted whenever hasThinking && !isRedactThinkingActive. + evidence: The deobfuscated Claude Code source `services/compact/apiMicrocompact.ts` (mirrored at github.com/leaf-kit/claude-analysis and claude-code-os.vercel.app) confirms every figure. 
Constants: `const DEFAU + CORRECTION: Claim confirmed. One caveat the claim omits (without contradicting it): the clear_tool_uses_20250919 strategy is emitted only when process.env.USER_TYPE === 'ant' AND env flags USE_API_CLEAR_TOOL_RESULTS or USE_API_CLEAR_TOOL_USES are truthy; the clear_thinking_20251015 strategy is emitted for all users whenever hasThinking && !isRedactThinkingActive (switching to keep:{type:'thinking_turns',value:1} when clearAllThinking is set). +## tool-exec-engine +- [refuted] Permission rule evaluation order is deny -> ask -> allow (first match wins, specificity does not change order); rules format 'Tool' or 'Tool(specifier)' with Bash wildcards where a space before * enforces a word boundary; oversized tool results persist to ~/.claude/tool-results/{hash}.txt and MCP default persist threshold is 25000 chars (hard ceiling 500000 via _meta anthropic/maxResultSizeChars) + evidence: Most sub-claims are confirmed verbatim by https://code.claude.com/docs/en/permissions: "Rules are evaluated in order: deny, then ask, then allow. The first match in that order determines the outcome, + CORRECTION: Permission rule evaluation order is deny -> ask -> allow (first match wins, rule specificity does not change the order); rules use the format 'Tool' or 'Tool(specifier)'; Bash specifiers support glob wildcards where a space before a trailing * (e.g. Bash(ls *)) enforces a word boundary, while Bash(ls*) does not; the _meta["anthropic/maxResultSizeChars"] override has a hard ceiling of 500,000 characters. HOWEVER, the documented default MCP output cap is 25,000 TOKENS (via MAX_MCP_OUTPUT_TOKENS), not 25,000 chars — the docs do not publish a default char-based persist-to-disk threshold. Oversized results ARE persisted to disk and replaced with a file reference, but the official docs do not document the exact path ~/.claude/tool-results/{hash}.txt; that path/hash-scheme is implementation detail not stated in authoritative docs. 
+## session-transcript +- [confirmed] Every transcript line carries a parentUuid (not just uuid), forming a DAG/linked-list; compact_boundary records set parentUuid:null and carry logicalParentUuid referencing the now-erased pre-compaction last message, immediately followed by a user message with isCompactSummary:true whose content starts with "This session is being continued from a previous conversation that ran out of context." + evidence: Primary source (blog.fsck.com technical guide, 2026-02-22) confirms every sub-assertion verbatim. (1) Linked-list: "The `parentUuid` field chains records into a linked list — each record points to the +## mcp +- [uncertain] MCP_TOOL_TIMEOUT default is ~28 hours; MAX_MCP_OUTPUT_TOKENS default is 25000 with a 10000-token warning threshold; per-server 'timeout' values below 1000 ms are ignored (fall through to MCP_TOOL_TIMEOUT) since v2.1.162 (before that they were floored to 1 second) + evidence: All three behavioral facts are confirmed by the PRIMARY source (official Claude Code env-vars doc, https://code.claude.com/docs/en/env-vars), which states verbatim: + +(1) MCP_TOOL_TIMEOUT: "Timeout in + CORRECTION: CONFIRMED: MCP_TOOL_TIMEOUT default is 100000000 ms (~28 hours); MAX_MCP_OUTPUT_TOKENS default is 25000 with a warning threshold at 10000 tokens; for the per-server `timeout` field in .mcp.json, values below 1000 ms are ignored (fall back to MCP_TOOL_TIMEOUT), while for the MCP_TOOL_TIMEOUT env var itself, values below 1000 ms are floored to 1 second. The official docs (code.claude.com/docs/en/env-vars) and changelog confirm both the behavioral change and that sub-1000 ms per-server values were previously floored to a 1-second watchdog. UNVERIFIED: the specific version "v2.1.162" — the official changelog does not let that version be cleanly pinned to this entry; treat the version number as approximate. 
+## skills +- [confirmed] Plugin skills are namespaced 'plugin-name:skill-name' and cannot conflict with enterprise/personal/project levels; the plugin root SKILL.md is the ONLY case where the frontmatter 'name' field sets the command name (otherwise directory name / filename governs). + evidence: The official Claude Code Skills docs (https://code.claude.com/docs/en/skills) state verbatim: "Plugin skills use a plugin-name:skill-name namespace, so they cannot conflict with other levels." + +On com +## permissions +- [confirmed] Rule syntax gotcha: Bash(ls *) requires the space and enforces a word-boundary (matches 'ls -la' not 'lsof'); Bash(ls*) without space matches both; trailing :* (Bash(ls:*)) is equivalent to trailing ' *' but is ONLY recognized at end of pattern; Read/Edit pattern anchors differ — //path=filesystem root, ~/path=home, /path=project root (NOT absolute!), path/./path=relative to cwd. + evidence: Official Claude Code docs (code.claude.com/docs/en/permissions, retrieved 2026-06-14, v2.1.x) confirm every assertion verbatim: + +(1) Bash word boundary: "The space before * matters: Bash(ls *) matches +## hooks +- [confirmed] PreToolUse uses hookSpecificOutput.permissionDecision (allow/deny/ask/defer) + permissionDecisionReason + updatedInput (NOT top-level decision/reason which is DEPRECATED for this event; legacy approve/block map to allow/deny). Other events (PostToolUse, Stop, UserPromptSubmit, PreCompact, ConfigChange) use TOP-LEVEL decision:'block' + reason. PermissionRequest uses hookSpecificOutput.decision.behavior (allow/deny). PreToolUse hooks fire BEFORE permission-mode checks and can deny even in bypassPermissions mode. 
+ evidence: The official Hooks reference (https://code.claude.com/docs/en/hooks) confirms every component: + +(1) PreToolUse structure & deprecated top-level fields (line 1455, 1485): "Unlike other hooks that use a + CORRECTION: (Optional precision, not a correction: the top-level decision:'block' events are exactly UserPromptSubmit, UserPromptExpansion, PostToolUse, PostToolUseFailure, PostToolBatch, Stop, SubagentStop, ConfigChange, and PreCompact — i.e., the claim's list (PostToolUse, Stop, UserPromptSubmit, PreCompact, ConfigChange) is correct but not exhaustive. updatedInput for PreToolUse sits directly under hookSpecificOutput; for PermissionRequest it is inside the decision object.) +## slash-commands-plan +- [confirmed] The 5 ExitPlanMode approval options presented to the user are exactly: 'Approve and start in auto mode', 'Approve and accept edits', 'Approve and review each edit manually', 'Keep planning with feedback', 'Refine with Ultraplan'; each approve option switches the permission mode accordingly. + evidence: The official Claude Code docs page "Choose a permission mode" (https://code.claude.com/docs/en/permission-modes) renders the ExitPlanMode prompt verbatim as an unordered list with these exact children + CORRECTION: When Claude exits plan mode, the approval prompt presents exactly these 5 options, in this order: 'Approve and start in auto mode', 'Approve and accept edits', 'Approve and review each edit manually', 'Keep planning with feedback', and 'Refine with Ultraplan for browser-based review' (the full label; 'Ultraplan' links to /en/ultraplan). 'Keep planning with feedback' and the 'Refine...' option are not approvals (they keep you in plan mode). The three approve options switch the session to the permission mode each describes (auto, acceptEdits, default), as the docs state: 'Approving a plan exits plan mode and switches the session to the permission mode each approve option describes.' 
+## subagents-task +- [confirmed] The Agent tool prompt-only return contract: parent receives ONLY the subagent's final message verbatim as the tool_result (no intermediate tool calls/reasoning); built-in Explore and Plan are one-shot and return NO agentId so they cannot be resumed via SendMessage. + evidence: Both halves are directly confirmed by official Claude Code docs. + +PART 1 (verbatim final-message return, no intermediate tool calls): The SDK docs (code.claude.com/docs/en/agent-sdk/subagents) state v \ No newline at end of file diff --git a/go.mod b/go.mod index fdd9630..55b674f 100644 --- a/go.mod +++ b/go.mod @@ -3,16 +3,17 @@ module iroha go 1.26.1 require ( - github.com/atotto/clipboard v0.1.4 - github.com/aymanbagabas/go-osc52/v2 v2.0.1 - github.com/charmbracelet/bubbles v1.0.0 - github.com/charmbracelet/bubbletea v1.3.10 github.com/charmbracelet/glamour v1.0.0 github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834 + github.com/charmbracelet/x/ansi v0.11.6 github.com/firebase/genkit/go v1.8.0 github.com/google/uuid v1.6.0 + github.com/muesli/termenv v0.16.0 + golang.org/x/net v0.54.0 + golang.org/x/term v0.43.0 google.golang.org/adk v1.2.1-0.20260519122726-f2aee5301649 google.golang.org/genai v1.57.0 + gopkg.in/yaml.v3 v3.0.1 ) require ( @@ -21,13 +22,15 @@ require ( cloud.google.com/go/compute/metadata v0.9.0 // indirect github.com/alecthomas/chroma/v2 v2.20.0 // indirect github.com/anthropics/anthropic-sdk-go v1.23.0 // indirect + github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect + github.com/aymanbagabas/go-udiff v0.3.1 // indirect github.com/aymerick/douceur v0.2.0 // indirect github.com/bahlo/generic-list-go v0.2.0 // indirect github.com/buger/jsonparser v1.1.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/charmbracelet/colorprofile v0.4.1 // indirect - github.com/charmbracelet/x/ansi v0.11.6 // indirect github.com/charmbracelet/x/cellbuf v0.0.15 // indirect + github.com/charmbracelet/x/exp/golden 
v0.0.0-20241011142426-46044092ad91 // indirect github.com/charmbracelet/x/exp/slice v0.0.0-20250327172914-2fdc97757edf // indirect github.com/charmbracelet/x/term v0.2.2 // indirect github.com/clipperhouse/displaywidth v0.9.0 // indirect @@ -35,7 +38,6 @@ require ( github.com/clipperhouse/uax29/v2 v2.5.0 // indirect github.com/coder/websocket v1.8.14 // indirect github.com/dlclark/regexp2 v1.11.5 // indirect - github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -53,14 +55,10 @@ require ( github.com/lucasb-eyer/go-colorful v1.3.0 // indirect github.com/mailru/easyjson v0.9.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect - github.com/mattn/go-localereader v0.0.1 // indirect github.com/mattn/go-runewidth v0.0.19 // indirect github.com/mbleigh/raymond v0.0.0-20250414171441-6b3a58ab9e0a // indirect github.com/microcosm-cc/bluemonday v1.0.27 // indirect - github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect - github.com/muesli/cancelreader v0.2.2 // indirect github.com/muesli/reflow v0.3.0 // indirect - github.com/muesli/termenv v0.16.0 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/tidwall/gjson v1.18.0 // indirect github.com/tidwall/match v1.1.1 // indirect @@ -82,15 +80,12 @@ require ( go.opentelemetry.io/otel/sdk v1.43.0 // indirect go.opentelemetry.io/otel/trace v1.43.0 // indirect golang.org/x/crypto v0.51.0 // indirect - golang.org/x/net v0.54.0 // indirect golang.org/x/sys v0.44.0 // indirect - golang.org/x/term v0.43.0 // indirect golang.org/x/text v0.37.0 // indirect google.golang.org/api v0.279.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20260511170946-3700d4141b60 // indirect google.golang.org/grpc v1.81.0 // indirect google.golang.org/protobuf v1.36.11 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect rsc.io/omap v1.2.0 // indirect 
rsc.io/ordered v1.1.1 // indirect ) diff --git a/go.sum b/go.sum index d1e430f..56338eb 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,6 @@ cloud.google.com/go/auth v0.20.0 h1:kXTssoVb4azsVDoUiF8KvxAqrsQcQtB53DcSgta74CA= cloud.google.com/go/auth v0.20.0/go.mod h1:942/yi/itH1SsmpyrbnTMDgGfdy2BUqIKyd0cyYLc5Q= cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= -github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= -github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0= github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= github.com/alecthomas/chroma/v2 v2.20.0 h1:sfIHpxPyR07/Oylvmcai3X/exDlE8+FA820NTz+9sGw= @@ -14,8 +12,6 @@ github.com/alecthomas/repr v0.5.1 h1:E3G4t2QbHTSNpPKBgMTln5KLkZHLOcU7r37J4pXBuIg github.com/alecthomas/repr v0.5.1/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= github.com/anthropics/anthropic-sdk-go v1.23.0 h1:YVNnxfVVPJM+zvQ1oDgTJUBtLttGpBHe1WtJBr0QeAs= github.com/anthropics/anthropic-sdk-go v1.23.0/go.mod h1:WTz31rIUHUHqai2UslPpw5CwXrQP3geYBioRV4WOLvE= -github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= -github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= github.com/aymanbagabas/go-udiff v0.3.1 h1:LV+qyBQ2pqe0u42ZsUEtPiCaUoqgA9gYRDs3vj1nolY= @@ -28,10 +24,6 @@ github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMU github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= 
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/charmbracelet/bubbles v1.0.0 h1:12J8/ak/uCZEMQ6KU7pcfwceyjLlWsDLAxB5fXonfvc= -github.com/charmbracelet/bubbles v1.0.0/go.mod h1:9d/Zd5GdnauMI5ivUIVisuEm3ave1XwXtD1ckyV6r3E= -github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw= -github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4= github.com/charmbracelet/colorprofile v0.4.1 h1:a1lO03qTrSIRaK8c3JRxJDZOvhvIeSco3ej+ngLk1kk= github.com/charmbracelet/colorprofile v0.4.1/go.mod h1:U1d9Dljmdf9DLegaJ0nGZNJvoXAhayhmidOdcBwAvKk= github.com/charmbracelet/glamour v1.0.0 h1:AWMLOVFHTsysl4WV8T8QgkQ0s/ZNZo7CiE4WKhk8l08= @@ -61,8 +53,6 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ= github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= -github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= -github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/firebase/genkit/go v1.8.0 h1:jIL9xS3ZxW9sTWN2SG9RyupPd0srjXmfB1749FPIuaY= @@ -110,8 +100,6 @@ github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4 github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty 
v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4= -github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88= github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw= github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= @@ -119,10 +107,6 @@ github.com/mbleigh/raymond v0.0.0-20250414171441-6b3a58ab9e0a h1:v2cBA3xWKv2cIOV github.com/mbleigh/raymond v0.0.0-20250414171441-6b3a58ab9e0a/go.mod h1:Y6ghKH+ZijXn5d9E7qGGZBmjitx7iitZdQiIW97EpTU= github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk= github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA= -github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI= -github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo= -github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= -github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s= github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8= github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc= @@ -194,7 +178,6 @@ golang.org/x/net v0.54.0 h1:2zJIZAxAHV/OHCDTCOHAYehQzLfSXuf/5SoL/Dv6w/w= golang.org/x/net v0.54.0/go.mod h1:Sj4oj8jK6XmHpBZU/zWHw3BV3abl4Kvi+Ut7cQcY+cQ= golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= -golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ= golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= diff --git a/pkg/AGENTS.md b/pkg/AGENTS.md index af47b84..c22b354 100644 --- a/pkg/AGENTS.md +++ b/pkg/AGENTS.md @@ -1,44 +1,40 @@ - + # pkg ## Purpose -Core library packages implementing the agent system, LLM integration (7 providers), configuration, and terminal UI. +Core packages for the go-claude application, providing the agent runtime, LLM adapters, terminal UI, and configuration layer. ## Subdirectories | Directory | Purpose | |-----------|---------| -| `agent/` | Agent runner, 30+ tools, permissions, hooks, memory, task DAG, team, MCP (see `agent/AGENTS.md`) | -| `config/` | Configuration loading, 7-provider defaults, cost estimation, interactive wizard (see `config/AGENTS.md`) | -| `llm/` | LLM provider adapters — OpenAI-compatible, Anthropic, Genkit (see `llm/AGENTS.md`) | -| `tui/` | Bubble Tea terminal UI with 6 states, 17 slash commands, doctor (see `tui/AGENTS.md`) | +| `agent/` | Agent runtime: execution loop, 30+ tool dispatchers, git/LLM/MCP/subagent integration, session management, hooks, permissions, memory, task DAG, team orchestration, and background tasks (see `agent/AGENTS.md`) | +| `config/` | Application configuration loading, provider defaults, cost estimation, and interactive setup wizard (see `config/AGENTS.md`) | +| `llm/` | LLM provider adapters (OpenAI-compatible, Anthropic, Genkit/GLM) with retry logic and debug logging (see `llm/AGENTS.md`) | +| `tui/` | Bubble Tea terminal UI: chat view, input handling, status display, slash commands, theming, rendering, and doctor diagnostics (see `tui/AGENTS.md`) | ## For AI Agents ### Working In This Directory -- All packages use the `go-claude/pkg/` import path +- All packages follow standard Go conventions with 
`package` declarations matching directory names - Packages communicate via exported interfaces and global singletons (`Global*`) -- Test files are colocated with source (`*_test.go`) +- Test files are colocated with source (`*_test.go`); run per-package with `go test ./pkg/<package>/...` - Config path: `~/.iroha/` (auto-migrates from legacy `~/.go-claude/`) ### Testing Requirements - `go test ./pkg/...` runs all tests - Each package is independently testable -- Total: ~3,633 test lines across 23 test files +- 80+ source files and 23+ test files across the four packages ### Common Patterns - Global singletons: `GlobalPermissionManager`, `GlobalHookManager`, `GlobalMemoryManager`, `GlobalTodoManager`, `GlobalTaskManager`, `GlobalBackgroundManager`, `GlobalCronScheduler`, `GlobalTeamManager`, `GlobalProtocolManager`, `GlobalAutonomyManager`, `GlobalWorktreeManager`, `GlobalMCPRouter` - Channel-based bridges for async TUI ↔ Agent communication (`ConfirmationBridge`, `ToolStatusBridge`) -- Chinese-language system prompts and error messages - Mutex-protected concurrent access (`sync.RWMutex`) ## Dependencies ### Internal -- All `pkg/` packages may import each other through the `agent` package as the orchestrator -- `agent` → `llm` (model adapter) -- `tui` → `agent` (runner, bridge, permission manager) +- Dependency flow: `tui/` → `agent/` → `llm/` + `config/` +- `agent` is the central orchestrator: it imports `llm/` and `config/` and is imported by `tui/` - `cmd/agent-cli` → all `pkg/` packages - - diff --git a/pkg/agent/AGENTS.md b/pkg/agent/AGENTS.md index b9a6545..62e6cea 100644 --- a/pkg/agent/AGENTS.md +++ b/pkg/agent/AGENTS.md @@ -1,62 +1,119 @@ - + # agent ## Purpose -Core agent orchestration: runner lifecycle, SWE tool definitions (30+ tools), human-in-the-loop permission system, hook pipeline, cross-session memory, prompt builder, task DAG, cron scheduler, background execution, team coordination, protocol handshake, autonomous polling, git worktree isolation, MCP plugin routing, LSP client,
multi-agent pool, session persistence, diff generation, CI monitoring, and audit logging. +Core agent orchestration: runner lifecycle, SWE tool definitions (40+ tools), human-in-the-loop permission system, multi-type hook pipeline (command/HTTP/LLM-prompt), cross-session memory with LLM-assisted consolidation, prompt builder, task DAG, cron scheduler, background execution, team coordination with process isolation, protocol handshake, autonomous polling, git worktree isolation, MCP plugin routing, LSP client with 5 code-intelligence tools, multi-agent pool, subagent delegation with worktree isolation, session persistence, diff generation, CI monitoring, audit logging, OS-level sandboxing (macOS sandbox-exec / Linux bwrap), plugin manifest system, skill discovery and trigger matching, tokenizer, watchdog crash recovery, web fetch/search with SSRF protection, and IPC bridge for inter-process communication. ## Key Files | File | Description | |------|-------------| -| `runner.go` | `CustomRunner` — wraps ADK runner, manages async execution, `ConfirmationBridge` channels, `ToolStatusBridge`, `blockingConfirmationTool` wrapper, hook pipeline (PreToolUse → execute → PostToolUse), `ToolCircuitBreaker` (3 consecutive failures → auto-block) | -| `tools.go` | Tool registration and dispatch — registers all 30+ SWE tools with the ADK agent builder, delegates handlers to `tools_*.go` files | -| `tools_file.go` | File tools: `file_read`, `file_write`, `file_edit`, `list_directory`, `search_grep` — path sandboxing, 10MB read limit, edit validation | -| `tools_shell.go` | Shell tool: `shell_run` — command execution with 30s timeout, 500-line stream cap, sandbox validation | -| `tools_mcp.go` | MCP tools: `mcp_server_list` — lists connected MCP plugin servers | -| `tools_memory.go` | Memory tools: `memory_save`, `memory_list` — persistent memory CRUD via `MemoryManager` | -| `tools_schedule.go` | Schedule tools: `schedule_create`, `schedule_list`, `schedule_delete` — cron job 
management via `CronScheduler` | -| `tools_task.go` | Task tools: `task_create`, `task_update`, `task_list`, `task_get` — DAG task management via `TaskManager` | -| `tools_team.go` | Team tools: `spawn_teammate`, `send_message`, `team_status` — team coordination via `TeamManager` | -| `tools_todo.go` | Todo tool: `todo` — session-level progress planning via `TodoManager` | -| `tools_worktree.go` | Worktree tools: `worktree_create`, `worktree_remove`, `worktree_status` — git worktree isolation via `WorktreeManager` | -| `pool.go` | `AgentPool` — multi-agent runner pool with per-agent LLM config, `GlobalAgentPool` singleton, dynamic runner creation with tool injection | -| `lsp.go` | `LSPClient` — Language Server Protocol client over stdio JSON-RPC 2.0, supports `textDocument/completion`, `textDocument/definition`, `textDocument/references`, `textDocument/hover`, `textDocument/diagnostics` | -| `git_helper.go` | Git utilities: `GitHasChanges`, `GitGetStagedDiff`, `GitGetCurrentBranch` — porcelain helpers for CI/worktree integrations | -| `session_store.go` | `PersistentSessionService` — wraps ADK `session.InMemoryService` with JSON persistence in `~/.iroha/sessions/`, CRUD + fork, session metadata, stale session GC | -| `permission.go` | `PermissionManager` — rule-based allow/deny/ask with bash security validation, three modes (default/plan/auto), path and content pattern matching | -| `hooks.go` | `HookManager` — external hook scripts loaded from `~/.iroha/hooks.json` and `./.iroha/hooks.json`, exit-code protocol (0=continue, 1=block, 2=inject), matcher support | -| `memory.go` | `MemoryManager` — file-based persistent memory with YAML frontmatter, four types (user/feedback/project/reference), two-layer storage (global `~/.iroha/memory/` + project `.iroha/memory/`), `MEMORY.md` index | -| `prompt.go` | `SystemPromptBuilder` — dynamic prompt assembly with cache-friendly stable/dynamic boundary (`=== DYNAMIC_BOUNDARY ===`), CLAUDE.md layering, skill injection, live 
task/team/worktree context | -| `todo_manager.go` | `TodoManager` — session-level task planning with status tracking (pending/in_progress/completed), max 12 items, nag reminder after 3 rounds without update | -| `task.go` | `TaskManager` — durable work graph (DAG) persisted as JSON files in `.tasks/`, bidirectional edge reconciliation, DFS cycle detection, auto-created placeholder nodes | -| `background.go` | `BackgroundManager` — slow-running shell commands in background goroutines, 5-min timeout, result preview, notification queue for next-turn delivery | -| `cron.go` | `CronScheduler` — 5-field cron expression evaluator, PID-based lock for multi-session safety, durable/session storage, jitter on :00/:30 marks, 7-day auto-expiry, missed-task detection | -| `team.go` | `TeamManager` — persistent specialist teammates with JSONL mailbox inbox, background polling loops, broadcast, `ProcessMessage` callback for LLM integration | -| `protocol.go` | `ProtocolManager` — structured request-response handshake (shutdown/plan_approval) persisted as JSON, single-use pending→approved/rejected lifecycle | -| `autonomous.go` | `AutonomousManager` — task auto-polling and state transitions (WORK/IDLE), keyword-based task claiming for specialist agents | -| `mcp.go` | `MCPClient` + `MCPToolRouter` — stdio-based JSON-RPC 2.0 lifecycle over child processes, dynamic tool discovery and ADK wrapping, plugins loaded from `.iroha/plugins.json` | -| `worktree.go` | `WorktreeManager` — git worktree creation/removal/keep, JSON index + JSONL event log, cascading task status updates on closeout | +| `runner.go` | `CustomRunner` -- wraps ADK runner, manages async execution, `ConfirmationBridge` channels, `ToolStatusBridge`, `blockingConfirmationTool` wrapper, hook pipeline (PreToolUse -> execute -> PostToolUse), `ToolCircuitBreaker` (3 consecutive failures -> auto-block) | +| `runner_bridge.go` | `ConfirmationBridge` -- async channel pair (`PromptChan`/`ResponseChan`) between runner goroutine 
and TUI main thread, plus cancellation via `CancelChan`; `ToolStatusBridge` -- buffered status channel with background drain worker for real-time tool state updates | +| `runner_confirmation.go` | `blockingConfirmationTool` -- intercepts every tool call for permission check (`GlobalPermissionManager.Check`), auto-review via `ReviewCommand`/`ReviewFileOperation`, human confirmation loop with support for `y`/`n`/`always`/`bypass`/`edit:`/`explain` responses, LLM-powered explanation on demand | +| `runner_confirmation_hooks.go` | `ToolCircuitBreaker` -- tracks consecutive identical-arg failures per tool, blocks after 3 strikes; `runWithHooks` -- three-stage tool execution pipeline: Stage A (PreToolUse hooks with input rewrite), Stage B (execute tool + circuit breaker check), Stage C (PostToolUse hooks + self-healing post-edit compile verification) | +| `runner_edit.go` | File edit snapshot and rollback helpers (`snapshotFile`, `rollbackPendingEdits`, `whitespaceTolerantEdit`) for atomic edit operations | +| `runner_exec.go` | Shell command execution, streaming output, sandbox wrapping | +| `tools.go` | Tool registration and dispatch -- registers all 40+ SWE tools with the ADK agent builder, delegates handlers to `tools_*.go` files | +| `tools_file.go` | File tools: `file_read` (line-range support, self-repair suggestions), `file_write` (auto-mkdir), `file_edit` (exact match, whitespace-tolerant fallback, dry-run mode) | +| `tools_file_batch.go` | `file_edit_batch` -- atomic multi-edit with two-phase validation and full rollback on any failure, up to 50 edits per batch | +| `tools_file_search.go` | `list_directory` (recursive up to depth 4, 200 entry cap), `search_grep` (regex, 1MB file limit, 50 match cap, `.git`/`node_modules` exclusion), `find_files` (glob with `**` support) | +| `tools_shell.go` | Shell tool: `shell_run` -- command execution with 30s timeout, 500-line stream cap, sandbox validation | +| `tools_mcp.go` | MCP tools: `mcp_server_list` -- lists 
connected MCP plugin servers | +| `tools_memory.go` | Memory tools: `memory_save`, `memory_list` -- persistent memory CRUD via `MemoryManager` | +| `tools_schedule.go` | Schedule tools: `schedule_create`, `schedule_list`, `schedule_delete` -- cron job management via `CronScheduler` | +| `tools_task.go` | Task tools: `task_create`, `task_update`, `task_list`, `task_get` -- DAG task management via `TaskManager` | +| `tools_team.go` | Team tools: `spawn_teammate`, `send_message`, `team_status` -- team coordination via `TeamManager` | +| `tools_todo.go` | Todo tool: `todo` -- session-level progress planning via `TodoManager` | +| `tools_worktree.go` | Worktree tools: `worktree_create`, `worktree_remove`, `worktree_status` -- git worktree isolation via `WorktreeManager` | +| `tools_subagent.go` | `spawn_subagent` tool -- delegates synchronous subagent execution via `GlobalSubagentManager.RunSubagent` | +| `tools_web.go` | `web_fetch` (HTTP GET with HTML-to-text conversion, 5MB limit, rate-limited 10/min) and `web_search` (DuckDuckGo HTML scraping or SearXNG JSON backend), both using SSRF-safe HTTP client | +| `tools_web_safety.go` | SSRF protection infrastructure: `rateLimiter` (sliding window), private IP detection (`isPrivateIP`), DNS-rebinding-safe `http.Transport` (`ssrfSafeTransport`), HTML-to-text converter stripping script/style/svg/iframe | +| `pool.go` | `AgentPool` -- multi-agent runner pool with per-agent LLM config, `GlobalAgentPool` singleton, dynamic runner creation with tool injection | +| `lsp.go` | `LSPClient` -- Language Server Protocol client over stdio JSON-RPC 2.0, supports `textDocument/completion`, `textDocument/definition`, `textDocument/references`, `textDocument/hover`, `textDocument/diagnostics` | +| `lsp_tools.go` | LSP tool handlers: `lsp_goto_definition`, `lsp_find_references`, `lsp_document_symbols`, `lsp_hover`, `lsp_diagnostics` -- each resolves paths, validates sandbox, calls LSP server, returns structured results with file snippets | 
+| `lsp_types.go` | LSP JSON-RPC types (`jsonrpcRequest`/`Response`/`Error`), LSP protocol types (`lspPosition`/`Range`/`Location`/`DocumentSymbol`), tool argument/result structs for all 5 LSP tools, `DefaultLSPServers` (gopls, typescript-language-server, pyright-langserver, rust-analyzer), `SetLSPServers` merge logic | +| `lsp_utils.go` | LSP utility functions: `pathToURI`/`uriToPath` conversion, `parseLocations` (handles single Location, Location array, LocationLink array), `getSnippet` (reads 15-line code preview), `symbolKindToString`, `registerLSPTools` (lazy config loading) | +| `git_helper.go` | Git utilities: `GitHasChanges`, `GitGetStagedDiff`, `GitGetCurrentBranch` -- porcelain helpers for CI/worktree integrations | +| `session_store.go` | `PersistentSessionService` -- wraps ADK `session.InMemoryService` with JSON persistence in `~/.iroha/sessions/`, CRUD + fork, session metadata, stale session GC | +| `session_store_helpers.go` | Session helpers: `estimateTokens` (text-len/4), `estimateCost`, `getFirstPrompt` (session title extraction, 60-char cap), `GetSessionsDir`, `CleanOldSessions` (age-based GC), `ValidateResume` (integrity checks for CWD, events, state, archive) | +| `permission.go` | `PermissionManager` -- rule-based allow/deny/ask with bash security validation, three modes (default/plan/auto), path and content pattern matching | +| `hooks.go` | `HookManager` -- external hook scripts loaded from `~/.iroha/hooks.json` and `./.iroha/hooks.json`, exit-code protocol (0=continue, 1=block, 2=inject), matcher support | +| `hooks_types.go` | Hook type definitions: 12 `HookEvent` constants (SessionStart/End, UserPrompt, AgentResponse, PreToolUse, PostToolUse, ToolError, Compaction, SubagentStop, Notification, PreCompact, PostCompact), 3 `HookType` (command, http, llm-prompt), `HookDef`/`HookConfig`/`HookContext`/`HookResult` structs, `hookTimeoutForEvent` per-category timeouts, `parseJSONResult` for JSON-mode hooks, `mergePluginHooks` | +| `hooks_exec.go` 
| Hook execution engine: `RunHooks` (dispatches by matcher, handles async hooks), `runHTTP` (POST with JSON payload, env-var header expansion, allowed-env-vars restriction), `runLLMPrompt` (LLM-based compliance audit with strict JSON decision parsing), `runCommand` (shell subprocess with whitelisted env vars, stdin JSON payload, dual JSON/exit-code protocol) | +| `memory.go` | `MemoryManager` -- file-based persistent memory with YAML frontmatter, four types (user/feedback/project/reference), two-layer storage (global `~/.iroha/memory/` + project `.iroha/memory/`), `MEMORY.md` index | +| `memory_frontmatter.go` | Memory type system (`MemoryType` constants: user/feedback/project/reference), `MemoryEntry` struct, `MaxMemoryEntries` cap (100), YAML frontmatter parse/render (`parseFrontmatter`/`renderFrontmatter`), `slugify` for safe filenames | +| `memory_helpers.go` | Memory utility helpers: `tokenizeKeywords` (lowercase word splitter with stop-word filter), `projectMemoryDir` (resolves `./.iroha/memory` with auto-create) | +| `memory_agents_sync.go` | `syncToAgentsMD` -- bidirectional sync between `MemoryManager` entries and the `## Agent Dynamic Learnings` section of `AGENTS.md`; `syncFromAgentsMDLocked` -- parses AGENTS.md blocks back into memory files with mutex protection | +| `memory_dream.go` | `DreamConsolidator` -- automated memory consolidation with 7-gate validation (enabled, memory dir exists, not plan mode, cooldown, throttle, session count, PID lock); 4-phase consolidation: Orient, Gather, Consolidate (exact dedup + LLM semantic merge), Prune (enforce 100-entry cap) | +| `prompt.go` | `SystemPromptBuilder` -- dynamic prompt assembly with cache-friendly stable/dynamic boundary (`=== DYNAMIC_BOUNDARY ===`),
CLAUDE.md layering, skill injection, live task/team/worktree context | +| `todo_manager.go` | `TodoManager` -- session-level task planning with status tracking (pending/in_progress/completed), max 12 items, nag reminder after 3 rounds without update | +| `task.go` | `TaskManager` -- durable work graph (DAG) persisted as JSON files in `.tasks/`, bidirectional edge reconciliation, DFS cycle detection, auto-created placeholder nodes | +| `background.go` | `BackgroundManager` -- slow-running shell commands in background goroutines, 5-min timeout, result preview, notification queue for next-turn delivery | +| `cron.go` | `CronScheduler` -- 5-field cron expression evaluator, PID-based lock for multi-session safety, durable/session storage, jitter on :00/:30 marks, 7-day auto-expiry, missed-task detection | +| `cron_helpers.go` | `CronLock` -- PID-based file lock with stale detection (`isPIDAlive` via signal 0), `cronMatches`/`fieldMatches` -- 5-field cron expression parser with range/step/comma/Sunday(0|7) support, `hashString` for jitter | +| `team.go` | `TeamManager` -- persistent specialist teammates with JSONL mailbox inbox, background polling loops, broadcast, `ProcessMessage` callback for LLM integration | +| `team_types.go` | Team type definitions: `TeamMessage` (sender/content/timestamp/extra), `Teammate` (name/role/type/status/lastActive), `TeamConfig` (roster), `TeamManager` struct with isolation mode fields (IPC bridge, watchdogs, binary path, cancel funcs) | +| `team_message.go` | Team mailbox operations: `AppendToInbox` (JSONL append), `ReadAndClearInbox` (atomic read+truncate), `PeekInbox` (non-destructive read), `Broadcast` (fan-out to all teammates except sender), `splitJSONLines` helper | +| `team_process.go` | Process-isolated team execution: `StartTeammateLoop` (goroutine or child process mode), `EnableProcessIsolation` (configures IPC bridge), `StartTeammateProcess` (spawns child with watchdog + heartbeat checker), `StopTeammateProcess`, 
`RunTeammateMode` (child-process entry point with IPC message loop and heartbeat ticker) | +| `subagent.go` | `SubagentManager` + `SubagentSpec`/`SubagentResult` -- synchronous subagent execution with worktree isolation for executor types, curated toolsets per type (explore=read-only, executor=all), cheaper model routing (haiku/flash/mini), JSONL execution logging, git diff analysis for file change detection | +| `protocol.go` | `ProtocolManager` -- structured request-response handshake (shutdown/plan_approval) persisted as JSON, single-use pending->approved/rejected lifecycle | +| `autonomous.go` | `AutonomousManager` -- task auto-polling and state transitions (WORK/IDLE), keyword-based task claiming for specialist agents | +| `mcp.go` | `MCPClient` + `MCPToolRouter` -- stdio-based JSON-RPC 2.0 lifecycle over child processes, dynamic tool discovery and ADK wrapping, plugins loaded from `.iroha/plugins.json` | +| `mcp_client.go` | Standalone `MCPClient` -- stdio JSON-RPC 2.0 client with pending-request map, 10s call timeout, MCP initialize handshake (`protocolVersion: 2024-11-05`), `SendNotification`, `Close` with process kill | +| `worktree.go` | `WorktreeManager` -- git worktree creation/removal/keep, JSON index + JSONL event log, cascading task status updates on closeout | +| `plugin.go` | `PluginManager` -- discovers and validates `plugin.json` manifests from `~/.iroha/plugins/*/` and `.iroha/plugins/*/`; `PluginManifest` (ID, name, semver version, MCP servers, hooks, skills, permissions); `ValidateManifest` (semver regex, plugin ID regex, no double underscores); `MigratePluginsConfig` (legacy flat -> manifest); `MergeMCPServers` (namespaced `pluginID__serverName`); `MergeHooks` | +| `skills.go` | `SkillManager` -- discovers `skill.json` manifests from `~/.iroha/skills/*/` and `.iroha/skills/*/`; `SkillManifest` (ID, name, triggers, type); three `SkillType`: `model_invoked` (keyword-triggered auto-injection), `user_invoked` (`/skill` command), `always` 
(permanent injection); `MatchTriggers` (case-insensitive keyword scan); `LoadInstructions` (path-escaped SKILL.md reader) | +| `sandbox.go` | OS-level sandboxing: `WrapSandboxCommand` dispatches to `wrapMacSandbox` (sandbox-exec with deny-write profile for /System, /usr, ~/.ssh, ~/.aws + allow-write for workspace/tmp/caches) or `wrapLinuxSandbox` (bwrap with read-only root + writable workspace/bind cache); `tokenizeCommand` (quote-aware shell tokenizer that blocks backticks, `$()`, pipes, `&&`, `;`, `>`, `<`); `safePrefixes` (configurable via `IROHA_SAFE_PREFIXES`) | | `auto_review.go` | Hybrid safety review for `shell_run`: heuristic rules first, then LLM semantic analysis, then local dangerous-pattern double-check | -| `compaction.go` | Conversation micro-compaction and archival — large tool outputs archived to transcripts, LLM-based conversational summarization (falls back to text extraction when no LLM provided) | +| `auto_review_apply.go` | `heuristicReview` -- rule-based safety check: newline injection, tokenizer-based subcommand splitting, command substitution detection, dangerous command names (rm/curl/sudo/etc), shell metacharacters (`;|&$<>\``), safe read-only command whitelist, path traversal detection | +| `auto_review_diff.go` | Phase 2 expanded security checks: `normalizeCommand` (strip quotes/backslashes/collapse whitespace/lowercase), 10 regex-based detectors for heredoc abuse, env expansion in write context, process substitution, named pipes, TTY escape sequences, file descriptor manipulation, unsafe source, encoding attacks, proxy injection, unsafe find-pipe-to-rm | +| `compaction.go` | Conversation micro-compaction and archival -- large tool outputs archived to transcripts, LLM-based conversational summarization (falls back to text extraction when no LLM provided) | +| `compaction_helpers.go` | Compaction helpers: `extractStickyBlocks`/`capStickyContent` (sticky block extraction with byte-budget trimming), `truncateOnlySummary` (circuit-breaker 
fallback), `extractStructuredSummary` (tool names, file paths, key decisions -> `[SUMMARY]` block), `summarizeRounds` (LLM-based or text-extraction fallback with 8K transcript cap) | | `diff.go` | LCS-based unified diff generator for file edit previews | | `ci_watcher.go` | GitHub Actions CI status monitoring via `gh` CLI | | `logger.go` | Dual JSONL + plaintext audit logger with secret redaction | +| `ipc.go` | `IPCBridge` -- Unix domain socket inter-process communication; length-prefixed JSON messages (4-byte big-endian header, 10MB safety cap); `Start` (parent listener), `Connect` (child dial), `Send`/`SendToParent`, `Receive` channel, `SetOnMessage` callback; `readMessage`/`writeMessage` framing | +| `watchdog.go` | `Watchdog` -- child process crash-tolerance manager: configurable crash budget with time-window pruning, `Start`/`Monitor` (auto-restart loop), `Stop` (SIGINT + 5s kill timeout), `Checkpoint`/`Recover` (JSON state persistence), `EnqueueDeadLetter`/`DrainDeadLetters` (disk-backed message queue for crash recovery) | +| `tokenizer.go` | `tokenizeShellCommand` -- state-machine shell command splitter that correctly handles single/double quotes, backslash escapes, and operators (`;`, `|`, `||`, `&&`); `isPathDangerous` -- directory traversal and sensitive path detection with whitelist | +| `migrate_legacy.go` | `migrateGoClaudeIfNeeded` -- one-time migration of memory files from legacy `~/.go-claude/` to `~/.iroha/` (global + project), writes `~/.iroha/.migrated` sentinel | +| `runner_test_helper.go` | `testLLMModel` (no-op LLM for tests) and `NewTestRunner` (creates minimal `CustomRunner` without network calls) | ## For AI Agents ### Working In This Directory -- Global singletons: `GlobalPermissionManager`, `GlobalHookManager`, `GlobalMemoryManager`, `GlobalTodoManager`, `GlobalTaskManager`, `GlobalBackgroundManager`, `GlobalCronScheduler`, `GlobalTeamManager`, `GlobalProtocolManager`, `GlobalAutonomyManager`, `GlobalWorktreeManager`, `GlobalMCPRouter`, 
`GlobalToolCircuitBreaker`, `GlobalAgentPool` -- `ConfirmationBridge` is the async channel between runner (goroutine) and TUI (main thread): `PromptChan`/`ResponseChan` +- Global singletons: `GlobalPermissionManager`, `GlobalHookManager`, `GlobalMemoryManager`, `GlobalTodoManager`, `GlobalTaskManager`, `GlobalBackgroundManager`, `GlobalCronScheduler`, `GlobalTeamManager`, `GlobalProtocolManager`, `GlobalAutonomyManager`, `GlobalWorktreeManager`, `GlobalMCPRouter`, `GlobalToolCircuitBreaker`, `GlobalAgentPool`, `GlobalPluginManager`, `GlobalSkillManager`, `GlobalSubagentManager`, `GlobalSandboxEnabled` +- `ConfirmationBridge` is the async channel between runner (goroutine) and TUI (main thread): `PromptChan`/`ResponseChan`/`CancelChan` - `ToolStatusBridge` provides real-time tool status to TUI via `StatusChan` with background drain worker - `blockingConfirmationTool` wraps every tool to intercept and confirm before execution - `SystemPromptBuilder` assembles the system instruction with a caching boundary - `ToolCircuitBreaker` halts after 3 consecutive identical-arg failures on the same tool +- `DreamConsolidator` runs automated memory deduplication through a 7-gate validation system +- `IPCBridge` enables process-isolated teammates via Unix domain sockets +- `Watchdog` manages child process teammates with crash budget, checkpoint/recovery, and dead-letter queue ### Testing Requirements - `go test ./pkg/agent/...` -- Tests exist for: hooks, memory, permission, todo_manager, autonomous, background, cron, mcp, protocol, task, team, worktree, prompt, auto_review, compaction, diff, ci_watcher, logger, session_store, runner, git_helper, lsp, pool, error_recovery -- **Gap**: `tools.go` and `tools_*.go` have no dedicated test files +- Tests exist for: hooks, hooks_types, memory, permission, todo_manager, autonomous, background, cron, mcp, protocol, task, team, worktree, prompt, auto_review, compaction, diff, ci_watcher, logger, session_store, runner, git_helper, lsp, pool, 
error_recovery, plugin, sandbox, skills, subagent, tokenizer, tools_file, tools_web +- **Gap**: `tools.go` has no dedicated test file (tool registration is integration-tested via runner tests) ### Common Patterns - Mutex-protected global singletons (`sync.RWMutex`) @@ -65,20 +122,29 @@ Core agent orchestration: runner lifecycle, SWE tool definitions (30+ tools), hu - Memory files use YAML frontmatter with auto-generated `MEMORY.md` index - DAG edge reconciliation is bidirectional with auto-unblocking cascade - MCP tools are dynamically discovered and wrapped as `DynamicMCPTool` implementing `tool.Tool` +- Plugin manifests (`plugin.json`) and skill manifests (`skill.json`) follow global-then-project overlay, project overrides global by ID +- Hook execution supports three types: shell command (exit-code protocol), HTTP POST (JSON decision), LLM prompt (JSON audit) +- Subagents use worktree isolation for executor types and read-only CWD for explore/planner/reviewer/researcher types +- Teammates support two modes: in-process goroutine (default) or child process with IPC bridge + watchdog +- Sandbox wrapping is OS-aware: macOS uses `sandbox-exec` with deny-write profile, Linux uses `bwrap` with read-only root +- Web tools use SSRF-safe HTTP transport that validates resolved IPs at connection time +- File edit batch uses two-phase commit: validate all -> snapshot -> apply all -> rollback on failure - Config path: `~/.iroha/` (auto-migrates from legacy `~/.go-claude/`) ## Dependencies ### Internal -- `pkg/llm` — Model adapter (`llm.NagReminderTrigger`, `llm.NoteRoundWithoutUpdate`, `llm.SystemPromptTrigger` callbacks) +- `pkg/llm` -- Model adapter (`llm.NagReminderTrigger`, `llm.NoteRoundWithoutUpdate`, `llm.SystemPromptTrigger` callbacks) +- `pkg/config` -- Configuration loading (`config.LoadConfig` for LSP servers, SearXNG URL) ### External -- `google.golang.org/adk/agent` — Agent framework -- `google.golang.org/adk/agent/llmagent` — LLM agent builder -- 
`google.golang.org/adk/tool` / `functiontool` — Tool system -- `google.golang.org/adk/runner` — Agent runner -- `google.golang.org/adk/session` — Session management -- `google.golang.org/genai` — Generative AI types -- `github.com/google/uuid` — Unique ID generation (background tasks, cron jobs) +- `google.golang.org/adk/agent` -- Agent framework +- `google.golang.org/adk/agent/llmagent` -- LLM agent builder +- `google.golang.org/adk/tool` / `functiontool` -- Tool system +- `google.golang.org/adk/runner` -- Agent runner +- `google.golang.org/adk/session` -- Session management +- `google.golang.org/genai` -- Generative AI types +- `github.com/google/uuid` -- Unique ID generation (background tasks, cron jobs) +- `golang.org/x/net/html` -- HTML parsing for web fetch/search diff --git a/pkg/agent/auto_review_apply_test.go b/pkg/agent/auto_review_apply_test.go new file mode 100644 index 0000000..b33efab --- /dev/null +++ b/pkg/agent/auto_review_apply_test.go @@ -0,0 +1,254 @@ +package agent + +import ( + "testing" +) + +// --------------------------------------------------------------------------- +// Direct coverage for the 10 security check functions in auto_review_apply.go. +// These are called indirectly via heuristicReview but the cover tool attributes +// coverage to the caller, not the individual check functions. 
+// --------------------------------------------------------------------------- + +// --- checkHeredoc --- + +func TestCheckHeredoc_Direct(t *testing.T) { + tests := []struct { + name string + cmd string + safe bool + reason string + }{ + {"safe_cat", "cat file.txt", true, ""}, + {"safe_echo", "echo hello world", true, ""}, + {"heredoc_double_dash", "cat <<-DELIM", false, "heredoc abuse detected"}, + {"heredoc_double", "cat < out.txt", true}, + // Unsafe: env var with write context + {"unsafe_redirect_var", "echo $HOME > out.txt", false}, + {"unsafe_append_var", "echo ${PATH} >> log.txt", false}, + {"unsafe_tee_var", "echo $USER | tee output.txt", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + safe, reason := checkEnvExpansion(tt.cmd) + if safe != tt.safe { + t.Errorf("checkEnvExpansion(%q) safe=%v, want %v, reason=%q", tt.cmd, safe, tt.safe, reason) + } + }) + } +} + +// --- checkProcessSubstitution --- + +func TestCheckProcessSubstitution_Direct(t *testing.T) { + tests := []struct { + name string + cmd string + safe bool + }{ + {"safe_diff", "diff a.txt b.txt", true}, + {"safe_cat", "cat file", true}, + {"unsafe_input_sub", "diff <(sort a.txt) <(sort b.txt)", false}, + {"unsafe_output_sub", "tee >(gzip > out.gz)", false}, + {"empty", "", true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + safe, reason := checkProcessSubstitution(tt.cmd) + if safe != tt.safe { + t.Errorf("checkProcessSubstitution(%q) safe=%v, want %v, reason=%q", tt.cmd, safe, tt.safe, reason) + } + }) + } +} + +// --- checkNamedPipe --- + +func TestCheckNamedPipe_Direct(t *testing.T) { + tests := []struct { + name string + cmd string + safe bool + }{ + {"safe_ls", "ls -la", true}, + {"unsafe_mkfifo", "mkfifo /tmp/pipe", false}, + {"unsafe_mknod", "mknod /tmp/pipe p", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + safe, reason := checkNamedPipe(tt.cmd) + if safe != tt.safe { + 
t.Errorf("checkNamedPipe(%q) safe=%v, want %v, reason=%q", tt.cmd, safe, tt.safe, reason) + } + }) + } +} + +// --- checkTTVEscape --- + +func TestCheckTTVEscape_Direct(t *testing.T) { + tests := []struct { + name string + cmd string + safe bool + }{ + {"safe_printf", `printf "hello world"`, true}, + {"unsafe_x1b", `printf "\x1b[2J"`, false}, + {"unsafe_033", `printf "\033[2J"`, false}, + {"unsafe_e_escape", `printf "\e[0m"`, false}, + {"unsafe_X1B_upper", `printf "\x1B"`, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + safe, reason := checkTTVEscape(tt.cmd) + if safe != tt.safe { + t.Errorf("checkTTVEscape(%q) safe=%v, want %v, reason=%q", tt.cmd, safe, tt.safe, reason) + } + }) + } +} + +// --- checkFileDescriptor --- + +func TestCheckFileDescriptor_Direct(t *testing.T) { + tests := []struct { + name string + cmd string + safe bool + }{ + {"safe_echo", "echo hello", true}, + {"unsafe_exec_fd", "exec 3>/tmp/out.txt", false}, + {"unsafe_redirect_fd", "command >&2", false}, + {"unsafe_read_fd", "command <&3", false}, + {"safe_exec_command", "exec ls", true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + safe, reason := checkFileDescriptor(tt.cmd) + if safe != tt.safe { + t.Errorf("checkFileDescriptor(%q) safe=%v, want %v, reason=%q", tt.cmd, safe, tt.safe, reason) + } + }) + } +} + +// --- checkUnsafeSource --- + +func TestCheckUnsafeSource_Direct(t *testing.T) { + tests := []struct { + name string + cmd string + safe bool + }{ + {"safe_ls", "ls -la", true}, + {"safe_source_relative", "source ./script.sh", true}, + {"unsafe_source_abs", "source /etc/malicious.sh", false}, + {"unsafe_dot_abs", ". 
/tmp/evil.sh", false}, + {"unsafe_source_root", "source /root/.bashrc", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + safe, reason := checkUnsafeSource(tt.cmd) + if safe != tt.safe { + t.Errorf("checkUnsafeSource(%q) safe=%v, want %v, reason=%q", tt.cmd, safe, tt.safe, reason) + } + }) + } +} + +// --- checkEncodingAttack --- + +func TestCheckEncodingAttack_Direct(t *testing.T) { + tests := []struct { + name string + cmd string + safe bool + }{ + {"safe_echo", `echo "hello"`, true}, + {"unsafe_hex", `echo "\x41"`, false}, + {"unsafe_unicode_short", "printf \"\\u0041\"", false}, + {"unsafe_unicode_long", `echo "\U00000041"`, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + safe, reason := checkEncodingAttack(tt.cmd) + if safe != tt.safe { + t.Errorf("checkEncodingAttack(%q) safe=%v, want %v, reason=%q", tt.cmd, safe, tt.safe, reason) + } + }) + } +} + +// --- checkProxyInjection --- + +func TestCheckProxyInjection_Direct(t *testing.T) { + tests := []struct { + name string + cmd string + safe bool + }{ + {"safe_git_clone", "git clone https://github.com/repo", true}, + {"safe_ssh", "ssh user@host", true}, + {"unsafe_proxy_command", "ssh -o ProxyCommand=evil user@host", false}, + {"unsafe_git_config", "git -c core.sshCommand=evil clone url", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + safe, reason := checkProxyInjection(tt.cmd) + if safe != tt.safe { + t.Errorf("checkProxyInjection(%q) safe=%v, want %v, reason=%q", tt.cmd, safe, tt.safe, reason) + } + }) + } +} + +// --- checkUnsafeFindPipe --- + +func TestCheckUnsafeFindPipe_Direct(t *testing.T) { + tests := []struct { + name string + cmd string + safe bool + }{ + {"safe_find", "find . -name '*.go'", true}, + {"unsafe_find_rm", "find . -name '*.log' | while read f; do rm \"$f\"; done", false}, + {"unsafe_find_mv", "find /tmp -type f | while read x; do mv \"$x\" /evil; done", false}, + {"safe_find_pipe_grep", "find . 
-name '*.go' | grep -v test", true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + safe, reason := checkUnsafeFindPipe(tt.cmd) + if safe != tt.safe { + t.Errorf("checkUnsafeFindPipe(%q) safe=%v, want %v, reason=%q", tt.cmd, safe, tt.safe, reason) + } + }) + } +} diff --git a/pkg/agent/auto_review_ext_test.go b/pkg/agent/auto_review_ext_test.go new file mode 100644 index 0000000..5ba106a --- /dev/null +++ b/pkg/agent/auto_review_ext_test.go @@ -0,0 +1,226 @@ +package agent + +import ( + "context" + "strings" + "testing" + "time" +) + +// --- callLLMForFileReview tests --- + +func TestCallLLMForFileReview_SafeResponse(t *testing.T) { + mock := &MockLLM{ + ResponseText: `{"safe": true, "reason": "Normal project file"}`, + } + cfg := &autoReviewConfig{Model: mock} + + result, err := callLLMForFileReview(context.Background(), cfg, "file_write", "main.go", "package main") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !result.Safe { + t.Errorf("expected safe=true, got safe=false, reason=%q", result.Reason) + } + if result.Reason != "Normal project file" { + t.Errorf("expected reason 'Normal project file', got %q", result.Reason) + } +} + +func TestCallLLMForFileReview_UnsafeResponse(t *testing.T) { + mock := &MockLLM{ + ResponseText: `{"safe": false, "reason": "Suspicious binary data"}`, + } + cfg := &autoReviewConfig{Model: mock} + + result, err := callLLMForFileReview(context.Background(), cfg, "file_write", "payload.bin", "binary data") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Safe { + t.Error("expected safe=false, got safe=true") + } + if result.Reason != "Suspicious binary data" { + t.Errorf("expected reason 'Suspicious binary data', got %q", result.Reason) + } +} + +func TestCallLLMForFileReview_LLMError(t *testing.T) { + mock := &MockLLM{ + ResponseErr: context.DeadlineExceeded, + } + cfg := &autoReviewConfig{Model: mock} + + result, err := callLLMForFileReview(context.Background(), 
cfg, "file_write", "test.bin", "data") + if err == nil { + t.Error("expected error from LLM failure") + } + if result.Safe { + t.Error("expected zero-value result on error") + } +} + +func TestCallLLMForFileReview_InvalidJSON(t *testing.T) { + mock := &MockLLM{ + ResponseText: `this is not JSON`, + } + cfg := &autoReviewConfig{Model: mock} + + result, err := callLLMForFileReview(context.Background(), cfg, "file_write", "test.bin", "data") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Safe { + t.Error("expected safe=false on invalid JSON") + } + if !strings.Contains(result.Reason, "format error") { + t.Errorf("expected format error in reason, got %q", result.Reason) + } +} + +func TestCallLLMForFileReview_JSONWrappedInCodeBlock(t *testing.T) { + mock := &MockLLM{ + ResponseText: "```json\n{\"safe\": true, \"reason\": \"Looks good\"}\n```", + } + cfg := &autoReviewConfig{Model: mock} + + result, err := callLLMForFileReview(context.Background(), cfg, "file_write", "app.go", "code") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !result.Safe { + t.Errorf("expected safe=true with code-wrapped JSON, got reason=%q", result.Reason) + } +} + +func TestCallLLMForFileReview_JSONInBackticks(t *testing.T) { + mock := &MockLLM{ + ResponseText: "```\n{\"safe\": false, \"reason\": \"Dangerous\"}\n```", + } + cfg := &autoReviewConfig{Model: mock} + + result, err := callLLMForFileReview(context.Background(), cfg, "file_write", "evil.bin", "data") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Safe { + t.Error("expected safe=false") + } + if result.Reason != "Dangerous" { + t.Errorf("expected reason 'Dangerous', got %q", result.Reason) + } +} + +// --- Additional ReviewFileOperation tests with LLM --- + +func TestReviewFileOperation_WithLLMFileReview(t *testing.T) { + mock := &MockLLM{ + ResponseText: `{"safe": true, "reason": "Normal project file"}`, + } + SetAutoReviewConfig(mock) + defer func() { 
GlobalAutoReviewConfig = nil }() + + // Unknown extension triggers semantic review path + result := ReviewFileOperation("file_write", "config.toml.bak", "some config data") + // This goes through heuristic review first (unknown extension => needs semantic review), + // then to callLLMForFileReview which returns safe=true from mock + if !result.Safe { + t.Errorf("expected LLM to approve, got safe=false, reason=%q", result.Reason) + } +} + +func TestReviewFileOperation_LLMFileReviewFailure(t *testing.T) { + mock := &MockLLM{ + ResponseErr: context.DeadlineExceeded, + } + SetAutoReviewConfig(mock) + defer func() { GlobalAutoReviewConfig = nil }() + + // Unknown extension triggers LLM review, which fails + result := ReviewFileOperation("file_write", "data.bin", "binary data") + if result.Safe { + t.Error("expected safe=false when LLM fails, got safe=true") + } + if !strings.Contains(result.Reason, "LLM review failed") { + t.Errorf("expected LLM review failure message, got %q", result.Reason) + } +} + +func TestReviewFileOperation_SafeExtensionBypassesLLM(t *testing.T) { + // Even with LLM returning error, safe extensions should bypass + mock := &MockLLM{ + ResponseErr: context.DeadlineExceeded, + } + SetAutoReviewConfig(mock) + defer func() { GlobalAutoReviewConfig = nil }() + + result := ReviewFileOperation("file_write", "main.go", "package main") + if !result.Safe { + t.Errorf("safe extension should bypass LLM, got safe=false, reason=%q", result.Reason) + } +} + +// --- callLLMForReview extended tests --- + +func TestCallLLMForReview_JSONCodeBlock(t *testing.T) { + mock := &MockLLM{ + ResponseText: "```json\n{\"safe\": true, \"reason\": \"Read-only command\"}\n```", + } + cfg := &autoReviewConfig{Model: mock} + + result, err := callLLMForReview(context.Background(), cfg, "ls") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !result.Safe { + t.Errorf("expected safe=true from code-block-wrapped JSON, got reason=%q", result.Reason) + } +} + +func 
TestCallLLMForReview_PlainBacktickWrap(t *testing.T) { + mock := &MockLLM{ + ResponseText: "```\n{\"safe\": false, \"reason\": \"Dangerous\"}\n```", + } + cfg := &autoReviewConfig{Model: mock} + + result, err := callLLMForReview(context.Background(), cfg, "rm -rf /") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Safe { + t.Error("expected safe=false") + } +} + +func TestCallLLMForReview_InvalidJSON(t *testing.T) { + mock := &MockLLM{ + ResponseText: "I think this command is safe.", + } + cfg := &autoReviewConfig{Model: mock} + + result, err := callLLMForReview(context.Background(), cfg, "some_cmd") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Safe { + t.Error("expected safe=false on invalid JSON response") + } + if !strings.Contains(result.Reason, "format error") { + t.Errorf("expected format error in reason, got %q", result.Reason) + } +} + +func TestCallLLMForReview_ContextCancellation(t *testing.T) { + mock := &MockLLM{ + ResponseErr: context.Canceled, + } + cfg := &autoReviewConfig{Model: mock} + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Nanosecond) + defer cancel() + + _, err := callLLMForReview(ctx, cfg, "ls") + if err == nil { + t.Error("expected error from cancelled context") + } +} diff --git a/pkg/agent/auto_review_test.go b/pkg/agent/auto_review_test.go index 2b2149e..512bceb 100644 --- a/pkg/agent/auto_review_test.go +++ b/pkg/agent/auto_review_test.go @@ -3,6 +3,7 @@ package agent import ( "context" "iter" + "strings" "testing" "google.golang.org/adk/model" @@ -623,6 +624,270 @@ func TestCheckUnsafeFindPipe(t *testing.T) { }) } +// TestFileHeuristicReview tests fileHeuristicReview with comprehensive table-driven cases. 
+func TestFileHeuristicReview(t *testing.T) { + tests := []struct { + name string + toolName string + filePath string + content string + wantSafe bool + wantInReason string + }{ + // System directory blocks + {"system_dir_etc", "file_write", "/etc/passwd", "", false, "System directory"}, + {"system_dir_usr", "file_write", "/usr/bin/foo", "", false, "System directory"}, + {"system_dir_var", "file_write", "/var/log/a", "", false, "System directory"}, + {"system_dir_sys", "file_write", "/sys/kernel", "", false, "System directory"}, + {"system_dir_proc", "file_write", "/proc/1/status", "", false, "System directory"}, + {"system_dir_dev", "file_write", "/dev/null", "", false, "System directory"}, + + // Sensitive path blocks + {"sensitive_ssh", "file_write", "/home/user/.ssh/id_rsa", "", false, "Sensitive path"}, + {"sensitive_gnupg", "file_write", "/home/user/.gnupg/secring.gpg", "", false, "Sensitive path"}, + {"sensitive_aws", "file_write", "/home/user/.aws/credentials", "", false, "Sensitive path"}, + {"sensitive_env", "file_write", ".env", "", false, "Sensitive path"}, + {"sensitive_credentials_json", "file_write", "credentials.json", "", false, "Sensitive path"}, + {"sensitive_id_rsa", "file_write", "id_rsa", "", false, "Sensitive path"}, + {"sensitive_id_ed25519", "file_write", "id_ed25519", "", false, "Sensitive path"}, + {"sensitive_pem", "file_write", "cert.pem", "", false, "Sensitive path"}, + {"sensitive_key", "file_write", "server.key", "", false, "Sensitive path"}, + {"sensitive_gitconfig", "file_write", "~/.gitconfig", "", false, "Sensitive path"}, + {"sensitive_bashrc", "file_write", "~/.bashrc", "", false, "Sensitive path"}, + {"sensitive_zshrc", "file_write", "~/.zshrc", "", false, "Sensitive path"}, + {"sensitive_profile", "file_write", "~/.profile", "", false, "Sensitive path"}, + + // Secret content blocks + {"secret_password_space", "file_write", "main.go", "password = secret", false, "secret"}, + {"secret_password_eq", "file_write", "main.go", 
"password=secret", false, "secret"}, + {"secret_key_space", "file_write", "main.go", "secret_key = abc", false, "secret"}, + {"secret_private_key", "file_write", "main.go", "private_key=xyz", false, "secret"}, + {"secret_api_secret", "file_write", "main.go", "api_secret = foo", false, "secret"}, + {"secret_rsa_key", "file_write", "main.go", "-----begin rsa private key-----", false, "secret"}, + {"secret_private_key_block", "file_write", "main.go", "-----begin private key-----", false, "secret"}, + + // Safe extensions + {"safe_go", "file_write", "main.go", "package main", true, ""}, + {"safe_ts", "file_write", "app.ts", "const x = 1", true, ""}, + {"safe_tsx", "file_write", "comp.tsx", "export default", true, ""}, + {"safe_js", "file_write", "index.js", "module.exports", true, ""}, + {"safe_jsx", "file_write", "view.jsx", "export default", true, ""}, + {"safe_py", "file_write", "script.py", "import os", true, ""}, + {"safe_rs", "file_write", "main.rs", "fn main()", true, ""}, + {"safe_rb", "file_write", "app.rb", "puts 'hi'", true, ""}, + {"safe_md", "file_write", "readme.md", "# Hello", true, ""}, + {"safe_txt", "file_write", "notes.txt", "some notes", true, ""}, + {"safe_json", "file_write", "config.json", "{}", true, ""}, + {"safe_yaml", "file_write", "values.yaml", "key: val", true, ""}, + {"safe_toml", "file_write", "data.toml", "[section]", true, ""}, + {"safe_css", "file_write", "style.css", "body {}", true, ""}, + {"safe_html", "file_write", "page.html", "", true, ""}, + {"safe_sql", "file_write", "query.sql", "SELECT 1", true, ""}, + {"safe_sh", "file_write", "run.sh", "#!/bin/bash", true, ""}, + {"safe_mod", "file_write", "go.mod", "module foo", true, ""}, + {"safe_sum", "file_write", "go.sum", "", true, ""}, + {"safe_proto", "file_write", "api.proto", "syntax =", true, ""}, + {"safe_graphql", "file_write", "schema.graphql", "type Query", true, ""}, + {"safe_vue", "file_write", "app.vue", "