diff --git a/internal/attractor/agents/tmux_env_test.go b/internal/attractor/agents/tmux_env_test.go new file mode 100644 index 00000000..37320238 --- /dev/null +++ b/internal/attractor/agents/tmux_env_test.go @@ -0,0 +1,68 @@ +// Unit tests for the tmux agent session env-building helper. +package agents + +import ( + "testing" + + "github.com/danshapiro/kilroy/internal/attractor/agents/templates" + "github.com/danshapiro/kilroy/internal/attractor/engine" +) + +func TestBuildTmuxAgentEnv_IncludesStageStatusContractAndRuntime(t *testing.T) { + tmpl := &templates.Template{ + BuildEnv: func() map[string]string { + return map[string]string{"TOOL_DEFAULT": "present"} + }, + } + workDir := t.TempDir() + logsRoot := t.TempDir() + execCtx := &engine.Execution{ + WorktreeDir: workDir, + LogsRoot: logsRoot, + Engine: &engine.Engine{ + Options: engine.RunOptions{RunID: "run-001"}, + }, + } + + env := buildTmuxAgentEnv(tmpl, execCtx, "node-alpha") + + cases := map[string]string{ + "TOOL_DEFAULT": "present", + "KILROY_RUN_ID": "run-001", + "KILROY_NODE_ID": "node-alpha", + "KILROY_WORKTREE_DIR": workDir, + "KILROY_LOGS_ROOT": logsRoot, + } + for k, want := range cases { + if got := env[k]; got != want { + t.Errorf("%s = %q, want %q", k, got, want) + } + } + if env["KILROY_STAGE_STATUS_PATH"] == "" { + t.Error("KILROY_STAGE_STATUS_PATH missing — agents cannot find where to write status.json") + } + if env["KILROY_STAGE_STATUS_FALLBACK_PATH"] == "" { + t.Error("KILROY_STAGE_STATUS_FALLBACK_PATH missing") + } + if env["KILROY_STAGE_LOGS_DIR"] == "" { + t.Error("KILROY_STAGE_LOGS_DIR missing") + } + if env["KILROY_DATA_DIR"] == "" { + t.Error("KILROY_DATA_DIR missing") + } +} + +func TestBuildTmuxAgentEnv_NilTemplateStartsClean(t *testing.T) { + execCtx := &engine.Execution{ + WorktreeDir: t.TempDir(), + LogsRoot: t.TempDir(), + Engine: &engine.Engine{Options: engine.RunOptions{RunID: "run-nil-tmpl"}}, + } + env := buildTmuxAgentEnv(nil, execCtx, "node") + if env == nil { + 
t.Fatal("env must not be nil even when template is nil") + } + if env["KILROY_RUN_ID"] != "run-nil-tmpl" { + t.Errorf("KILROY_RUN_ID = %q, want run-nil-tmpl", env["KILROY_RUN_ID"]) + } +} diff --git a/internal/attractor/agents/tmux_handler.go b/internal/attractor/agents/tmux_handler.go index 54727a67..30fd4a62 100644 --- a/internal/attractor/agents/tmux_handler.go +++ b/internal/attractor/agents/tmux_handler.go @@ -69,20 +69,7 @@ func (h *TmuxAgentHandler) Execute(ctx context.Context, exec *engine.Execution, sessionName := buildSessionName(runID, node.ID) // Build environment variables. - env := tmpl.BuildEnv() - if env == nil { - env = map[string]string{} - } - if runID != "" { - env["KILROY_RUN_ID"] = runID - } - env["KILROY_NODE_ID"] = node.ID - // Add input env vars if available. - if exec != nil && exec.Engine != nil { - for k, v := range engine.InputEnvVars(exec.Engine.Options.Inputs) { - env[k] = v - } - } + env := buildTmuxAgentEnv(tmpl, exec, node.ID) // Resolve model from node attributes. modelID := strings.TrimSpace(node.Attr("llm_model", "")) @@ -348,6 +335,31 @@ func resolveToolName(node *model.Node) string { return "claude" // default } +// buildTmuxAgentEnv constructs the environment variables passed to a tmux-run +// agent session. It consolidates the tool template's defaults with the engine +// runtime invariants (run/node IDs, worktree/logs paths, input env) and the +// stage status contract paths so the engine-injected status-contract preamble +// is actionable from inside the session. Without the status contract env vars, +// agents spend tool calls hunting for KILROY_STAGE_STATUS_PATH. 
+func buildTmuxAgentEnv(tmpl *templates.Template, exec *engine.Execution, nodeID string) map[string]string { + var env map[string]string + if tmpl != nil { + env = tmpl.BuildEnv() + } + if env == nil { + env = map[string]string{} + } + for k, v := range engine.BuildStageRuntimeEnv(exec, nodeID) { + env[k] = v + } + if exec != nil { + for k, v := range engine.BuildStageStatusContract(exec.WorktreeDir).EnvVars { + env[k] = v + } + } + return env +} + // buildSessionName creates a unique tmux session name for a node execution. func buildSessionName(runID, nodeID string) string { name := "kilroy" diff --git a/workflows/coding-loop/README.md b/workflows/coding-loop/README.md new file mode 100644 index 00000000..b56bd11c --- /dev/null +++ b/workflows/coding-loop/README.md @@ -0,0 +1,47 @@ +# coding-loop + +An iterative coding workflow that repeatedly chooses sub-tasks, implements them, reviews the results, and decides when the spec is complete. + +## What it does + +Runs up to 12 loops of: + +1. **Task Chooser** — reads the spec + latest review, picks the highest-priority unimplemented sub-task, writes `.kilroy/task.md`. +2. **Implementer** — reads the task, writes code, commits. +3. **Reviewer** — diffs HEAD~1..HEAD against the spec, writes `.reviews/iter-NNN.md` and `.reviews/latest.md`, commits. +4. **Done Gate** — reads spec + latest review, writes `COMPLETE` or `CONTINUE` to `.kilroy/decision.md`. + +When Done Gate writes `COMPLETE` (or `loop_max=12` is reached), the loop exits and a **Report** node writes `result.md`. + +## How to launch + +```bash +kilroy attractor run \ + --package workflows/coding-loop/ \ + --workspace /abs/path/to/target-repo \ + --input '{"spec":"/abs/path/to/spec.md"}' +``` + +- `--workspace` — the repo being coded against (must already exist; the caller handles `git init` / `go mod init` etc.) 
+- `--input spec` — absolute path to the spec/requirements file (read in-place; not copied into the repo) + +## Input contract + +| Key | Required | Description | +|--------|----------|-------------| +| `spec` | yes | Absolute path to the spec/requirements markdown file | + +## Output contract + +| File | Description | +|------------------|-------------| +| `result.md` | Summary: what was implemented, iterations run, final status | +| `.reviews/iter-NNN.md` | Per-iteration reviewer feedback (committed to repo) | +| `.reviews/latest.md` | Rolling copy of the most recent review | + +## Known limits + +- `loop_max=12` — hard ceiling; if the done-gate never writes `COMPLETE` after 12 iterations, the run fails. +- Chooser and done-gate use `claude-haiku-4.5` (cheap, API-based). Implementer and reviewer use `claude-sonnet-4.6` (Claude Code CLI via tmux, or API). +- Spec is NOT committed to the target repo — it is read in-place via the `spec` input path. +- No pre-flight scaffolding — the caller must initialize the repo before launching. diff --git a/workflows/coding-loop/graph.dot b/workflows/coding-loop/graph.dot new file mode 100644 index 00000000..ae1c80a2 --- /dev/null +++ b/workflows/coding-loop/graph.dot @@ -0,0 +1,206 @@ +// coding-loop: iterative coding agent with reviewer feedback and LLM done-gate. +// +// Inputs: spec (absolute path to a spec file). +// Output: result.md at the workspace root. +// +// Flow (up to loop_max=12 iterations): +// start → loop_begin → task_chooser → implementer → reviewer → done_gate → loop_end → report → done +// +// Termination: done_gate writes COMPLETE or CONTINUE to .kilroy/decision.md. +// loop_end checks for COMPLETE and either loops back to loop_begin or exits. 
+ +digraph coding_loop { + graph [ + inputs="spec", + outputs="result.md", + model_stylesheet=" + * { llm_provider: anthropic; llm_model: claude-haiku-4.5; } + .implementer { llm_model: claude-sonnet-4.6; } + .reviewer { llm_model: claude-sonnet-4.6; } + " + ] + + start [shape=Mdiamond, label="Start"] + done [shape=Msquare, label="Done"] + + // Loop sentinel: marks the top of the iteration scope. + // The engine jumps back here when loop_end decides to continue. + loop_begin [ + shape=trapezium, + label="Loop Begin", + loop_id="main", + loop_max=12, + loop_until_file_contains=".kilroy/decision.md:COMPLETE" + ] + + // Task chooser: lightweight model reads spec + latest review, + // decides the most impactful next sub-task, writes .kilroy/task.md. + task_chooser [ + shape=box, + label="Task Chooser", + agent_mode="agent_loop", + prompt="You are the task chooser in an iterative coding workflow. + +Your job: read the spec and latest review, then decide the single most important next sub-task for the implementer to work on this iteration. + +Steps: +1. Read .kilroy/INPUT.md. Find the '## spec' section — it contains the absolute path to the spec file. +2. Read the spec file at that path. +3. Run: git log --oneline -10 +4. Run: git status +5. If .reviews/latest.md exists, read it for prior feedback and what remains incomplete. +6. Choose EXACTLY ONE smallest-possible self-contained sub-task. Do NOT bundle multiple features into one iteration. + - If no code files exist yet, pick ONLY the foundation step (e.g. go.mod bootstrap, or the type + constructor) and STOP there. Do not also implement any features. + - If foundation exists, pick the SINGLE smallest unimplemented feature. Not two. Not three. Exactly one. + - If all features exist but the reviewer flagged a bug, pick JUST that bug and nothing else. + The explicit goal is to force multiple tight iterations. Each iteration should produce the minimum-viable increment — a single method, a single test, a single bugfix. +7. 
Write a concise task description to .kilroy/task.md. Include: + - What to implement: ONE thing, explicitly and narrowly scoped. + - Acceptance criteria for THIS sub-task only (2-4 bullets, focused solely on what's being added this iteration). + - Files likely affected (your best guess). + - Explicit guardrail line, quoted verbatim: Do NOT implement any other features in this iteration. Stop immediately after the scoped sub-task is complete, even if other unimplemented items are visible in the spec. + Keep .kilroy/task.md under 30 lines. + +When finished, write {\"status\":\"success\"} to $KILROY_STAGE_STATUS_PATH or $KILROY_STAGE_STATUS_FALLBACK_PATH if that fails." + ] + + // Implementer: reads .kilroy/task.md, implements the task, commits. + implementer [ + shape=box, + label="Implementer", + class="implementer", + agent_tool="claude", + prompt="You are the implementer in an iterative coding workflow. + +Your job: read the task and implement it in the codebase. + +Steps: +1. Read .kilroy/task.md for the current sub-task and its acceptance criteria. +2. Read .kilroy/INPUT.md to find the spec path ('## spec' section), then read the spec for broader context. +3. Implement ONLY what .kilroy/task.md asks for. Do NOT implement any other features you see in the spec or guess at next steps — the chooser is responsible for scheduling and will hand you the next sub-task on the next iteration. Stay strictly in your lane. +4. Write clean, idiomatic code. Make targeted changes — do not refactor unrelated code. +5. Run any applicable tests or build commands to verify your work does not break existing functionality. +6. Stage and commit ALL changes with a clear message describing what was done (e.g., 'feat: add X per spec'). + If there is genuinely nothing to implement (task already done), write a note to .kilroy/implementer-note.md and commit that. 
+ +When finished, write {\"status\":\"success\"} to $KILROY_STAGE_STATUS_PATH or $KILROY_STAGE_STATUS_FALLBACK_PATH if that fails." + ] + + // Reviewer: reads task + diff, writes .reviews/iter-NNN.md and .reviews/latest.md, commits. + reviewer [ + shape=box, + label="Reviewer", + class="reviewer", + agent_tool="claude", + prompt="You are the reviewer in an iterative coding workflow. + +Your job: review the latest commit against the spec and record structured feedback. + +Steps: +1. Read .kilroy/task.md to understand what was supposed to be implemented this iteration. +2. Read .kilroy/INPUT.md to find the spec path ('## spec' section), then read the spec for acceptance criteria. +3. Run: git log --oneline -5 +4. Run: git show HEAD (shows the commit message + full diff of the implementer's latest commit; works regardless of commit depth) +5. Compute the iteration number: + mkdir -p .reviews + N=$(ls .reviews/iter-*.md 2>/dev/null | wc -l | tr -d ' ') + ITER=$((N + 1)) + PADDED=$(printf '%03d' $ITER) +6. Write your review to both .reviews/iter-${PADDED}.md AND .reviews/latest.md (overwrite latest.md each time). + Review format: + ## Iteration ${PADDED} + ## What Changed + (summary of the diff — what files, what logic) + ## Against Spec + - Done: (list items completed) + - Remaining: (list required items not yet implemented) + ## Quality Notes + (code quality, edge cases, test coverage observations) + ## Recommendation + CONTINUE — if required spec items remain unimplemented. + COMPLETE — ONLY if ALL required spec items are done and the implementation is correct. +7. Stage and commit the .reviews/ changes: + git add .reviews/ + git commit -m \"review: iteration ${PADDED} feedback\" + +When finished, write {\"status\":\"success\"} to $KILROY_STAGE_STATUS_PATH or $KILROY_STAGE_STATUS_FALLBACK_PATH if that fails." + ] + + // Done-gate: lightweight LLM decides COMPLETE vs CONTINUE, writes to .kilroy/decision.md. 
+ done_gate [ + shape=box, + label="Done Gate", + agent_mode="agent_loop", + prompt="You are the done-gate in an iterative coding workflow. + +Your job: decide whether the spec is fully implemented based on the latest review. + +Steps: +1. Read .kilroy/INPUT.md to find the spec path ('## spec' section), then read the spec. +2. Run: ls .reviews/ +3. Read .reviews/latest.md (the most recent review). +4. Optionally read additional .reviews/iter-NNN.md files if you need more context. +5. Decide: + - COMPLETE: ALL required spec items are implemented and the latest review confirms it. + - CONTINUE: Required spec items remain unimplemented or the reviewer says CONTINUE. + +CRITICAL: Write EXACTLY one of these two words to .kilroy/decision.md — nothing else, no whitespace, no newline: + echo -n COMPLETE > .kilroy/decision.md + OR + echo -n CONTINUE > .kilroy/decision.md + +The engine reads this file byte-by-byte to decide loop termination. Any extra characters (spaces, newlines, punctuation) will prevent termination detection. + +When finished, write {\"status\":\"success\"} to $KILROY_STAGE_STATUS_PATH or $KILROY_STAGE_STATUS_FALLBACK_PATH if that fails." + ] + + // Loop sentinel: marks the end of the iteration scope. + // Checks termination conditions; if not met, jumps back to loop_begin. + loop_end [ + shape=invtrapezium, + label="Loop End", + loop_id="main", + loop_max=12, + loop_until_file_contains=".kilroy/decision.md:COMPLETE" + ] + + // Report: summarizes all work done, iterations run, final status. + report [ + shape=box, + label="Report", + agent_mode="agent_loop", + prompt="You are the final reporter in an iterative coding workflow. + +Your job: write a comprehensive summary of the completed work. + +Steps: +1. Read .kilroy/INPUT.md to find the spec path ('## spec' section), then read the spec. +2. Read .reviews/latest.md for the final review and recommendation. +3. Count iterations: ls .reviews/iter-*.md 2>/dev/null | wc -l +4. Run: git log --oneline -20 +5. 
Read .kilroy/decision.md to confirm the final decision (COMPLETE or CONTINUE/loop_max exceeded). +6. Write result.md at the workspace root with these sections: + ## Summary + What was implemented and how it maps to the spec requirements. + ## Iterations + How many loops ran and what changed in each (brief, one line per iteration). + ## Final Review + Key points from .reviews/latest.md. + ## Commits + List of commits made during this run (from git log). + ## Status + COMPLETE — all required spec items implemented. + OR INCOMPLETE — what remains and why the loop stopped. + +When finished, write {\"status\":\"success\"} to $KILROY_STAGE_STATUS_PATH or $KILROY_STAGE_STATUS_FALLBACK_PATH if that fails." + ] + + start -> loop_begin + loop_begin -> task_chooser + task_chooser -> implementer + implementer -> reviewer + reviewer -> done_gate + done_gate -> loop_end + loop_end -> report + report -> done +} diff --git a/workflows/coding-loop/workflow.toml b/workflows/coding-loop/workflow.toml new file mode 100644 index 00000000..e75766fc --- /dev/null +++ b/workflows/coding-loop/workflow.toml @@ -0,0 +1,19 @@ +# Coding-loop workflow: iterative LLM coding agent with reviewer feedback and done-gate. +# Runs up to 12 choose→implement→review→gate iterations against a user-supplied spec. +# Usage: kilroy attractor run --package workflows/coding-loop/ \ +# --workspace /abs/path/to/target-repo \ +# --input '{"spec":"/abs/path/to/spec.md"}' + +name = "coding-loop" +description = "Iterative coding workflow: a task chooser selects sub-tasks, an implementer codes them, a reviewer scores each iteration, and a done-gate decides when the spec is complete." +version = "0.1.0" + +outputs = ["result.md"] + +[[inputs]] +name = "spec" +description = "Absolute path to the feature/task spec file that defines what to build." +required = true + +[defaults] +labels = { workflow = "coding-loop" }