# Source: nova-forge/prompt_builder.py (herakles-dev/nova-forge, main branch)
"""Nova Forge PromptBuilder — 7-section prompt construction for agents.

Builds (system_prompt, user_prompt) tuples from role definitions,
FORGE.md project context, task data, upstream artifacts, and tool policies.

Also provides:
  gather_environment_context(project_root) — dynamic env info for prompt injection
  PromptBuilder.build_system_prompt()      — structured system-prompt assembly
  PromptBuilder.build_enriched_system_prompt() — system prompt + live env context
"""

from __future__ import annotations

import json
import re
import subprocess
from pathlib import Path
from typing import Any

# YAML is optional — gracefully degrade to a simple --- splitter.
# `_yaml` is always bound (to None when PyYAML is not installed) so that a
# reference to it can never raise NameError; `_YAML_AVAILABLE` remains the
# flag callers should branch on.
try:
    import yaml as _yaml
    _YAML_AVAILABLE = True
except ImportError:
    _yaml = None  # type: ignore[assignment]
    _YAML_AVAILABLE = False


# ── Constants ─────────────────────────────────────────────────────────────────

# Rough characters-per-token ratio used for size estimates
# (same convention used in ForgeAgent).
_CHARS_PER_TOKEN: int = 4

# Maximum number of characters allowed for a single context value before it
# is truncated.
_CONTEXT_ITEM_MAX_CHARS: int = 8_000

# Position of the CONTEXT section in the user-prompt section list.
_CONTEXT_SECTION_IDX: int = 1  # 0-based index within the user-prompt sections

# ── Slim system prompt for 32K models ─────────────────────────────────────────
# One condensed literal holding a "## Identity" and a "## Rules" section, meant
# as a short alternative to the separate full-size sections defined below.
# NOTE(review): this header used to quote "~600 chars vs 4,975 for full set";
# the literal is now roughly 1.3K characters, so re-measure before relying on
# a specific size budget.
# The backslash after the opening quotes and before the closing quotes keeps
# the text from starting or ending with a newline.

_SECTION_SLIM = """\
## Identity
You are Nova, an AI build assistant. You ACT — don't chat. Write working code using tools.

## Rules
- CRITICAL: You MUST call the write_file tool to create files. Do NOT describe code in text. A task is NOT done until every required file is written to disk via write_file.
- Max ~80 lines per write_file call. For large files: write_file (first ~80 lines) then append_file (rest). Repeat until complete.
- Read existing files before editing. Match existing style.
- Before writing code that imports/uses other files, read them with read_file first. Use their ACTUAL interface — never guess function names.
- After writing, check the tool output for Syntax issue or INCOMPLETE — fix immediately.
- Before finishing, read back your key files and verify imports match actual exports.
- If a tool fails, try a different approach. Don't repeat failing calls.
- Write COMPLETE WORKING code — no stubs, placeholders, or TODOs.
- WRONG: addEventListener('click', () => {}); def delete(id): pass
  RIGHT: addEventListener('click', () => { fetch('/api/'+id,{method:'DELETE'}).then(load) }); def delete(id): db.execute(...)
- Only write files assigned to YOUR task. If you get a CONFLICT error, skip that file.
- After writing all files, verify: run bash("python3 -c 'import MODULE'") for Python modules.
- Be concise.\
"""

# ── V11-grade system prompt sections ──────────────────────────────────────────
# Each constant below is one Markdown section ("## <Title>" plus body text).
# The leading/trailing backslashes inside the triple quotes prevent the
# literal from starting or ending with a newline.

# "## Identity": establishes the Nova persona (act via tools rather than
# describe), the Nova Forge framework context, and the expectation of
# consistent behavior across LLM providers.
_SECTION_IDENTITY = """\
## Identity
You are Nova, an autonomous AI build assistant.
You take command — investigate, build, fix, deploy.
You are NOT a chatbot that explains things. You ACT.
You complete tasks by using your tools, not by describing what you would do.
When you can fix something, fix it. Don't ask for permission on low-risk changes.
Think step by step before complex operations, then execute decisively.
Your output is working code and files on disk — not prose about what you plan to do.
You operate inside the Nova Forge framework, which provides tools, task state, and artifact handoffs.
Each agent in the framework owns a specific role; you execute yours with precision.
Success means: the task is done, tests pass, and downstream agents have what they need.
You work with all major LLM providers — AWS Bedrock (Nova), OpenRouter (Gemini), and Anthropic (Claude).
Your behavior must be consistent regardless of which model is executing these instructions.
Efficiency matters: minimize unnecessary tool calls, avoid re-reading files you just read.\
"""

# "## Tool Usage Rules": bullet list telling the agent which tool to prefer for
# each kind of operation (read before edit, dedicated tools over bash,
# write_file/append_file chunking for large files, CONFLICT handling, retry
# limits).  The tool names mentioned here are plain text in the prompt; this
# module does not validate that they exist.
_SECTION_TOOL_RULES = """\
## Tool Usage Rules
- ALWAYS read a file before editing it. Never edit blind.
- Use glob_files instead of bash find or ls. Use grep instead of bash grep.
- Use the think tool before complex multi-step operations to plan your approach.
- After writing or editing, check the verification result in the tool output.
- Prefer edit_file over write_file for existing files — it is safer and more precise.
- If edit_file fails (not unique), include more surrounding context to make old_string unique.
- Use list_directory to explore project structure, not bash ls.
- Use the remember tool to save important patterns you discover for future reference.
- For bulk renaming, use search_replace_all instead of multiple edit_file calls.
- When bash fails, check the exit code and stderr before retrying with a different approach.
- Chain tool calls efficiently: read → understand → plan → act → verify.
- CRITICAL: You MUST call write_file to create files. Do NOT describe file contents in text — use the write_file tool with the full content. A task is NOT complete until all required files are written to disk via write_file.
- For large files: use write_file for the first ~80 lines, then append_file to add remaining sections. Each append adds to the end. Never leave files incomplete.
- Only write files that belong to YOUR task. If a write returns CONFLICT, that file belongs to another agent — skip it and focus on your assigned files.
- Do not repeat the same failing tool call more than twice. Try a different approach instead.
- Prefer targeted reads (specific line ranges) over reading entire large files.
- Use grep to locate symbols before reading the surrounding context.
- When writing a new file, first check with glob_files whether it already exists.
- Never use bash to simulate what a dedicated tool can do — use the right tool.
- When running bash commands, always quote paths that may contain spaces.
- Use absolute paths in bash commands to avoid working-directory surprises.\
"""

# "## Behavior" plus "## WORKING Code vs STUB Code": general conduct rules
# (concise, no unsolicited docs, no stubs, match existing style) followed by
# paired wrong/right snippets illustrating what counts as a stub.
# NOTE(review): this text suggests splitting functions over 50 lines, while
# _SECTION_CODE_QUALITY says to keep functions under 40 lines — confirm which
# limit is intended if both sections end up in the same prompt.
_SECTION_BEHAVIOR = """\
## Behavior
- Be concise. Lead with action, not explanation.
- After completing work, suggest the next logical step.
- If blocked, explain what is blocking and propose concrete alternatives.
- Never create documentation files (README, CHANGELOG, docs/) unless explicitly asked.
- Write complete, production-quality code — not stubs, TODOs, or placeholders.
- Build on existing files when they exist. Check what is already there before creating new files.
- Do not add comments, docstrings, or type annotations to code you did not write or change.
- Match the coding style and conventions of existing code in the project.
- Handle errors defensively: validate inputs, catch exceptions, return useful error messages.
- Never hard-code secrets, credentials, or environment-specific values in source files.
- Keep functions small and focused. If a function exceeds 50 lines, consider splitting it.
- Prefer explicit imports over star imports. Never use wildcard imports in new code.
- When adding new dependencies, add them to requirements.txt or pyproject.toml.
- Use environment variables for all configuration that varies between environments.
- Do not leave dead code, commented-out blocks, or debug prints in files you modify.

## WORKING Code vs STUB Code
NEVER write stub code that looks valid but does nothing. Every function MUST have real logic.
STUB (WRONG):
  button.addEventListener('click', () => { /* TODO: implement */ });
  def delete_task(id): pass  # placeholder
  fetch('/api/tasks').then(data => { /* handle response */ });
WORKING (CORRECT):
  button.addEventListener('click', () => { fetch('/api/tasks/' + id, {method:'DELETE'}).then(() => loadTasks()) });
  def delete_task(id): db.execute('DELETE FROM tasks WHERE id=?', (id,)); db.commit()
  fetch('/api/tasks').then(r => r.json()).then(tasks => { taskList.innerHTML = ''; tasks.forEach(t => addTaskRow(t)) });\
"""

# "## Error Handling": what the agent should do when a tool call, bash
# command, test run, or import fails, and how to report blockers.
_SECTION_ERROR_HANDLING = """\
## Error Handling
- If a tool call fails, read the error carefully and try a different approach.
- If bash fails, check the exit code and stderr output before retrying.
- If you hit max turns, prioritize completing the most critical parts of the task.
- Report blockers clearly with specific details instead of guessing or producing incomplete work.
- If a file you expected does not exist, use glob_files to locate it before giving up.
- If tests fail, read the failure output carefully before making changes.
- Never swallow exceptions silently. Log or re-raise with meaningful context.
- If an import fails, check whether the package is in requirements.txt and installed.
- When a dependency is missing, add it and document the change — do not work around it.
- If you encounter a circular import, refactor to break the cycle rather than using local imports.\
"""

# "## Self-Verification": markers the agent must look for in tool output
# (Syntax issue, HTML/CSS ERROR, INCOMPLETE, WARNING) and a pre-completion
# checklist.  The `\\"` sequences are Python escapes: the rendered prompt
# contains a backslash followed by a double quote inside the bash(...) example.
_SECTION_SELF_VERIFY = """\
## Self-Verification
After writing or editing a file, ALWAYS check the tool output for:
- **Syntax issue**: Fix immediately before proceeding. Do not move to the next file.
- **HTML ERROR / CSS ERROR**: Fix unclosed tags or braces before moving on.
- **INCOMPLETE**: Replace TODO/FIXME/stub/placeholder with real implementation. Never leave them.
- **WARNING**: Address the warning (e.g., read file before overwriting).

Before marking a task complete, verify your work:
1. All required files exist (use glob_files to confirm).
2. Python files: run bash("python3 -c \\"import MODULE\\"") to verify imports.
3. API endpoints: if you wrote a server, test it with bash("curl http://localhost:PORT/api/...").
4. Frontend: if you wrote HTML, verify it references the correct JS/CSS filenames.

If verification reveals a bug, fix it NOW — do not leave it for a later task.\
"""

# "## Code Quality": defensive-coding, naming, function-size, resource-cleanup,
# and basic security expectations for generated code.
_SECTION_CODE_QUALITY = """\
## Code Quality
- Write defensive code: validate function parameters, handle edge cases (empty lists, null values, 0).
- Every API endpoint: validate request data, return proper HTTP status codes, catch exceptions.
- Frontend: handle fetch errors with try/catch, show user-friendly error messages, handle loading states.
- Use consistent naming: snake_case for Python, camelCase for JavaScript/TypeScript.
- Keep functions under 40 lines. If longer, extract helper functions.
- No global mutable state unless absolutely necessary. Prefer function parameters.
- Close resources: database connections, file handles, network sockets.
- Security basics: parameterized SQL queries (never f-strings), escape HTML output, validate user input.\
"""

# "## Previewability": conventions generated apps must follow so they can be
# served through `/preview` — read the port from the PORT env var, bind to
# 127.0.0.1, use a conventional entry-point file, and ship a dependency manifest.
_SECTION_PREVIEWABILITY = """\
## Previewability
Your app MUST work with `/preview` (Cloudflare Tunnel) after build:
- Accept port from `PORT` env var with sensible default (5000 Python, 3000 Node)
- Flask: `app.run(host='127.0.0.1', port=int(os.environ.get('PORT', 5000)))`
- FastAPI: add `if __name__` block with `uvicorn.run(app, host='127.0.0.1', port=int(os.environ.get('PORT', 8000)))`
- Express: `app.listen(process.env.PORT || 3000, '127.0.0.1')`
- Put main app in `app.py` or `main.py`. Export `app` variable at module level.
- Always create `requirements.txt` (Python) or `package.json` with `start` script (Node).
- For static sites, put `index.html` in root or `public/` directory.\
"""

# One-line condensed form of the previewability rules, written as a single
# "- " bullet (no section heading), presumably so it can be appended to an
# existing rules list — confirm against the code that consumes it.
_SECTION_PREVIEWABILITY_SLIM = (
    "- Accept PORT env. Put app in app.py. Create requirements.txt or package.json."
)

# ── Focused system prompt for 300K models (Pro/Premier) ──────────────────────
# A mid-sized single literal: more detailed than _SECTION_SLIM (the 32K
# variant) but shorter than the full set of separate sections.
# It contains these Markdown sections: Identity, Rules, Verification,
# Code Quality, Self-Correction, and "WORKING Code — NOT Stubs".
# Relative to SLIM it adds explicit Verification and Self-Correction sections
# and SQL/SQLite-specific code-quality guidance.

_SECTION_FOCUSED = """\
## Identity
You are Nova, an AI build assistant. You ACT — don't chat. Write working, production-quality code using tools.

## Rules
- CRITICAL: You MUST call the write_file tool to create files. Do NOT describe code in text. A task is NOT done until every required file is written to disk via write_file.
- For large files (~80+ lines): write_file (first ~80 lines) then append_file (remaining). Never leave files incomplete.
- ALWAYS read existing files with read_file BEFORE writing code that depends on them. Use their ACTUAL interface — never assume or hallucinate function names, class names, or APIs.
- After writing, check the tool output for Syntax issue — fix immediately before proceeding.
- If a tool fails, try a different approach. Don't repeat the same failing call.
- Write complete code — no stubs, placeholders, or TODOs.
- Only write files assigned to YOUR task. If you get a CONFLICT error, skip that file.

## Verification
After writing each file:
1. Check for Syntax issue in tool output — fix immediately.
2. Python: run bash("python3 -c 'import MODULE'") to verify imports work.
3. If upstream files exist, read them first to match their exact interface.

## Code Quality
- Use parameterized SQL queries, never f-strings for SQL.
- For SQLite with Flask: use per-request connections (not module-level) to avoid threading issues.
- Handle errors: validate inputs, return proper status codes, catch exceptions.
- Be concise. Minimize unnecessary tool calls.

## Self-Correction
Before finishing, read back files you created. Check: syntax, imports match exports, no TODO/stubs.
If you find issues, fix them now. Do not leave broken code for a downstream agent.

## WORKING Code — NOT Stubs
STUB (WRONG): addEventListener('click', () => { /* TODO */ }); def delete(id): pass
WORKING: addEventListener('click', () => { fetch('/api/'+id, {method:'DELETE'}).then(loadTasks) });
         def delete(id): db.execute('DELETE FROM tasks WHERE id=?', (id,)); db.commit()\
"""

# ── Autonomy-aware prompt sections ───────────────────────────────────────────
# Maps an integer autonomy level (0-5) to a "## AUTONOMY: <NAME>" prompt
# section.  Permissions widen as the level rises:
#   0 MANUAL      — describe every action and wait for explicit approval
#   1 GUIDED      — free reads; explain and wait before writes/commands
#   2 SUPERVISED  — free reads/writes; ask before destructive commands
#   3 TRUSTED     — pause only for irreversible system operations
#   4 AUTONOMOUS  — full permissions, no waiting for approval
#   5 UNATTENDED  — full permissions, no interactive prompts, audit logging
# Each value is built from adjacent string literals, so it is a single string
# with one "\n" after the heading and none at the end.

_SECTION_AUTONOMY_GUIDANCE: dict[int, str] = {
    0: (
        "## AUTONOMY: MANUAL\n"
        "You MUST describe every action you want to take BEFORE using any tool. "
        "Wait for explicit approval. Do not call any tool without the user saying 'yes' or 'go ahead'. "
        "Explain your reasoning at every step so the user can learn what you are doing and why."
    ),
    1: (
        "## AUTONOMY: GUIDED\n"
        "You may read files freely. Before writing or running commands, explain what you plan to do and why. "
        "Keep your explanations concise but clear. The user is learning, so name the concepts involved. "
        "Do not proceed with writes or commands until the user acknowledges."
    ),
    2: (
        "## AUTONOMY: SUPERVISED\n"
        "You may read and write files freely. For bash commands, prefer safe operations. "
        "Ask before destructive commands (rm -rf, git force push, database drops). "
        "Proceed efficiently for routine operations — the user trusts you with standard development tasks."
    ),
    3: (
        "## AUTONOMY: TRUSTED\n"
        "You have broad permissions. Proceed efficiently. Only pause for irreversible system operations "
        "(shutdown, reboot, raw device writes). Handle risky commands like Docker management and "
        "database operations without asking. Maximize throughput."
    ),
    4: (
        "## AUTONOMY: AUTONOMOUS\n"
        "You have full permissions. Maximize efficiency. Log your reasoning but don't wait for approval. "
        "Execute all operations including high-risk commands without pausing. "
        "The user trusts you completely for this workflow."
    ),
    5: (
        "## AUTONOMY: UNATTENDED\n"
        "Full permissions, unattended mode. Log everything. No interactive prompts. "
        "Write detailed audit entries for every significant action. "
        "Optimize for batch/CI execution — never block waiting for user input. "
        "If you encounter an ambiguous situation, choose the safer option and log the decision."
    ),
}

# Role-specific behavioral profiles injected after the core sections.
# Keys are role names ("builder", "reviewer", "planner", "tester",
# "implementer", "chat"); each value is a Markdown block starting with a
# "## Role: ..." heading.
# NOTE(review): the "chat" profile is the only value that ends with a trailing
# "\n"; the others end without one — confirm whether that difference matters
# to the code that joins these sections.
ROLE_PROFILES: dict[str, str] = {
    # Writes code and iterates until the task works; includes an ordered
    # build procedure and a quality bar.
    "builder": (
        "## Role: Builder\n"
        "Focus on writing working code. Be terse.\n"
        "Create files, run tests, iterate until the task is done.\n"
        "Prioritize getting things working over making them perfect.\n"
        "\n"
        "### Build Steps\n"
        "1. Read the task description and any upstream artifacts.\n"
        "2. Explore the existing project structure with list_directory and glob_files.\n"
        "3. Read relevant existing files before writing new ones.\n"
        "4. Implement the feature or fix incrementally — one file at a time.\n"
        "5. Run tests after each significant change to catch regressions early.\n"
        "6. Verify the implementation works end-to-end before calling it complete.\n"
        "7. Summarize what you built and what is left for the next wave.\n"
        "\n"
        "### Quality Bar\n"
        "- Code runs without modification on a clean checkout.\n"
        "- All new functions have at least one test.\n"
        "- No hard-coded paths, credentials, or environment-specific values.\n"
        "- Imports are clean, no unused imports introduced."
    ),
    # Read-only analysis role: produces a written report, modifies nothing.
    "reviewer": (
        "## Role: Reviewer\n"
        "Focus on reading and analysis. Find issues, produce structured findings.\n"
        "Do NOT modify any files. Your output is a written report only.\n"
        "Check for: security vulnerabilities, logic errors, missing error handling,\n"
        "style violations, and test coverage gaps."
    ),
    # Turns requirements into spec.md and tasks.json with a dependency graph.
    "planner": (
        "## Role: Planner\n"
        "Focus on understanding requirements. Create spec.md and tasks.json.\n"
        "Think carefully before acting. Ask clarifying questions only when truly ambiguous.\n"
        "Produce a clear dependency graph in the task list so wave execution is efficient."
    ),
    # Writes and runs tests (pytest) against the requirements.
    "tester": (
        "## Role: Tester\n"
        "Focus on writing and running tests.\n"
        "Verify that behavior matches requirements before marking tasks complete.\n"
        "Use pytest and read existing test patterns before adding new ones.\n"
        "Aim for meaningful coverage: happy path, edge cases, and error conditions."
    ),
    # General-purpose role that extends existing code in its established style.
    "implementer": (
        "## Role: Implementer\n"
        "General-purpose implementation.\n"
        "Read existing code, understand patterns, then extend consistently.\n"
        "Preserve existing style, naming conventions, and architecture decisions.\n"
        "When in doubt about design, follow the pattern already established in the codebase."
    ),
    # Interactive REPL assistant: explore first, edit in place, verify, summarize.
    "chat": (
        "## Role: Interactive Coding Assistant\n"
        "You are chatting with a developer in a live REPL session.\n"
        "You have full tool access to explore and modify the project.\n"
        "\n"
        "### Workflow\n"
        "1. EXPLORE FIRST: Use glob_files and read_file to discover the project structure.\n"
        "   Never ask 'which file?' — find it yourself.\n"
        "2. EDIT EXISTING FILES: Use edit_file for targeted changes to existing code.\n"
        "   Never use write_file to overwrite a file you haven't read. Never create\n"
        "   duplicates of existing files.\n"
        "3. RESPECT STRUCTURE: If the project has frontend/, backend/ directories,\n"
        "   work within that structure. Don't create root-level files that conflict.\n"
        "4. ACT, DON'T ADVISE: Make actual code changes. Don't describe what you would do.\n"
        "5. VERIFY: Read back files after editing to confirm changes applied.\n"
        "6. SUMMARIZE: After changes, list which files you modified and what changed.\n"
        "\n"
        "### Context Awareness\n"
        "- Recent conversation history is provided. Reference it for continuity.\n"
        "- Task state shows what was built. Read those files before modifying.\n"
        "- If a preview URL is active, mention it after UI changes.\n"
        "- Match the existing code style, framework, and conventions.\n"
    ),
}


# ── Environment context gathering ─────────────────────────────────────────────

# Matches the distribution name (with optional "[extras]") at the start of a
# requirements.txt entry.  The name must be followed by end-of-line or by a
# version-specifier / marker / direct-reference / comment delimiter, so bare
# URLs ("git+https://...") and local paths ("./pkg") do not match.
_REQUIREMENT_NAME_RE = re.compile(
    r"([A-Za-z0-9][A-Za-z0-9._-]*(?:\[[^\]]*\])?)\s*(?:$|[<>=!~;@#])"
)


def _parse_requirement_names(text: str) -> list[str]:
    """Return the package names listed in requirements.txt-style *text*."""
    names: list[str] = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        # Skip blanks, comments (including indented ones) and pip option
        # lines such as "-r base.txt", "-e ." or "--index-url ...".
        if not line or line.startswith(("#", "-")):
            continue
        match = _REQUIREMENT_NAME_RE.match(line)
        if match:
            names.append(match.group(1))
    return names


def gather_environment_context(project_root: "Path | str") -> dict[str, str]:
    """Gather dynamic environment context for prompt injection.

    Args:
        project_root: Directory to inspect; accepts a ``Path`` or a string.

    Returns:
        A dict with zero or more of these keys:
            git_state      — branch line + changed-file count (string)
            packages       — up to 15 dep names from requirements.txt, or from
                             package.json when no requirements.txt exists
            project_index  — from forge_index if available (string)
            memory         — from forge_memory if available (string)
            forge_md       — .forge/FORGE.md content, truncated to 2000 chars

    Every section is wrapped in try/except so partial failures never raise;
    a section that fails is simply omitted from the result.
    """
    context: dict[str, str] = {}
    project_root = Path(project_root)

    # 1. Git state.  `git status --short --branch` prints one branch line
    #    followed by one line per changed path.
    try:
        result = subprocess.run(
            ["git", "status", "--short", "--branch"],
            cwd=str(project_root),
            capture_output=True,
            text=True,
            timeout=5,
        )
        status = result.stdout.strip()
        if result.returncode == 0 and status:
            lines = status.split("\n")
            branch = lines[0]
            changed = len(lines) - 1  # first line is branch info
            context["git_state"] = f"Branch: {branch}, {changed} changed files"
    except Exception:
        # Best-effort: git missing, not a repository, timeout, ...
        pass

    # 2. Package manifest (requirements.txt takes precedence over package.json)
    req_file = project_root / "requirements.txt"
    pkg_file = project_root / "package.json"
    if req_file.exists():
        try:
            deps = _parse_requirement_names(req_file.read_text(encoding="utf-8"))
            if deps:
                context["packages"] = f"Python deps: {', '.join(deps[:15])}"
        except Exception:
            pass  # unreadable manifest: omit the section
    elif pkg_file.exists():
        try:
            pkg = json.loads(pkg_file.read_text(encoding="utf-8"))
            deps = list(pkg.get("dependencies", {}).keys())[:15]
            if deps:
                context["packages"] = f"Node deps: {', '.join(deps)}"
        except Exception:
            pass  # malformed or unreadable manifest: omit the section

    # 3. Project index (from forge_index if available)
    try:
        from forge_index import get_or_create_index  # type: ignore[import]
        idx = get_or_create_index(project_root)
        context["project_index"] = idx.to_context(budget_chars=1500)
    except Exception:
        pass  # optional module missing or failed

    # 4. Project memory (from forge_memory if available)
    try:
        from forge_memory import ProjectMemory  # type: ignore[import]
        mem = ProjectMemory(project_root)
        mem_ctx = mem.to_context(budget_chars=1000)
        if mem_ctx:
            context["memory"] = mem_ctx
    except Exception:
        pass  # optional module missing or failed

    # 5. FORGE.md content (first 2000 characters only)
    forge_md = project_root / ".forge" / "FORGE.md"
    if forge_md.exists():
        try:
            context["forge_md"] = forge_md.read_text(encoding="utf-8")[:2000]
        except Exception:
            pass

    return context


# ── PromptBuilder ─────────────────────────────────────────────────────────────

class PromptBuilder:
    """Construct system and user prompts for a ForgeAgent invocation.

    System prompt (stable across turns):
        Section 1 — Role identity + constraints
        Section 2 — FORGE.md project context

    User prompt (changes per invocation):
        Section 3 — Current task details
        Section 4 — Upstream context / prior-wave artifacts
        Section 5 — Available tools + restrictions
        Section 6 — File-ownership constraints + forbidden paths
        Section 7 — Expected output deliverables
    """

    def __init__(self, project_root: Path | str) -> None:
        self.project_root = Path(project_root).resolve()

    # ── Public API ────────────────────────────────────────────────────────────

    def build(
        self,
        role: str,
        task: dict[str, Any],
        context: dict[str, Any] | None = None,
        formation: dict[str, Any] | None = None,
        tool_policy: dict[str, Any] | None = None,
        max_tokens: int = 32_000,
    ) -> tuple[str, str]:
        """Produce the (system prompt, user prompt) pair for one agent run.

        The system prompt is the role section followed by the FORGE.md
        section. The user prompt is the task, upstream-context, tools,
        constraints and output sections, in that order. Empty sections are
        left out and the remaining ones are separated by a blank line.

        Args:
            role:        Role name; also the name of the agent definition
                         file looked up under ``.forge/agents``.
            task:        Task dict (``subject``, ``description``, optional
                         ``metadata``).
            context:     Optional mapping of artifact name to content.
            formation:   Optional formation config; ``None`` is treated as
                         an empty dict.
            tool_policy: Optional tool policy; ``None`` is treated as an
                         empty dict.
            max_tokens:  Token budget handed to ``_truncate_to_budget``,
                         which may shorten or drop the context section.

        Returns:
            ``(system_prompt, user_prompt)``.
        """
        formation = formation or {}
        tool_policy = tool_policy or {}

        # System prompt: role identity first, then project description.
        system_parts = (
            self._build_section_role(role),
            self._build_section_project(),
        )
        system_prompt = "\n\n".join(filter(None, system_parts))

        # User prompt: list position matters, because the budget helper
        # locates the context section by its index in this list.
        user_parts = [
            self._build_section_task(task),
            self._build_section_context(context),
            self._build_section_tools(tool_policy, formation),
            self._build_section_constraints(formation),
            self._build_section_output(),
        ]
        budgeted = self._truncate_to_budget(user_parts, max_tokens)
        user_prompt = "\n\n".join(part for part in budgeted if part)

        return system_prompt, user_prompt

    def build_system_prompt(
        self,
        role: str = "builder",
        project_context: str = "",
        memory_context: str = "",
        index_context: str = "",
        max_context_chars: int = 8_000,
        model_id: str = "",
        autonomy_level: int | None = None,
    ) -> str:
        """Assemble the system prompt for a build-time agent.

        How much boilerplate is included depends on the model's context
        window, looked up with ``get_context_window(model_id)``; a window of
        200,000 is assumed when ``model_id`` is empty:

        * window <= 32,000: the slim section, the first 3 lines of the role
          profile (only if the role has one), and the slim previewability
          section.
        * window <= 1,100,000: the focused section, the first 5 lines of the
          role profile (or a generic role line when the role is unknown),
          and the previewability section.
        * larger windows: the full list of identity/tool/behavior/error/
          self-verify/code-quality/previewability sections followed by the
          complete role profile (or the generic role line).

        After that, in order: autonomy guidance (when ``autonomy_level`` is
        a key of ``_SECTION_AUTONOMY_GUIDANCE``), the three optional context
        blocks, and a model identity hint.

        Args:
            role:              Key into ``ROLE_PROFILES``.
            project_context:   Text placed under "## Project Context".
            memory_context:    Text placed under "## Project Memory".
            index_context:     Text placed under "## Project Structure".
            max_context_chars: Character limit applied to each of the three
                               context blocks separately.
            model_id:          Model identifier used for the window lookup
                               and the identity hint; optional.
            autonomy_level:    Optional autonomy level used to pick a
                               guidance section.

        Returns:
            All selected sections joined by blank lines.
        """
        from config import get_context_window

        window = get_context_window(model_id) if model_id else 200_000
        role_profile = ROLE_PROFILES.get(role)
        generic_role = (
            f"## Role: {role}\n"
            "Complete assigned tasks precisely and report results clearly."
        )

        parts: list[str]
        if window <= 32_000:
            # Small-context models: keep the prompt minimal and skip the
            # generic role line entirely when the role has no profile.
            parts = [_SECTION_SLIM]
            if role_profile:
                parts.append("\n".join(role_profile.strip().splitlines()[:3]))
            parts.append(_SECTION_PREVIEWABILITY_SLIM)
        elif window <= 1_100_000:
            # Medium/large models: focused prompt with a shortened profile.
            parts = [_SECTION_FOCUSED]
            if role_profile:
                parts.append("\n".join(role_profile.strip().splitlines()[:5]))
            else:
                parts.append(generic_role)
            parts.append(_SECTION_PREVIEWABILITY)
        else:
            # Largest windows: every section plus the untrimmed profile.
            parts = [
                _SECTION_IDENTITY,
                _SECTION_TOOL_RULES,
                _SECTION_BEHAVIOR,
                _SECTION_ERROR_HANDLING,
                _SECTION_SELF_VERIFY,
                _SECTION_CODE_QUALITY,
                _SECTION_PREVIEWABILITY,
            ]
            parts.append(role_profile if role_profile else generic_role)

        # Autonomy guidance is only added for levels that have an entry.
        if autonomy_level is not None and autonomy_level in _SECTION_AUTONOMY_GUIDANCE:
            parts.append(_SECTION_AUTONOMY_GUIDANCE[autonomy_level])

        # Optional context blocks; each one is cut to its own limit.
        context_blocks = (
            ("## Project Context", "project spec", project_context),
            ("## Project Memory", "memory", memory_context),
            ("## Project Structure", "index", index_context),
        )
        for heading, label, text in context_blocks:
            if text:
                clipped = self._truncate_context_block(
                    text, max_context_chars, label=label
                )
                parts.append(f"{heading}\n{clipped}")

        # Model identity hint: best effort, any failure is ignored.
        if model_id:
            try:
                from forge_models import get_capability
                cap = get_capability(model_id)
                if cap:
                    parts.append(
                        f"You are {cap.alias} ({cap.context_window // 1000}K context). "
                        f"Strengths: {', '.join(cap.strengths)}. Be efficient with tool calls."
                    )
            except Exception:
                pass

        return "\n\n".join(parts)

    def build_enriched_system_prompt(
        self,
        role: str = "builder",
        task_context: str = "",
        wave_info: str = "",
        max_tokens: int = 32_000,
        model_id: str = "",
        autonomy_level: int | None = None,
    ) -> str:
        """Build a system prompt enriched with live environment context.

        Calls ``gather_environment_context`` for the project root, feeds its
        ``forge_md`` / ``memory`` / ``project_index`` entries into
        ``build_system_prompt``, and then appends an "## Environment" bullet
        list made of the git state, package summary, ``task_context`` and
        ``wave_info`` (whichever are present).

        The model identity hint is produced by ``build_system_prompt`` (it
        receives ``model_id``), so it is not appended again here; doing so
        made the hint appear twice in the final prompt.

        Args:
            role:            Role name for the agent.
            task_context:    Optional free-form build context string.
            wave_info:       Optional wave/phase description.
            max_tokens:      Context-window size; the prompt body is cut at
                             12.5% of it (converted to chars with
                             ``_CHARS_PER_TOKEN``), and never at more than
                             16,000 chars.
            model_id:        Model identifier forwarded to
                             ``build_system_prompt``; optional.
            autonomy_level:  Optional autonomy level forwarded to
                             ``build_system_prompt``.

        Returns:
            The enriched system prompt. When it had to be cut, a truncation
            notice is appended after the cut, so the result can exceed the
            character budget by the length of that notice.
        """
        # Gather live environment data
        env = gather_environment_context(self.project_root)

        # Budget: 12.5% of context window in chars, capped at 16K
        max_chars = min(int(max_tokens * 0.125 * _CHARS_PER_TOKEN), 16_000)

        # Build base prompt using structured context from env. This call
        # already adds the model identity hint when model_id is set.
        base = self.build_system_prompt(
            role=role,
            project_context=env.get("forge_md", ""),
            memory_context=env.get("memory", ""),
            index_context=env.get("project_index", ""),
            model_id=model_id,
            autonomy_level=autonomy_level,
        )

        # Append lightweight environment section
        extra_sections: list[str] = []
        if "git_state" in env:
            extra_sections.append(f"Git: {env['git_state']}")
        if "packages" in env:
            extra_sections.append(env["packages"])
        if task_context:
            extra_sections.append(f"Build context: {task_context}")
        if wave_info:
            extra_sections.append(f"Wave: {wave_info}")

        if extra_sections:
            env_block = "\n## Environment\n" + "\n".join(
                f"- {s}" for s in extra_sections
            )
            base += "\n" + env_block

        # Truncate to budget
        if len(base) > max_chars:
            base = base[:max_chars] + "\n\n[... system prompt truncated to fit budget]"

        return base

    @staticmethod
    def _truncate_context_block(text: str, max_chars: int, label: str = "context") -> str:
        """Truncate a context block to max_chars, appending a note if cut."""
        if len(text) <= max_chars:
            return text
        return text[:max_chars] + f"\n\n... [{label} truncated at {max_chars} chars]"

    # ── Agent definition loading ──────────────────────────────────────────────

    def load_agent_definition(self, name: str) -> tuple[dict, str]:
        """Load .forge/agents/{name}.md — return (frontmatter_dict, body_markdown).

        If the file doesn't exist, return (empty_dict, empty_string).
        YAML frontmatter is parsed from between the first pair of '---' markers.
        """
        agent_file = self.project_root / ".forge" / "agents" / f"{name}.md"
        if not agent_file.exists():
            return {}, ""

        raw = agent_file.read_text(encoding="utf-8", errors="replace")
        return self._parse_frontmatter(raw)

    def load_forge_md(self) -> str:
        """Read .forge/FORGE.md if it exists, return content or empty string."""
        forge_md = self.project_root / ".forge" / "FORGE.md"
        if not forge_md.exists():
            return ""
        return forge_md.read_text(encoding="utf-8", errors="replace")

    # ── Token budget helpers ──────────────────────────────────────────────────

    def _estimate_tokens(self, text: str) -> int:
        """Approximate the token count of ``text``.

        The estimate is the character count integer-divided by the
        module-level ``_CHARS_PER_TOKEN`` ratio.
        """
        char_count = len(text)
        return char_count // _CHARS_PER_TOKEN

    def _truncate_to_budget(
        self,
        sections: list[str],
        max_tokens: int,
    ) -> list[str]:
        """Shrink the context section so the user prompt fits its budget.

        The budget is 70% of ``max_tokens``, measured with
        ``_estimate_tokens``. Only the entry at ``_CONTEXT_SECTION_IDX`` is
        ever changed:

        * If the sections already fit, or the context entry is empty, the
          input list itself is returned.
        * If the other sections alone use up the budget, a copy with an
          empty context entry is returned.
        * Otherwise a copy is returned whose context entry is cut to the
          remaining room, followed by a truncation notice.
        """
        estimates = [self._estimate_tokens(part) for part in sections]
        limit = int(max_tokens * 0.70)

        if sum(estimates) <= limit:
            return sections
        context_text = sections[_CONTEXT_SECTION_IDX]
        if not context_text:
            return sections

        # Tokens consumed by everything except the context section.
        fixed_tokens = sum(
            tokens
            for pos, tokens in enumerate(estimates)
            if pos != _CONTEXT_SECTION_IDX
        )
        room = limit - fixed_tokens

        trimmed = list(sections)
        if room <= 0:
            # Nothing left for context: drop it rather than cut other sections.
            trimmed[_CONTEXT_SECTION_IDX] = ""
            return trimmed

        char_cap = room * _CHARS_PER_TOKEN
        if len(context_text) > char_cap:
            trimmed[_CONTEXT_SECTION_IDX] = (
                context_text[:char_cap]
                + "\n\n... [context truncated to fit token budget]"
            )
        return trimmed

    # ── Section builders ──────────────────────────────────────────────────────

    def _build_section_role(self, role: str) -> str:
        """Section 1 — Role identity + constraints."""
        frontmatter, body = self.load_agent_definition(role)

        role_name = frontmatter.get("name", role)
        role_description = frontmatter.get("description", "")

        lines: list[str] = [
            f"You are {role_name}, a specialized agent in the Nova Forge framework.",
        ]
        if role_description:
            lines.append(f"Your role: {role_description}")
        else:
            lines.append(
                "Your role: Implement code by calling write_file, edit_file, and other tools. "
                "You must USE TOOLS to create and modify files — never just describe what you would write."
            )

        if body.strip():
            lines.append("")
            lines.append(body.strip())

        return "\n".join(lines)

    def _build_section_project(self) -> str:
        """Section 2 — FORGE.md project context."""
        content = self.load_forge_md()
        if not content:
            return ""
        return content.strip()

    def _build_section_task(self, task: dict[str, Any]) -> str:
        """Section 3 — Current task details."""
        metadata = task.get("metadata") or {}
        sprint = metadata.get("sprint", "N/A")
        risk = metadata.get("risk", "N/A")

        lines = [
            "## Current Task",
            f"Subject: {task.get('subject', 'N/A')}",
            f"Description: {task.get('description', 'N/A')}",
            f"Sprint: {sprint}",
            f"Risk: {risk}",
        ]

        # Include file paths from task
        task_files = task.get("files") or metadata.get("files") or []
        if task_files:
            lines.append(f"Files to create/modify: {', '.join(task_files)}")
            lines.append("IMPORTANT: Write files directly in the project root (e.g. write_file('game.js', ...)), NOT in a src/ subdirectory.")

        # Include any additional metadata fields that callers may add
        extra_keys = {k for k in metadata if k not in {"sprint", "risk", "files"}}
        for key in sorted(extra_keys):
            value = metadata[key]
            if isinstance(value, (str, int, float, bool)):
                lines.append(f"{key.capitalize()}: {value}")

        return "\n".join(lines)

    def _build_section_context(self, context: dict[str, Any] | None) -> str:
        """Section 4 — Upstream artifacts and prior wave outputs."""
        if not context:
            return ""

        parts: list[str] = []
        for key, value in context.items():
            text = str(value)
            if len(text) > _CONTEXT_ITEM_MAX_CHARS:
                text = (
                    text[:_CONTEXT_ITEM_MAX_CHARS]
                    + f"\n... [truncated: {len(text)} chars total]"
                )
            if key == "project-spec":
                parts.insert(0, f"## Project Specification (implement according to this)\n{text}")
            else:
                parts.append(f"## Context: {key}\n{text}")

        return "\n\n".join(parts)

    def _build_section_tools(
        self,
        tool_policy: dict[str, Any],
        formation: dict[str, Any],
    ) -> str:
        """Section 5 — Available tools list + restrictions."""
        # Merge tool_policy with formation-level overrides
        available: list[str] = tool_policy.get("available", [])
        restricted: list[str] = tool_policy.get("restricted", [])

        # Formation may specify additional restrictions
        formation_restricted: list[str] = (
            formation.get("tool_policy", {}).get("restricted", [])
        )
        all_restricted = sorted(set(restricted) | set(formation_restricted))

        lines = ["## Available Tools"]
        if available:
            lines.append(f"You have access to: {', '.join(available)}")
        else:
            lines.append(
                "You have access to: read_file, write_file, edit_file, bash, "
                "glob_files, grep"
            )
        if all_restricted:
            lines.append(f"Restricted tools: {', '.join(all_restricted)}")
        else:
            lines.append("Restricted tools: none")

        return "\n".join(lines)

    def _build_section_constraints(self, formation: dict[str, Any]) -> str:
        """Section 6 — File ownership + forbidden paths + rules."""
        ownership: list[str] = formation.get("ownership", [])
        forbidden: list[str] = formation.get("forbidden_paths", [])

        lines = ["## Constraints"]

        if ownership:
            ownership_str = ", ".join(ownership)
            lines.append(f"- Only modify files in: {ownership_str}")
        else:
            lines.append("- Only modify files within the project root")

        lines.append("- Do NOT modify files outside your ownership")

        if forbidden:
            for path in forbidden:
                lines.append(f"- Do NOT touch: {path}")

        lines.append("- Read existing files before editing")
        lines.append("- Report blockers instead of guessing")

        return "\n".join(lines)

    def _build_section_output(self) -> str:
        """Section 7 — Expected deliverables."""
        return (
            "## How to Work\n"
            "- You MUST call write_file/append_file tools to create files. Do NOT just describe code in text.\n"
            "- For files >80 lines: write_file the first ~80 lines, then call append_file repeatedly for remaining sections.\n"
            "- To modify an existing file: call read_file first, then edit_file or replace_lines.\n"
            "- CRITICAL: Write COMPLETE, WORKING implementations — never stubs, never `// TODO`, never empty function bodies.\n"
            "  Every function must have a full implementation. If the spec says 'scanline rendering', write the actual for-loop.\n"
            "- Read the Project Specification carefully — it contains exact constants, colors, and algorithm details.\n"
            "- When finished, briefly state which files you created/modified."
        )

    # ── Frontmatter parsing ───────────────────────────────────────────────────

    def _parse_frontmatter(self, raw: str) -> tuple[dict, str]:
        """Parse YAML frontmatter between --- markers.

        Returns (frontmatter_dict, body_markdown).
        Falls back to empty dict if frontmatter is malformed or yaml is absent.
        """
        raw = raw.lstrip()
        if not raw.startswith("---"):
            return {}, raw

        # Find closing ---
        end_marker = raw.find("\n---", 3)
        if end_marker == -1:
            return {}, raw

        fm_text = raw[3:end_marker].strip()
        body = raw[end_marker + 4:].lstrip("\n")

        if _YAML_AVAILABLE:
            try:
                parsed = _yaml.safe_load(fm_text)
                if isinstance(parsed, dict):
                    return parsed, body
            except _yaml.YAMLError:
                pass
        else:
            # Simple key: value parser (no nesting, no lists)
            result: dict[str, str] = {}
            for line in fm_text.splitlines():
                if ":" in line:
                    k, _, v = line.partition(":")
                    result[k.strip()] = v.strip()
            if result:
                return result, body

        return {}, body