Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
c5e3ebf
Add Gemini API client + MCP server for automated code review
junjihashimoto Jun 27, 2026
fe9d7a1
Gemini.lean: translate doc comments to English
junjihashimoto Jun 28, 2026
182470e
Gemini.lean: translate prompt strings to English
junjihashimoto Jun 28, 2026
81e2b67
LLM chat apps — CLI / TUI / Web demos over a generic MCP orchestrator
junjihashimoto Jun 28, 2026
f73d784
chat: image attachments + per-session history persistence
junjihashimoto Jun 28, 2026
7ce7b1c
chat: interactive allow/deny policy for tool calls
junjihashimoto Jun 28, 2026
628f3ce
coder_mcp_serve — file + shell tools for coding agents
junjihashimoto Jun 28, 2026
ad56f17
agent_dashboard_serve — Playbook + Conductor + Web UI
junjihashimoto Jun 28, 2026
67cfbf9
chuhan_serve — 楚漢恋歌 MVP (Act 1, six routes, two-layer dialogue)
junjihashimoto Jun 28, 2026
7f7d134
chuhan: Act 2 — 楚漢戦争 across all six routes + hidden 韓信西征 END
junjihashimoto Jun 28, 2026
e61e888
chuhan: 3 mini-games (面相 / 玉玦三度 / 背水之陣) + multi-slot save
junjihashimoto Jun 28, 2026
7fd5909
chuhan: LLM TRPG — free-form NPC chat with character cards
junjihashimoto Jun 28, 2026
358094b
chuhan: complete the mini-game and LLM chat coverage
junjihashimoto Jun 28, 2026
62a7c97
chuhan: Act 3 — 劉邦 + 項羽 完結 (TRUE END)
junjihashimoto Jun 28, 2026
1a966c8
chuhan: Act 3 — 韓信 / 張良 / 蕭何 完結 (全 6 ルート完成)
junjihashimoto Jun 28, 2026
8333dcd
chuhan: migrate save-menu JS → LeanJs via localStorage externs
junjihashimoto Jun 28, 2026
2a3eb66
chuhan: ending gallery + test mode + README
junjihashimoto Jun 28, 2026
8463fae
chuhan: photorealistic assets via ComfyUI FLUX schnell
junjihashimoto Jun 28, 2026
f7aa924
chuhan: 9 dramatic ending images + cinematic ending screen
junjihashimoto Jun 28, 2026
edbb409
ChuHan: use photorealistic portraits in character-select grid
junjihashimoto Jun 28, 2026
d684ed8
ChuHan: scene-driven BGM with cross-fading audio runtime
junjihashimoto Jun 28, 2026
28a2090
ChuHan: ship BGM + SFX assets and wire UI sound triggers
junjihashimoto Jun 29, 2026
b693aac
ChuHan: fix initState call — it's a value, not a function
junjihashimoto Jun 29, 2026
238a202
ChuHan: switch data-msg attributes to JSON.stringify via msgAttr helper
junjihashimoto Jun 29, 2026
5b6f7b9
ChuHan: lift choice buttons above dialogue box
junjihashimoto Jun 29, 2026
3e857d3
ChuHan: unblock clicks under .scene-bg gradient + add 呂公 portrait
junjihashimoto Jun 29, 2026
ab72be6
ChuHan: unified button kit (LeanJs helper + .btn-KIND CSS + sfx table)
junjihashimoto Jun 29, 2026
83d1be5
ChuHan: panel + portrait composability helpers
junjihashimoto Jun 29, 2026
a79e923
ChuHan: 子嬰 portrait + startup asset audit
junjihashimoto Jun 29, 2026
fc08d8d
ChuHan: track MISSING_ASSETS manifest (audit output)
junjihashimoto Jun 29, 2026
114db4a
ChuHan: compile-time asset check + speaker coverage audit
junjihashimoto Jun 29, 2026
1d73515
ChuHan: aiResolve TRPG step — free-text action + LLM-judged branch
junjihashimoto Jun 29, 2026
b7d9fa3
ChuHan: fix frozen jade_ring mini after .panel-mini rename
junjihashimoto Jun 29, 2026
8e48739
meta_orchestrator: Gemini-driven PM watching a Claude Code zellij pane
junjihashimoto Jun 30, 2026
fbfe5c7
meta_orchestrator: persist memos + --resume across runs
junjihashimoto Jun 30, 2026
398f5c0
meta_orchestrator: multi-agent runtime + JSON config + slash commands
junjihashimoto Jun 30, 2026
bfc2358
LeanTea.Tui: brick-style widget kit + pure test harness
junjihashimoto Jun 30, 2026
16d3412
meta_orchestrator: full-screen TUI on LeanTea.Tui
junjihashimoto Jul 1, 2026
b8646e2
llm_chat_tui: render half rebuilt on LeanTea.Tui widget kit
junjihashimoto Jul 1, 2026
9e2cfb9
ChuHan: hpBar + ringBar + btnRow + dialogue helpers
junjihashimoto Jul 1, 2026
712e354
meta_orchestrator: pluggable LLM backends + /review role routing
junjihashimoto Jul 1, 2026
478f3e2
Split ChuHan game + meta_orchestrator into their own repos
junjihashimoto Jul 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions LeanTea.lean
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import LeanTea.Net.Desktop
import LeanTea.Net.Memcached
import LeanTea.Net.Valkey
import LeanTea.Cloud.S3
import LeanTea.Cloud.Gemini
import LeanTea.Auth
import LeanTea.Js
import LeanTea.Css
Expand All @@ -40,9 +41,13 @@ import LeanTea.WebSpec
import LeanTea.Template
import LeanTea.Markdown
import LeanTea.Llm.Openai
import LeanTea.Llm.McpOrchestrator
import LeanTea.Llm.ChatStore
import LeanTea.Browser
import LeanTea.Agent.Memory
import LeanTea.Agent.Script
import LeanTea.Agent.Playbook
import LeanTea.Agent.Conductor
import LeanTea.Crypto.Base64
import LeanTea.Crypto.Sha256
import LeanTea.Crypto.Hmac
Expand Down
347 changes: 347 additions & 0 deletions LeanTea/Agent/Conductor.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,347 @@
import LeanTea.Agent.Playbook
import LeanTea.Llm.McpOrchestrator
import Lean.Data.Json

/-! # LeanTea.Agent.Conductor — observe / pick / run / record loop

Sits on top of `LeanTea.Llm.McpOrchestrator` (for MCP tool calls)
and `LeanTea.Agent.Playbook` (for the routines to schedule). The
loop is:

```
loop:
if paused or aborted: bail
obs ← observer(orch)
candidates ← playbooks.filter (·.pre.matches obs.screen ∧ ·.enabled)
candidates ← drop those that hit `maxBurst`
if candidates.empty:
decision ← escalator { reason := "no_match", observation := obs, ... }
apply decision
else:
pb ← bandit.select(candidates, stats)
try:
outcome ← runPlaybook(pb)
stats.record(pb, outcome.success, outcome.reward)
catch ex:
decision ← escalator { reason := "exception", ..., error? := some ex }
apply decision
```

The bandit is UCB1 over `Stats.avgReward + sqrt(2 ln N / n)`,
with the playbook's `estReward` as the optimistic prior used until
the first run is recorded. Same shape as the one used in most
contextual-bandit demos. -/

namespace LeanTea.Agent.Conductor

open Lean (Json)
open LeanTea.Agent.Playbook
open LeanTea.Llm.McpOrchestrator

/-! ## Observation -/

structure Observation where
/-- Identifier the conductor matches preconditions against. -/
screen : String
/-- Optional data URL screenshot, surfaced to the dashboard. -/
screenshot : String := ""
/-- Free-form context the observer can attach (entity counts,
energy bar, anything the playbooks might read in the future). -/
extra : Json := Json.null
deriving Inhabited

/-- An observer derives the current state from the live MCP world.

Common implementations:
* `dom.querySelector(...).textContent` via `browser__browser_evaluate`
* a static value (when the conductor drives a single-screen game)
* an LLM classifier given a screenshot

Keeping this pluggable means a dashboard can swap classifiers
without restarting the conductor process. -/
def Observer := Orchestrator → IO Observation

/-! ## Run outcome -/

structure RunOutcome where
success : Bool
reward : Float := 0.0
durationMs : Nat := 0
/-- The MCP tool responses, in execution order. Useful for the
dashboard's "live" pane. -/
steps : Array Json := #[]
error? : Option String := none
deriving Inhabited

/-! ## Step executor

We translate `LeanTea.Agent.Script.Action` into orchestrator
`callTool` invocations. Only the actions we actually need for v1
are handled; everything else routes through `.toolCall` which is
just "call any MCP tool with these args". -/

open LeanTea.Agent.Script in
private def runAction (orch : Orchestrator) (a : Action) : IO Json := do
match a with
| .clickXy x y =>
orch.callTool "browser__browser_click_xy" <| Json.mkObj [
("x", Json.num (Int.ofNat x)),
("y", Json.num (Int.ofNat y))
]
| .clickKnown _ =>
/- v1 doesn't wire Memory.recall here yet; surface a clear error so
the calling playbook switches to clickXy. -/
throw <| IO.userError "clickKnown is not yet supported by the v1 Conductor — \
use clickXy or wrap the lookup in a tool_call to ui_recall"
| .wait ms =>
IO.sleep ms.toUInt32
return Json.mkObj [("waited", Json.num (Int.ofNat ms))]
| .screenshot _ =>
orch.callTool "browser__browser_screenshot" (Json.mkObj [])
| .waitForScreen _ timeoutMs =>
/- Best-effort: we just sleep. A future version can poll the
observer until the screen tag matches. -/
IO.sleep timeoutMs.toUInt32
return Json.mkObj [("waited_for_screen", Json.bool true)]
| .toolCall tool args _ _ =>
orch.callTool tool args

private def runPlaybookSteps (orch : Orchestrator) (pb : Playbook)
: IO (Bool × Array Json × Option String) := do
let mut acc : Array Json := #[]
for s in pb.script.steps do
try
let r ← runAction orch s.act
acc := acc.push r
catch e =>
return (false, acc, some s!"{e}")
return (true, acc, none)

/-- Extract the `reward` field from the last step's result, if any.
Playbooks that want to report a custom reward should make their
last action a `tool_call` whose response carries a number under
`reward`; otherwise a successful run is worth `pb.estReward`. -/
private def rewardFromSteps (pb : Playbook) (steps : Array Json) : Float :=
match steps.back? with
| none => pb.estReward
| some last =>
match (last.getObjVal? "reward").toOption with
| some (.num n) => n.mantissa.toNat.toFloat
| some (.str s) =>
/- Reuse Playbook's tiny float parser by going through Json. -/
match s.toNat? with
| some n => n.toFloat
| none => pb.estReward
| _ => pb.estReward

/-! ## UCB1 selection

`score = avgReward + c * sqrt(ln totalRuns / n)` with c=√2. Playbooks
that haven't been run yet are scored with their `estReward` plus a
big bonus so each gets tried once. -/

private def ucbScore (s : Stats) (estReward : Float) (totalRuns : Nat) : Float :=
if s.runs == 0 then
estReward + 1e6 -- enormous explore bonus; rank above any played arm
else
let avg := s.avgReward
let nFlt := s.runs.toFloat
let totl := (totalRuns.max 1).toFloat
let bonus := (1.41421356 : Float) * Float.sqrt (Float.log totl / nFlt)
avg + bonus

/-- Pick the best candidate by UCB1. -/
def selectPlaybook (candidates : Array (Playbook × Stats)) (totalRuns : Nat)
: Option Playbook :=
if candidates.isEmpty then none
else
let scored := candidates.map fun (p, s) => (p, ucbScore s p.estReward totalRuns)
let best := scored.foldl
(init := scored[0]!)
(fun acc cur => if cur.2 > acc.2 then cur else acc)
some best.1

/-! ## Escalation -/

structure EscalationCtx where
reason : String -- "no_match" | "exception" | "burst_cap" | "manual"
observation : Observation
playbook? : Option Playbook := none
error? : Option String := none
/-- Recent (id, success) outcomes for context. -/
recent : Array (String × Bool) := #[]
deriving Inhabited

inductive EscalationDecision where
| retry
| usePlaybook (id : String)
| skipFor (ms : Nat)
| pause
| abort
deriving Repr, Inhabited

def Escalator := EscalationCtx → IO EscalationDecision

/-- Default escalator: log to stderr + `LeanTea.Agent.Memory`
escalations.jsonl, then `skipFor 2000` to try again later. Good
enough for the "headless smoke" case. -/
def defaultEscalator : Escalator := fun ctx => do
IO.eprintln s!"⚠ escalation: reason={ctx.reason} screen={ctx.observation.screen}"
if let some e := ctx.error? then IO.eprintln s!" error: {e}"
return .skipFor 2000

/-! ## Live state -/

/-- One row of the run history, suitable for the dashboard timeline. -/
structure HistoryEntry where
ts : Nat
playbookId : String
success : Bool
reward : Float
durationMs : Nat
deriving Inhabited

structure LiveState where
/-- Currently running playbook id, or empty when idle. -/
current : String := ""
observation : Observation := { screen := "(unknown)" }
/-- Recent history (cap ~ 200 entries). -/
history : Array HistoryEntry := #[]
stats : Std.HashMap String Stats := ∅
startedAtMs : Nat := 0
cumReward : Float := 0.0
/-- Count of consecutive runs of the same playbook. Reset on switch
/ failure; used to enforce `maxBurst`. -/
burstId : String := ""
burstCount : Nat := 0
deriving Inhabited

/-! ## Config -/

structure Config where
orch : Orchestrator
observer : Observer
escalator : Escalator := defaultEscalator
storeDir : String
/-- Time to sleep between iterations of the loop (ms). -/
tickMs : Nat := 250
/-- Flag the dashboard flips to pause/unpause. -/
paused : IO.Ref Bool
/-- Flag the dashboard flips to abort entirely. -/
aborted : IO.Ref Bool

/-- Build a fresh `Config` with default control refs. -/
def Config.mk' (orch : Orchestrator) (observer : Observer) (storeDir : String)
(escalator : Escalator := defaultEscalator) (tickMs : Nat := 250)
: IO Config := do
let paused ← IO.mkRef false
let aborted ← IO.mkRef false
return { orch, observer, escalator, storeDir, tickMs, paused, aborted }

/-! ## One step -/

private def applyDecision (cfg : Config) (decision : EscalationDecision) : IO Unit := do
match decision with
| .retry => pure ()
| .usePlaybook _ => pure () -- next iteration will see fresh stats
| .skipFor ms => IO.sleep ms.toUInt32
| .pause => cfg.paused.set true
| .abort => cfg.aborted.set true

private def recordOutcome (cfg : Config) (state : IO.Ref LiveState)
(pb : Playbook) (outcome : RunOutcome) : IO Unit := do
let now ← IO.monoMsNow
let st ← state.get
let cur := (st.stats.get? pb.id).getD {}
let cur := cur.record outcome.success outcome.reward now
let stats := st.stats.insert pb.id cur
let entry : HistoryEntry := {
ts := now, playbookId := pb.id,
success := outcome.success, reward := outcome.reward,
durationMs := outcome.durationMs
}
let hist := st.history.push entry
let hist := if hist.size > 200 then hist.extract (hist.size - 200) hist.size else hist
let burstCount :=
if pb.id == st.burstId then st.burstCount + 1 else 1
state.set { st with
current := "",
history := hist,
stats,
cumReward := st.cumReward + outcome.reward,
burstId := pb.id,
burstCount
}
/- Persist stats so a conductor restart doesn't lose the bandit
history. Cheap — written every tick at most. -/
saveAllStats cfg.storeDir stats

private def runPlaybook (cfg : Config) (state : IO.Ref LiveState) (pb : Playbook)
: IO Unit := do
let now ← IO.monoMsNow
state.modify (fun s => { s with current := pb.id })
let (ok, steps, err?) ← runPlaybookSteps cfg.orch pb
let later ← IO.monoMsNow
let outcome : RunOutcome := {
success := ok,
reward := if ok then rewardFromSteps pb steps else 0.0,
durationMs := later - now,
steps,
error? := err?
}
recordOutcome cfg state pb outcome
unless ok do
let st ← state.get
let ctx : EscalationCtx := {
reason := "exception",
observation := st.observation,
playbook? := some pb,
error? := outcome.error?,
recent := st.history.toList.reverse.take 5
|>.map (fun h => (h.playbookId, h.success))
|>.toArray
}
let decision ← cfg.escalator ctx
applyDecision cfg decision

def tick (cfg : Config) (state : IO.Ref LiveState) : IO Unit := do
if ← cfg.aborted.get then return
if ← cfg.paused.get then
IO.sleep cfg.tickMs.toUInt32
return
/- Observe. -/
let obs ← cfg.observer cfg.orch
state.modify (fun s => { s with observation := obs })
/- Load playbooks (cheap; live reload). -/
let playbooks ← listPlaybooks cfg.storeDir
let st ← state.get
let candidates := playbooks.filter fun pb =>
pb.enabled
&& pb.pre.matches obs.screen
&& (pb.maxBurst == 0 || pb.id != st.burstId || st.burstCount < pb.maxBurst)
if candidates.isEmpty then
let ctx : EscalationCtx := {
reason := "no_match", observation := obs,
recent := st.history.toList.reverse.take 5
|>.map (fun h => (h.playbookId, h.success))
|>.toArray
}
let decision ← cfg.escalator ctx
applyDecision cfg decision
return
let totalRuns := st.history.size
let scored := candidates.map fun pb =>
let s := (st.stats.get? pb.id).getD {}
(pb, s)
match selectPlaybook scored totalRuns with
| none => IO.sleep cfg.tickMs.toUInt32
| some pb => runPlaybook cfg state pb

partial def runLoop (cfg : Config) (state : IO.Ref LiveState) : IO Unit := do
if ← cfg.aborted.get then return
try tick cfg state
catch e => IO.eprintln s!"conductor: tick threw: {e}"
IO.sleep cfg.tickMs.toUInt32
runLoop cfg state

end LeanTea.Agent.Conductor
Loading
Loading