diff --git a/LeanTea.lean b/LeanTea.lean index eba8d1b..906b559 100644 --- a/LeanTea.lean +++ b/LeanTea.lean @@ -28,6 +28,7 @@ import LeanTea.Net.Desktop import LeanTea.Net.Memcached import LeanTea.Net.Valkey import LeanTea.Cloud.S3 +import LeanTea.Cloud.Gemini import LeanTea.Auth import LeanTea.Js import LeanTea.Css @@ -40,9 +41,13 @@ import LeanTea.WebSpec import LeanTea.Template import LeanTea.Markdown import LeanTea.Llm.Openai +import LeanTea.Llm.McpOrchestrator +import LeanTea.Llm.ChatStore import LeanTea.Browser import LeanTea.Agent.Memory import LeanTea.Agent.Script +import LeanTea.Agent.Playbook +import LeanTea.Agent.Conductor import LeanTea.Crypto.Base64 import LeanTea.Crypto.Sha256 import LeanTea.Crypto.Hmac diff --git a/LeanTea/Agent/Conductor.lean b/LeanTea/Agent/Conductor.lean new file mode 100644 index 0000000..b2c584f --- /dev/null +++ b/LeanTea/Agent/Conductor.lean @@ -0,0 +1,347 @@ +import LeanTea.Agent.Playbook +import LeanTea.Llm.McpOrchestrator +import Lean.Data.Json + +/-! # LeanTea.Agent.Conductor — observe / pick / run / record loop + +Sits on top of `LeanTea.Llm.McpOrchestrator` (for MCP tool calls) +and `LeanTea.Agent.Playbook` (for the routines to schedule). The +loop is: + +``` +loop: + if paused or aborted: bail + obs ← observer(orch) + candidates ← playbooks.filter (·.pre.matches obs.screen ∧ ·.enabled) + candidates ← drop those that hit `maxBurst` + if candidates.empty: + decision ← escalator { reason := "no_match", observation := obs, ... } + apply decision + else: + pb ← bandit.select(candidates, stats) + try: + outcome ← runPlaybook(pb) + stats.record(pb, outcome.success, outcome.reward) + catch ex: + decision ← escalator { reason := "exception", ..., error? := some ex } + apply decision +``` + +The bandit is UCB1 over `Stats.avgReward + sqrt(2 ln N / n)`, +with the playbook's `estReward` as the optimistic prior used until +the first run is recorded. Same shape as the one used in most +contextual-bandit demos. -/ + +namespace LeanTea.Agent.Conductor + +open Lean (Json) +open LeanTea.Agent.Playbook +open LeanTea.Llm.McpOrchestrator + +/-! ## Observation -/ + +structure Observation where + /-- Identifier the conductor matches preconditions against. -/ + screen : String + /-- Optional data URL screenshot, surfaced to the dashboard. -/ + screenshot : String := "" + /-- Free-form context the observer can attach (entity counts, + energy bar, anything the playbooks might read in the future). -/ + extra : Json := Json.null + deriving Inhabited + +/-- An observer derives the current state from the live MCP world. + + Common implementations: + * `dom.querySelector(...).textContent` via `browser__browser_evaluate` + * a static value (when the conductor drives a single-screen game) + * an LLM classifier given a screenshot + + Keeping this pluggable means a dashboard can swap classifiers + without restarting the conductor process. -/ +def Observer := Orchestrator → IO Observation + +/-! ## Run outcome -/ + +structure RunOutcome where + success : Bool + reward : Float := 0.0 + durationMs : Nat := 0 + /-- The MCP tool responses, in execution order. Useful for the + dashboard's "live" pane. -/ + steps : Array Json := #[] + error? : Option String := none + deriving Inhabited + +/-! ## Step executor + +We translate `LeanTea.Agent.Script.Action` into orchestrator +`callTool` invocations. Only the actions we actually need for v1 +are handled; everything else routes through `.toolCall` which is +just "call any MCP tool with these args". -/ + +open LeanTea.Agent.Script in +private def runAction (orch : Orchestrator) (a : Action) : IO Json := do + match a with + | .clickXy x y => + orch.callTool "browser__browser_click_xy" <| Json.mkObj [ + ("x", Json.num (Int.ofNat x)), + ("y", Json.num (Int.ofNat y)) + ] + | .clickKnown _ => + /- v1 doesn't wire Memory.recall here yet; surface a clear error so + the calling playbook switches to clickXy. -/ + throw <| IO.userError "clickKnown is not yet supported by the v1 Conductor — \ +use clickXy or wrap the lookup in a tool_call to ui_recall" + | .wait ms => + IO.sleep ms.toUInt32 + return Json.mkObj [("waited", Json.num (Int.ofNat ms))] + | .screenshot _ => + orch.callTool "browser__browser_screenshot" (Json.mkObj []) + | .waitForScreen _ timeoutMs => + /- Best-effort: we just sleep. A future version can poll the + observer until the screen tag matches. -/ + IO.sleep timeoutMs.toUInt32 + return Json.mkObj [("waited_for_screen", Json.bool true)] + | .toolCall tool args _ _ => + orch.callTool tool args + +private def runPlaybookSteps (orch : Orchestrator) (pb : Playbook) + : IO (Bool × Array Json × Option String) := do + let mut acc : Array Json := #[] + for s in pb.script.steps do + try + let r ← runAction orch s.act + acc := acc.push r + catch e => + return (false, acc, some s!"{e}") + return (true, acc, none) + +/-- Extract the `reward` field from the last step's result, if any. + Playbooks that want to report a custom reward should make their + last action a `tool_call` whose response carries a number under + `reward`; otherwise a successful run is worth `pb.estReward`. -/ +private def rewardFromSteps (pb : Playbook) (steps : Array Json) : Float := + match steps.back? with + | none => pb.estReward + | some last => + match (last.getObjVal? "reward").toOption with + | some (.num n) => n.mantissa.toNat.toFloat + | some (.str s) => + /- Reuse Playbook's tiny float parser by going through Json. -/ + match s.toNat? with + | some n => n.toFloat + | none => pb.estReward + | _ => pb.estReward + +/-! ## UCB1 selection + +`score = avgReward + c * sqrt(ln totalRuns / n)` with c=√2. Playbooks +that haven't been run yet are scored with their `estReward` plus a +big bonus so each gets tried once. -/ + +private def ucbScore (s : Stats) (estReward : Float) (totalRuns : Nat) : Float := + if s.runs == 0 then + estReward + 1e6 -- enormous explore bonus; rank above any played arm + else + let avg := s.avgReward + let nFlt := s.runs.toFloat + let totl := (totalRuns.max 1).toFloat + let bonus := (1.41421356 : Float) * Float.sqrt (Float.log totl / nFlt) + avg + bonus + +/-- Pick the best candidate by UCB1. -/ +def selectPlaybook (candidates : Array (Playbook × Stats)) (totalRuns : Nat) + : Option Playbook := + if candidates.isEmpty then none + else + let scored := candidates.map fun (p, s) => (p, ucbScore s p.estReward totalRuns) + let best := scored.foldl + (init := scored[0]!) + (fun acc cur => if cur.2 > acc.2 then cur else acc) + some best.1 + +/-! ## Escalation -/ + +structure EscalationCtx where + reason : String -- "no_match" | "exception" | "burst_cap" | "manual" + observation : Observation + playbook? : Option Playbook := none + error? : Option String := none + /-- Recent (id, success) outcomes for context. -/ + recent : Array (String × Bool) := #[] + deriving Inhabited + +inductive EscalationDecision where + | retry + | usePlaybook (id : String) + | skipFor (ms : Nat) + | pause + | abort + deriving Repr, Inhabited + +def Escalator := EscalationCtx → IO EscalationDecision + +/-- Default escalator: log to stderr + `LeanTea.Agent.Memory` + escalations.jsonl, then `skipFor 2000` to try again later. Good + enough for the "headless smoke" case. -/ +def defaultEscalator : Escalator := fun ctx => do + IO.eprintln s!"⚠ escalation: reason={ctx.reason} screen={ctx.observation.screen}" + if let some e := ctx.error? then IO.eprintln s!" error: {e}" + return .skipFor 2000 + +/-! ## Live state -/ + +/-- One row of the run history, suitable for the dashboard timeline. -/ +structure HistoryEntry where + ts : Nat + playbookId : String + success : Bool + reward : Float + durationMs : Nat + deriving Inhabited + +structure LiveState where + /-- Currently running playbook id, or empty when idle. -/ + current : String := "" + observation : Observation := { screen := "(unknown)" } + /-- Recent history (cap ~ 200 entries). -/ + history : Array HistoryEntry := #[] + stats : Std.HashMap String Stats := ∅ + startedAtMs : Nat := 0 + cumReward : Float := 0.0 + /-- Count of consecutive runs of the same playbook. Reset on switch + / failure; used to enforce `maxBurst`. -/ + burstId : String := "" + burstCount : Nat := 0 + deriving Inhabited + +/-! ## Config -/ + +structure Config where + orch : Orchestrator + observer : Observer + escalator : Escalator := defaultEscalator + storeDir : String + /-- Time to sleep between iterations of the loop (ms). -/ + tickMs : Nat := 250 + /-- Flag the dashboard flips to pause/unpause. -/ + paused : IO.Ref Bool + /-- Flag the dashboard flips to abort entirely. -/ + aborted : IO.Ref Bool + +/-- Build a fresh `Config` with default control refs. -/ +def Config.mk' (orch : Orchestrator) (observer : Observer) (storeDir : String) + (escalator : Escalator := defaultEscalator) (tickMs : Nat := 250) + : IO Config := do + let paused ← IO.mkRef false + let aborted ← IO.mkRef false + return { orch, observer, escalator, storeDir, tickMs, paused, aborted } + +/-! ## One step -/ + +private def applyDecision (cfg : Config) (decision : EscalationDecision) : IO Unit := do + match decision with + | .retry => pure () + | .usePlaybook _ => pure () -- next iteration will see fresh stats + | .skipFor ms => IO.sleep ms.toUInt32 + | .pause => cfg.paused.set true + | .abort => cfg.aborted.set true + +private def recordOutcome (cfg : Config) (state : IO.Ref LiveState) + (pb : Playbook) (outcome : RunOutcome) : IO Unit := do + let now ← IO.monoMsNow + let st ← state.get + let cur := (st.stats.get? pb.id).getD {} + let cur := cur.record outcome.success outcome.reward now + let stats := st.stats.insert pb.id cur + let entry : HistoryEntry := { + ts := now, playbookId := pb.id, + success := outcome.success, reward := outcome.reward, + durationMs := outcome.durationMs + } + let hist := st.history.push entry + let hist := if hist.size > 200 then hist.extract (hist.size - 200) hist.size else hist + let burstCount := + if pb.id == st.burstId then st.burstCount + 1 else 1 + state.set { st with + current := "", + history := hist, + stats, + cumReward := st.cumReward + outcome.reward, + burstId := pb.id, + burstCount + } + /- Persist stats so a conductor restart doesn't lose the bandit + history. Cheap — written every tick at most. -/ + saveAllStats cfg.storeDir stats + +private def runPlaybook (cfg : Config) (state : IO.Ref LiveState) (pb : Playbook) + : IO Unit := do + let now ← IO.monoMsNow + state.modify (fun s => { s with current := pb.id }) + let (ok, steps, err?) ← runPlaybookSteps cfg.orch pb + let later ← IO.monoMsNow + let outcome : RunOutcome := { + success := ok, + reward := if ok then rewardFromSteps pb steps else 0.0, + durationMs := later - now, + steps, + error? := err? + } + recordOutcome cfg state pb outcome + unless ok do + let st ← state.get + let ctx : EscalationCtx := { + reason := "exception", + observation := st.observation, + playbook? := some pb, + error? := outcome.error?, + recent := st.history.toList.reverse.take 5 + |>.map (fun h => (h.playbookId, h.success)) + |>.toArray + } + let decision ← cfg.escalator ctx + applyDecision cfg decision + +def tick (cfg : Config) (state : IO.Ref LiveState) : IO Unit := do + if ← cfg.aborted.get then return + if ← cfg.paused.get then + IO.sleep cfg.tickMs.toUInt32 + return + /- Observe. -/ + let obs ← cfg.observer cfg.orch + state.modify (fun s => { s with observation := obs }) + /- Load playbooks (cheap; live reload). -/ + let playbooks ← listPlaybooks cfg.storeDir + let st ← state.get + let candidates := playbooks.filter fun pb => + pb.enabled + && pb.pre.matches obs.screen + && (pb.maxBurst == 0 || pb.id != st.burstId || st.burstCount < pb.maxBurst) + if candidates.isEmpty then + let ctx : EscalationCtx := { + reason := "no_match", observation := obs, + recent := st.history.toList.reverse.take 5 + |>.map (fun h => (h.playbookId, h.success)) + |>.toArray + } + let decision ← cfg.escalator ctx + applyDecision cfg decision + return + let totalRuns := st.history.size + let scored := candidates.map fun pb => + let s := (st.stats.get? pb.id).getD {} + (pb, s) + match selectPlaybook scored totalRuns with + | none => IO.sleep cfg.tickMs.toUInt32 + | some pb => runPlaybook cfg state pb + +partial def runLoop (cfg : Config) (state : IO.Ref LiveState) : IO Unit := do + if ← cfg.aborted.get then return + try tick cfg state + catch e => IO.eprintln s!"conductor: tick threw: {e}" + IO.sleep cfg.tickMs.toUInt32 + runLoop cfg state + +end LeanTea.Agent.Conductor diff --git a/LeanTea/Agent/Playbook.lean b/LeanTea/Agent/Playbook.lean new file mode 100644 index 0000000..4f2c5e5 --- /dev/null +++ b/LeanTea/Agent/Playbook.lean @@ -0,0 +1,305 @@ +import LeanTea.Agent.Script +import Lean.Data.Json + +/-! # LeanTea.Agent.Playbook — named routine with precondition + reward + +A **playbook** is a `LeanTea.Agent.Script` plus the metadata a +conductor needs to pick it over its siblings: which screen it +applies to, how much reward we expect, how long it's allowed to +run. + +Playbooks are the unit the conductor schedules. The conductor: + + 1. observes the current screen + 2. picks a playbook whose `pre.whenScreen` matches (via a bandit + over `estReward` + recorded stats) + 3. runs its script via the MCP orchestrator + 4. records the outcome + +Persistence: one JSON file per playbook under +`/playbooks/.json`. Stats live in a sibling +`stats.json` so a conductor restart never loses the bandit history. -/ + +namespace LeanTea.Agent.Playbook + +open Lean (Json) + +/-! ## Precondition + +For v1 a precondition is just a tag match on the observed screen. +`"*"` matches anything. We keep this trivial on purpose — the +LLM-driven escalation handler is what handles complex conditions. -/ + +structure Precondition where + /-- Screen tag the playbook expects. Glob with `*` only — same + shape as `LeanTea.Llm.Policy`. -/ + whenScreen : String + deriving Inhabited, Repr + +private partial def globMatch (pat str : String) : Bool := + go pat.toList str.toList +where + go : List Char → List Char → Bool + | [], [] => true + | [], _ => false + | '*' :: ps, [] => go ps [] + | '*' :: ps, s@(_::ss) => go ps s || go ('*' :: ps) ss + | p :: ps, c :: cs => p == c && go ps cs + | _ :: _, [] => false + +def Precondition.matches (p : Precondition) (screen : String) : Bool := + globMatch p.whenScreen screen + +/-! ## Playbook -/ + +structure Playbook where + /-- File-stem ID (e.g. `daily_quest_easy`). Unique per store. -/ + id : String + /-- Human-readable name shown in the dashboard. -/ + name : String := "" + description : String := "" + pre : Precondition + /-- A priori expected reward; the bandit uses this as the + optimistic prior before any runs are recorded. -/ + estReward : Float := 1.0 + /-- Cap on consecutive runs of THIS same playbook. 0 = unlimited. + Use to avoid greedy loops where the agent just keeps re-running + the highest-reward routine and starves the rest. -/ + maxBurst : Nat := 0 + /-- Hard run timeout in ms. -/ + timeoutMs : Nat := 60000 + /-- Disable without deleting — easy way to bench a flaky playbook. -/ + enabled : Bool := true + /-- The script body. -/ + script : LeanTea.Agent.Script.Script + deriving Inhabited + +/-! ## Stats — what the bandit accumulates -/ + +structure Stats where + runs : Nat := 0 + wins : Nat := 0 + losses : Nat := 0 + /-- Sum of `RunOutcome.reward` across all runs. -/ + totalReward : Float := 0.0 + /-- monoMsNow of last execution. Used by the dashboard "last run". -/ + lastRunMs : Nat := 0 + /-- Last 20 outcomes — true = success, false = failure. The + dashboard uses this for a sparkline-style trend. -/ + recentWins : Array Bool := #[] + deriving Inhabited, Repr + +def Stats.avgReward (s : Stats) : Float := + if s.runs == 0 then 0.0 else s.totalReward / s.runs.toFloat + +def Stats.winRate (s : Stats) : Float := + if s.runs == 0 then 0.0 else s.wins.toFloat / s.runs.toFloat + +/-- Append one outcome, capping `recentWins` at 20. -/ +def Stats.record (s : Stats) (success : Bool) (reward : Float) (now : Nat) : Stats := + let w := s.recentWins.push success + let w := if w.size > 20 then w.extract (w.size - 20) w.size else w + { runs := s.runs + 1, + wins := s.wins + (if success then 1 else 0), + losses := s.losses + (if success then 0 else 1), + totalReward := s.totalReward + reward, + lastRunMs := now, + recentWins := w } + +/-! ## JSON codec -/ + +private def precondToJson (p : Precondition) : Json := + Json.mkObj [("whenScreen", Json.str p.whenScreen)] + +private def precondFromJson (j : Json) : Except String Precondition := do + let w ← (j.getObjVal? "whenScreen").bind (·.getStr?) + return { whenScreen := w } + +def Playbook.toJson (p : Playbook) : Json := + Json.mkObj [ + ("id", Json.str p.id), + ("name", Json.str p.name), + ("description", Json.str p.description), + ("pre", precondToJson p.pre), + ("estReward", Json.str (toString p.estReward)), + ("maxBurst", Json.num (Int.ofNat p.maxBurst)), + ("timeoutMs", Json.num (Int.ofNat p.timeoutMs)), + ("enabled", Json.bool p.enabled), + ("script", p.script.toJson) + ] + +private def getStr (j : Json) (k : String) : Except String String := + match j.getObjVal? k with + | .ok v => v.getStr? + | .error e => .error e + +private def getStrOpt (j : Json) (k : String) (default : String := "") : String := + (j.getObjVal? k).toOption.bind (·.getStr?.toOption) |>.getD default + +private def getNatOpt (j : Json) (k : String) (default : Nat := 0) : Nat := + match j.getObjVal? k with + | .ok (.num n) => n.mantissa.toNat + | _ => default + +private def getBoolOpt (j : Json) (k : String) (default : Bool := false) : Bool := + match j.getObjVal? k with + | .ok (.bool b) => b + | _ => default + +/-- Parse a decimal like `1.5` or `-0.25` to a Float. Tiny — only + handles the shapes we actually serialise. Returns `default` on + anything weird. -/ +private def parseFloat (s : String) (default : Float := 0.0) : Float := + let s := s.trimAscii.toString + if s.isEmpty then default + else + let (neg, body) := + if s.startsWith "-" then (true, (s.drop 1).toString) + else (false, s) + match body.splitOn "." with + | [whole] => + match whole.toNat? with + | some n => (if neg then -1.0 else 1.0) * n.toFloat + | none => default + | [whole, frac] => + let wn := whole.toNat?.getD 0 + let fn := frac.toNat?.getD 0 + let denom : Float := (10.0 : Float) ^ frac.length.toFloat + let v := wn.toFloat + (fn.toFloat / denom) + if neg then -v else v + | _ => default + +private def getFloatOpt (j : Json) (k : String) (default : Float := 0.0) : Float := + /- Float can land here as either a JSON number (e.g. `10.0`) or a + string (we serialise our own floats as strings to dodge JsonNumber + quirks). For the number case we round-trip through `toString` then + `parseFloat` so mantissa/exponent are handled correctly. -/ + match j.getObjVal? k with + | .ok (.str s) => parseFloat s default + | .ok (.num n) => parseFloat (toString n) default + | _ => default + +def Playbook.fromJson (j : Json) : Except String Playbook := do + let id ← getStr j "id" + let pre ← (j.getObjVal? "pre").bind precondFromJson + let scriptJ ← (j.getObjVal? "script") + let script ← LeanTea.Agent.Script.Script.fromJson scriptJ + return { + id, + name := getStrOpt j "name" id, + description := getStrOpt j "description", + pre, + estReward := getFloatOpt j "estReward" 1.0, + maxBurst := getNatOpt j "maxBurst", + timeoutMs := getNatOpt j "timeoutMs" 60000, + enabled := getBoolOpt j "enabled" true, + script + } + +def Stats.toJson (s : Stats) : Json := + Json.mkObj [ + ("runs", Json.num (Int.ofNat s.runs)), + ("wins", Json.num (Int.ofNat s.wins)), + ("losses", Json.num (Int.ofNat s.losses)), + ("totalReward", Json.str (toString s.totalReward)), + ("lastRunMs", Json.num (Int.ofNat s.lastRunMs)), + ("recentWins", Json.arr (s.recentWins.map Json.bool)) + ] + +def Stats.fromJson (j : Json) : Stats := + let recent := + match (j.getObjVal? "recentWins").toOption.bind (·.getArr?.toOption) with + | some arr => arr.filterMap fun v => + match v with | .bool b => some b | _ => none + | none => #[] + { + runs := getNatOpt j "runs", + wins := getNatOpt j "wins", + losses := getNatOpt j "losses", + totalReward := getFloatOpt j "totalReward", + lastRunMs := getNatOpt j "lastRunMs", + recentWins := recent + } + +/-! ## Disk -/ + +private def playbooksDir (storeDir : String) : String := s!"{storeDir}/playbooks" +private def statsPath (storeDir : String) : String := s!"{storeDir}/stats.json" + +private def ensureDir (dir : String) : IO Unit := do + unless (← System.FilePath.pathExists dir) do + IO.FS.createDirAll dir + +/-- Persist a single playbook. -/ +def Playbook.save (storeDir : String) (p : Playbook) : IO Unit := do + let dir := playbooksDir storeDir + ensureDir dir + IO.FS.writeFile s!"{dir}/{p.id}.json" p.toJson.pretty + +/-- Load a single playbook by id. -/ +def loadPlaybook (storeDir id : String) : IO (Option Playbook) := do + let path := s!"{playbooksDir storeDir}/{id}.json" + unless ← System.FilePath.pathExists path do return none + try + let src ← IO.FS.readFile path + match Json.parse src with + | .error _ => return none + | .ok j => + match Playbook.fromJson j with + | .ok p => return some p + | .error _ => return none + catch _ => return none + +/-- Load every playbook from disk. -/ +def listPlaybooks (storeDir : String) : IO (Array Playbook) := do + let dir := playbooksDir storeDir + ensureDir dir + let entries ← System.FilePath.readDir dir + let mut acc : Array Playbook := #[] + for e in entries do + if e.fileName.endsWith ".json" then + try + let src ← IO.FS.readFile e.path.toString + match Json.parse src with + | .ok j => + match Playbook.fromJson j with + | .ok p => acc := acc.push p + | .error _ => pure () + | .error _ => pure () + catch _ => pure () + return acc + +/-- Delete a playbook by id. Returns `true` if a file was removed. -/ +def deletePlaybook (storeDir id : String) : IO Bool := do + let path := s!"{playbooksDir storeDir}/{id}.json" + if ← System.FilePath.pathExists path then + IO.FS.removeFile path + return true + else + return false + +/-! ## Stats persistence (single file shared by all playbooks) -/ + +def loadAllStats (storeDir : String) : IO (Std.HashMap String Stats) := do + let path := statsPath storeDir + unless ← System.FilePath.pathExists path do return ∅ + try + let src ← IO.FS.readFile path + match Json.parse src with + | .error _ => return ∅ + | .ok j => + match j with + | .obj kvs => + let mut m : Std.HashMap String Stats := ∅ + for (k, v) in kvs.toList do + m := m.insert k (Stats.fromJson v) + return m + | _ => return ∅ + catch _ => return ∅ + +def saveAllStats (storeDir : String) (m : Std.HashMap String Stats) : IO Unit := do + ensureDir storeDir + let kvs := m.toList.map fun (k, v) => (k, Stats.toJson v) + IO.FS.writeFile (statsPath storeDir) (Json.mkObj kvs).pretty + +end LeanTea.Agent.Playbook diff --git a/LeanTea/Cloud/Gemini.lean b/LeanTea/Cloud/Gemini.lean new file mode 100644 index 0000000..2b0df10 --- /dev/null +++ b/LeanTea/Cloud/Gemini.lean @@ -0,0 +1,401 @@ +import LeanTea.Net.SafePath +import LeanTea.Json.Helpers +import Lean.Data.Json + +/-! # LeanTea.Cloud.Gemini — Google Gemini API client + +A tiny client for the `v1beta` REST surface of the Gemini API. +Lean has no native TLS so HTTPS goes through `curl(1)` — same +shape as `LeanTea.Auth.OAuth2`. + +## Models (as of 2026) + +| model | input ctx | use-case | +|---|---|---| +| `gemini-2.5-pro` (default) | 2,097,152 | heavy code / architecture review | +| `gemini-2.5-flash` | 1,048,576 | fast + cheap general-purpose | +| `gemini-2.5-flash-lite` | 1,048,576 | summarisation / cheap lint | + +For long-context use cases, `Gemini.reviewMany` bundles many +files through `SafePath` into a single Markdown prompt and ships +the lot in one request. + +## Auth + +Reads `GEMINI_API_KEY` from the environment. Issue a key at +https://aistudio.google.com/apikey. Free tier on `gemini-2.5-flash` +is 15 req/min / 1500 req/day. + +## Example + +```lean +let cfg ← Gemini.Config.fromEnv! +let resp ← Gemini.ask cfg "Hello, who are you?" {} +IO.println resp.text +IO.println s!"tokens in={resp.usage.input} out={resp.usage.output}" +``` + +```lean +-- Holistic multi-file review +let resp ← Gemini.reviewMany cfg + (workspace := "/path/to/repo") + (paths := #["LeanTea/Net/Http.lean", "LeanTea/Net/Server.lean"]) + (prompt := "Architecture review: call out design weaknesses") + {} +``` -/ + +namespace LeanTea.Cloud.Gemini + +open LeanTea.Net (SafePath) +open Lean (Json) + +/-! ## Config -/ + +structure Config where + apiKey : String + /-- Default `gemini-2.5-pro`. Override per-call via `CallOpts.model`. -/ + model : String := "gemini-2.5-pro" + endpoint : String := "https://generativelanguage.googleapis.com/v1beta" + /-- Max output tokens. `none` uses the model default. -/ + maxOutputTokens : Option Nat := none + /-- Temperature (`0.0` = deterministic). `none` uses the model default. -/ + temperature : Option Float := none + /-- curl timeout in seconds. Long-context generations can take a + while, so the default is 300s (5 min). -/ + timeoutSec : Nat := 300 + deriving Inhabited, Repr + +/-- Build a `Config` from environment variables. `GEMINI_API_KEY` + is required; `GEMINI_MODEL` / `GEMINI_ENDPOINT` are optional + overrides. -/ +def Config.fromEnv? : IO (Option Config) := do + match ← IO.getEnv "GEMINI_API_KEY" with + | none => return none + | some key => + let model := (← IO.getEnv "GEMINI_MODEL").getD "gemini-2.5-pro" + let endpoint := (← IO.getEnv "GEMINI_ENDPOINT").getD + "https://generativelanguage.googleapis.com/v1beta" + return some { apiKey := key, model, endpoint } + +/-- Panic variant — useful in CI to fail fast when the key is + missing rather than silently degrading. -/ +def Config.fromEnv! : IO Config := do + match ← Config.fromEnv? with + | some c => return c + | none => throw (IO.userError + "Gemini.Config.fromEnv!: GEMINI_API_KEY is not set") + +/-! ## Request / Response types -/ + +/-- One message in a multi-turn chat. -/ +structure Message where + /-- `"user"` or `"model"`. -/ + role : String + text : String + deriving Inhabited, Repr + +/-- Per-call options. All optional — pass `{}` for defaults. -/ +structure CallOpts where + /-- Override `Config.model`. -/ + model : Option String := none + /-- System prompt. Maps to Gemini's `systemInstruction` field. -/ + system : Option String := none + /-- Override `Config.maxOutputTokens`. -/ + maxOutputTokens : Option Nat := none + /-- Override `Config.temperature`. -/ + temperature : Option Float := none + /-- Response JSON schema (structured output). -/ + jsonSchema : Option Json := none + deriving Inhabited + +structure Usage where + input : Nat := 0 + output : Nat := 0 + total : Nat := 0 + deriving Inhabited, Repr + +structure Response where + text : String + usage : Usage + /-- Finish reason (`STOP` / `MAX_TOKENS` / `SAFETY` / …). -/ + finishReason : String := "" + /-- Model that actually served the request (resolved at call time). -/ + model : String := "" + /-- Raw response body — kept around for debugging / logging. -/ + raw : String := "" + deriving Inhabited, Repr + +/-! ## curl wrapper — TLS needs shell-out -/ + +private structure CurlResp where + status : Nat + body : String + err : String := "" + +/-- POST a JSON body to `url` via curl, returning status + body. + The request body is staged through a temp file so very large + prompts work and the bytes are passed binary-safe. -/ +private def curlPost (url : String) (jsonBody : String) (timeoutSec : Nat) + : IO CurlResp := do + let tmpDir := (← IO.getEnv "TMPDIR").getD "/tmp" + let bodyFile := s!"{tmpDir}/gemini-req-{← IO.rand 0 0xffff_ffff}.json" + let respFile := s!"{tmpDir}/gemini-resp-{← IO.rand 0 0xffff_ffff}.json" + IO.FS.writeFile bodyFile jsonBody + let out ← IO.Process.output { + cmd := "curl", + args := #[ + "-sS", + "--max-time", toString timeoutSec, + "-X", "POST", + "-H", "content-type: application/json", + "-w", "\n___STATUS:%{http_code}", + "-o", respFile, + "--data-binary", s!"@{bodyFile}", + url + ] + } + try IO.FS.removeFile bodyFile catch _ => pure () + let body ← (try IO.FS.readFile respFile catch _ => pure "") + try IO.FS.removeFile respFile catch _ => pure () + let parts := out.stdout.splitOn "\n___STATUS:" + let status := match parts with + | [_, codeS] => codeS.trimAscii.toString.toNat?.getD 0 + | _ => 0 + return { status, body, err := out.stderr } + +/-! ## Request body builder -/ + +private def jObj (xs : List (String × Json)) : Json := Json.mkObj xs +private def jArr (xs : List Json) : Json := Json.arr xs.toArray + +/-- `Json.num` wants a `JsonNumber`; we encode floats by + `toString`+reparse. Good enough for temperature / topP. Same + trick as `LeanTea.Llm.Openai`. -/ +private def floatJson (f : Float) : Json := + match Json.parse (toString f) with + | .ok j => j + | .error _ => Json.num 0 + +private def buildRequest (messages : List Message) (opts : CallOpts) : Json := Id.run do + let contents := messages.map fun m => + jObj [("role", Json.str m.role), ("parts", jArr [jObj [("text", Json.str m.text)]])] + let mut fields : List (String × Json) := [ + ("contents", jArr contents) + ] + if let some sys := opts.system then + fields := fields ++ [("systemInstruction", + jObj [("parts", jArr [jObj [("text", Json.str sys)]])])] + let mut gc : List (String × Json) := [] + if let some n := opts.maxOutputTokens then + gc := gc ++ [("maxOutputTokens", Json.num (Int.ofNat n))] + if let some t := opts.temperature then + gc := gc ++ [("temperature", floatJson t)] + if let some sch := opts.jsonSchema then + gc := gc ++ [ + ("responseMimeType", Json.str "application/json"), + ("responseSchema", sch)] + if !gc.isEmpty then fields := fields ++ [("generationConfig", jObj gc)] + return jObj fields + +/-! ## Response parser -/ + +private def parseResponse (model rawBody : String) : IO Response := do + match Json.parse rawBody with + | .error e => throw <| IO.userError s!"Gemini: bad JSON ({e})\n{rawBody.take 500}" + | .ok j => + -- API error envelope + match (j.getObjVal? "error").toOption with + | some err => + let msg := err.getStrD "message" + let code := err.getNatD "code" + throw <| IO.userError s!"Gemini API error {code}: {msg}" + | none => pure () + -- Concatenate candidates[0].content.parts[*].text + let candidates := (j.getObjVal? "candidates").toOption.getD Json.null + let arr := match candidates.getArr? with | .ok a => a.toList | .error _ => [] + let cand := arr.head?.getD Json.null + let content := (cand.getObjVal? "content").toOption.getD Json.null + let parts := (content.getObjVal? "parts").toOption.getD Json.null + let partArr := match parts.getArr? with | .ok a => a.toList | .error _ => [] + let text := String.intercalate "" (partArr.map (·.getStrD "text")) + let finishReason := cand.getStrD "finishReason" + let usageM := (j.getObjVal? "usageMetadata").toOption.getD Json.null + let usage : Usage := { + input := usageM.getNatD "promptTokenCount", + output := usageM.getNatD "candidatesTokenCount", + total := usageM.getNatD "totalTokenCount" + } + return { text, usage, finishReason, model, raw := rawBody } + +/-! ## Core: generate -/ + +/-- Send a message list (chat history) and return one turn's response. -/ +def generate (cfg : Config) (messages : List Message) (opts : CallOpts := {}) + : IO Response := do + let model := opts.model.getD cfg.model + let url := s!"{cfg.endpoint}/models/{model}:generateContent?key={cfg.apiKey}" + let body := (buildRequest messages opts).compress + let resp ← curlPost url body cfg.timeoutSec + if resp.status < 200 || resp.status >= 300 then + throw <| IO.userError s!"Gemini HTTP {resp.status}: {resp.body.take 500}\n{resp.err}" + parseResponse model resp.body + +/-! ## Convenience APIs -/ + +/-- One-shot prompt. -/ +def ask (cfg : Config) (prompt : String) (opts : CallOpts := {}) + : IO Response := + generate cfg [{ role := "user", text := prompt }] opts + +/-- Multi-turn chat. `history` must be alternating user/model turns + (Gemini's expected shape). -/ +def chat (cfg : Config) (history : List Message) (message : String) + (opts : CallOpts := {}) : IO Response := + generate cfg (history ++ [{ role := "user", text := message }]) opts + +/-! ## Holistic codebase review + +The headline API — bundles multiple files into one Markdown prompt +to exploit Pro's 2M-token context. `SafePath` guarantees the +workspace boundary is honoured even when the caller passes +attacker-controlled paths. -/ + +/-- Map a file extension to a fenced-code-block language hint. -/ +private def guessLang (path : String) : String := + let lc := path.toLower + if lc.endsWith ".lean" then "lean" + else if lc.endsWith ".go" then "go" + else if lc.endsWith ".rs" then "rust" + else if lc.endsWith ".ts" then "ts" + else if lc.endsWith ".tsx" then "tsx" + else if lc.endsWith ".js" then "js" + else if lc.endsWith ".jsx" then "jsx" + else if lc.endsWith ".py" then "python" + else if lc.endsWith ".java" then "java" + else if lc.endsWith ".c" || lc.endsWith ".h" then "c" + else if lc.endsWith ".cpp" || lc.endsWith ".cc" || lc.endsWith ".hpp" then "cpp" + else if lc.endsWith ".cs" then "csharp" + else if lc.endsWith ".kt" then "kotlin" + else if lc.endsWith ".swift" then "swift" + else if lc.endsWith ".rb" then "ruby" + else if lc.endsWith ".php" then "php" + else if lc.endsWith ".sh" then "bash" + else if lc.endsWith ".sql" then "sql" + else if lc.endsWith ".yaml" || lc.endsWith ".yml" then "yaml" + else if lc.endsWith ".toml" then "toml" + else if lc.endsWith ".json" then "json" + else if lc.endsWith ".html" then "html" + else if lc.endsWith ".css" then "css" + else if lc.endsWith ".md" then "markdown" + else if lc.endsWith ".proto" then "protobuf" + else if lc.endsWith ".tf" then "hcl" + else if lc.endsWith ".dockerfile" || lc.endsWith "Dockerfile" then "dockerfile" + else "" + +/-- Render one file as a `## File: \n\`\`\`lang\n\n\`\`\`` block. -/ +private def renderFile (relPath : String) (body : String) : String := + let lang := guessLang relPath + s!"\n\n## File: {relPath}\n\n```{lang}\n{body}\n```\n" + +/-- Default system prompt — asks the model to look across all files + first, then drill into specifics. Override via `CallOpts.system` + if you want a different style or language. -/ +def defaultReviewSystem : String := +"You are a senior software engineer. You will be given multiple +source files as a sequence of Markdown `## File: ` blocks. +Read every file before answering, then review in the following order: + +1. **Codebase overview**: overall structure, separation of concerns, + key dependencies. +2. **Consistency check**: conventions or patterns that contradict + each other across files. +3. **Design weaknesses**: architecturally fragile spots (god classes, + circular dependencies, over-abstraction, leaky abstractions, …). +4. **Concrete bug risks**: per-file defect candidates, cited as + file:line. +5. **Improvement proposals**: 3–10 items, prioritised. + +Emphasise **cross-file connections and design overview** over +single-file style nitpicks." + +structure ReviewStats where + fileCount : Nat + totalBytes : Nat + /-- Files we skipped while bundling, with reasons. -/ + skipped : List (String × String) := [] + deriving Inhabited, Repr + +/-- Read multiple files through `SafePath`, bundle them into one + Markdown prompt, and send to Gemini. + + - `workspace` is the read root (`..` can't escape it). + - `paths` are workspace-relative. + - `prompt` is the user instruction; empty falls back to the + default review prompt. + - When `opts.system` is unset, `defaultReviewSystem` is used. + + Returns the `Response` plus stats (how many files / bytes + actually made it into the bundle, plus any skipped ones). -/ +def reviewMany (cfg : Config) + (workspace : String) + (paths : Array String) + (prompt : String := "") + (opts : CallOpts := {}) + : IO (Response × ReviewStats) := do + let mut bundle := "" + let mut count := 0 + let mut bytes := 0 + let mut skipped : List (String × String) := [] + for raw in paths do + match SafePath.under workspace raw with + | .error msg => + skipped := skipped ++ [(raw, s!"SafePath: {msg}")] + | .ok p => + try + let body ← IO.FS.readFile p.value + bundle := bundle ++ renderFile raw body + count := count + 1 + bytes := bytes + body.utf8ByteSize + catch e => + skipped := skipped ++ [(raw, s!"read failed: {e}")] + let userText := + (if prompt.isEmpty then defaultReviewSystem else prompt) ++ bundle + let optsWithSys := + if opts.system.isSome then opts + else { opts with system := some defaultReviewSystem } + let resp ← ask cfg userText optsWithSys + return (resp, { fileCount := count, totalBytes := bytes, skipped }) + +/-! ## Diff review — lightweight git-diff focused review -/ + +/-- Run `git diff ` inside `workspace` and pipe the + output to Gemini for a focused review. Override the prompt / + system message via `prompt` and `opts.system`. -/ +def reviewDiff (cfg : Config) (workspace base head : String) + (prompt : String := "") (opts : CallOpts := {}) + : IO Response := do + let out ← IO.Process.output { + cmd := "git", + args := #["-C", workspace, "diff", base, head] + } + if out.exitCode != 0 then + throw <| IO.userError s!"git diff failed: {out.stderr}" + let diff := out.stdout + let sysDefault := +"You are a senior software engineer. You will be given a unified +diff. Review it in the following order: + +1. **Intent**: what is this change trying to achieve? +2. **Defect candidates**: bugs, regression risks, missing cases. +3. **Improvement proposals**: style / performance / maintainability + nits worth flagging. +4. **Verdict**: approve / request changes / nit, with a reason." + let userText := + (if prompt.isEmpty then "Please review per the criteria above." else prompt) + ++ "\n\n```diff\n" ++ diff ++ "\n```" + let optsWithSys := + if opts.system.isSome then opts + else { opts with system := some sysDefault } + ask cfg userText optsWithSys + +end LeanTea.Cloud.Gemini diff --git a/LeanTea/Llm/ChatStore.lean b/LeanTea/Llm/ChatStore.lean new file mode 100644 index 0000000..bb0fd05 --- /dev/null +++ b/LeanTea/Llm/ChatStore.lean @@ -0,0 +1,244 @@ +import LeanTea.Llm.McpOrchestrator +import Lean.Data.Json + +/-! # LeanTea.Llm.ChatStore — JSON-file-per-session chat persistence + +Each chat session is one JSON file under a configurable directory +(default `~/.cache/leantea-chat/`). The file format is a small +envelope around the orchestrator's `ChatMsg` history: + +```json +{ + "id": "20260628-091500-a1b2", + "name": "open github and read title", + "created": 1740000000, + "updated": 1740001234, + "messages": [ … ] +} +``` + +`name` is auto-generated from the first user message when it isn't +set explicitly. Images are stored inline as `data:` URLs — fine for +the scale of a personal-machine LLM-chat app; would not survive at +SaaS multi-tenant scale, but that's not what this is for. -/ + +namespace LeanTea.Llm.ChatStore + +open Lean (Json) +open LeanTea.Llm.McpOrchestrator (ChatMsg Role) + +structure Session where + id : String + name : String := "" + /-- Unix seconds. -/ + created : Nat := 0 + updated : Nat := 0 + messages : Array ChatMsg := #[] + deriving Inhabited + +/-! ## Default directory -/ + +private def defaultDirRel : String := ".cache/leantea-chat" + +def defaultDir : IO String := do + let home ← match (← IO.getEnv "HOME") with + | some h => pure h + | none => pure "." + return home ++ "/" ++ defaultDirRel + +private def ensureDir (dir : String) : IO Unit := do + unless (← System.FilePath.pathExists dir) do + IO.FS.createDirAll dir + +/-! ## ID generation + +`--` — sortable by date and visually distinct. -/ + +private def now? : IO Nat := do + let ms ← IO.monoMsNow + /- monoMsNow is monotonic, not wall-clock; we use it for the `created` + / `updated` fields too — UI never shows it as a real date, only as + a sort key. Wrapping to seconds keeps the numbers smaller. -/ + return ms / 1000 + +private def pad2 (n : Nat) : String := + let s := toString n + if s.length < 2 then "0" ++ s else s + +private def hexDigit (n : Nat) : Char := + if n < 10 then Char.ofNat ('0'.toNat + n) + else Char.ofNat ('a'.toNat + n - 10) + +private partial def hex (n : Nat) : String := + if n == 0 then "0" + else + let rec go (n : Nat) (acc : String) : String := + if n == 0 then acc + else go (n / 16) ((hexDigit (n % 16)).toString ++ acc) + go n "" + +/-- Sortable timestamped session ID using monoMsNow as the time + source plus a 4-hex random tail for uniqueness across the + millisecond. Format is `tNNNNNN-XXXX` so file listing sorts in + creation order. -/ +def freshId : IO String := do + let ms ← IO.monoMsNow + let r ← IO.rand 0 0xffff + return s!"t{ms}-{hex r}" + +/-! ## Disk format -/ + +private def chatMsgToStoreJson (m : ChatMsg) : Json := + Json.mkObj [ + ("role", Json.str m.role.toString), + ("text", Json.str m.text), + ("images", Json.arr (m.images.map Json.str)), + ("toolCalls", Json.arr m.toolCalls), + ("toolCallId", Json.str m.toolCallId), + ("toolName", Json.str m.toolName) + ] + +private def chatMsgFromStoreJson (j : Json) : Option ChatMsg := do + let roleS ← (j.getObjVal? "role").toOption.bind (·.getStr?.toOption) + let role : Role := match roleS with + | "system" => .system + | "user" => .user + | "assistant" => .assistant + | "tool" => .tool + | _ => .user + let text := (j.getObjVal? "text").toOption.bind (·.getStr?.toOption) |>.getD "" + let imagesArr := (j.getObjVal? "images").toOption.bind (·.getArr?.toOption) |>.getD #[] + let images := imagesArr.filterMap (·.getStr?.toOption) + let toolCalls := (j.getObjVal? "toolCalls").toOption.bind (·.getArr?.toOption) |>.getD #[] + let toolCallId := (j.getObjVal? "toolCallId").toOption.bind (·.getStr?.toOption) |>.getD "" + let toolName := (j.getObjVal? "toolName").toOption.bind (·.getStr?.toOption) |>.getD "" + return { role, text, images, toolCalls, toolCallId, toolName } + +private def sessionToJson (s : Session) : Json := + Json.mkObj [ + ("id", Json.str s.id), + ("name", Json.str s.name), + ("created", Json.num (Int.ofNat s.created)), + ("updated", Json.num (Int.ofNat s.updated)), + ("messages", Json.arr (s.messages.map chatMsgToStoreJson)) + ] + +private def sessionFromJson (j : Json) : Option Session := do + let id ← (j.getObjVal? "id").toOption.bind (·.getStr?.toOption) + let name := (j.getObjVal? "name").toOption.bind (·.getStr?.toOption) |>.getD "" + let created := (j.getObjVal? "created").toOption.bind (·.getNat?.toOption) |>.getD 0 + let updated := (j.getObjVal? "updated").toOption.bind (·.getNat?.toOption) |>.getD 0 + let msgsJ := (j.getObjVal? "messages").toOption.bind (·.getArr?.toOption) |>.getD #[] + let messages := msgsJ.filterMap chatMsgFromStoreJson + return { id, name, created, updated, messages } + +/-! ## CRUD -/ + +private def sessionPath (dir id : String) : String := + s!"{dir}/{id}.json" + +/-- Auto-name from the first user message (truncated). -/ +private def autoNameFromHistory (h : Array ChatMsg) (limit : Nat := 50) : String := + match h.find? (·.role == .user) with + | some m => + let t := (m.text.trimAscii.toString).replace "\n" " " + if t.length ≤ limit then t else (t.take limit).toString ++ "…" + | none => "(empty)" + +/-- Build a fresh empty session ready to be written. -/ +def newSession (name : String := "") : IO Session := do + let id ← freshId + let now ← now? + return { id, name, created := now, updated := now, messages := #[] } + +/-- Persist a session to disk. If `s.name` is empty we fill it from + the first user message. Updates `updated` timestamp. -/ +def save (dir : String) (s : Session) : IO Session := do + ensureDir dir + let name := if s.name.isEmpty then autoNameFromHistory s.messages else s.name + let now ← now? + let s' := { s with name, updated := now } + IO.FS.writeFile (sessionPath dir s.id) (sessionToJson s').pretty + return s' + +/-- Load a session by id. Returns `none` when the file doesn't exist + or isn't valid JSON we recognise. -/ +def load (dir id : String) : IO (Option Session) := do + let path := sessionPath dir id + unless ← System.FilePath.pathExists path do return none + let src ← IO.FS.readFile path + match Json.parse src with + | .error _ => return none + | .ok j => return sessionFromJson j + +/-- Delete a session by id. Returns `true` if a file was removed. -/ +def delete (dir id : String) : IO Bool := do + let path := sessionPath dir id + if ← System.FilePath.pathExists path then + IO.FS.removeFile path + return true + else + return false + +/-! ## Listing — load just the metadata, not the full history. -/ + +structure Summary where + id : String + name : String + created : Nat + updated : Nat + /-- Number of messages in the session, for the listing UI. -/ + count : Nat + deriving Inhabited + +private def summaryFromJson (j : Json) : Option Summary := do + let id ← (j.getObjVal? "id").toOption.bind (·.getStr?.toOption) + let name := (j.getObjVal? "name").toOption.bind (·.getStr?.toOption) |>.getD "" + let created := (j.getObjVal? "created").toOption.bind (·.getNat?.toOption) |>.getD 0 + let updated := (j.getObjVal? "updated").toOption.bind (·.getNat?.toOption) |>.getD 0 + let count := + match (j.getObjVal? "messages").toOption.bind (·.getArr?.toOption) with + | some a => a.size + | none => 0 + return { id, name, created, updated, count } + +/-- Return every session's summary, newest first by `updated`. -/ +def list (dir : String) : IO (Array Summary) := do + ensureDir dir + let entries ← System.FilePath.readDir dir + let mut summaries : Array Summary := #[] + for e in entries do + if e.fileName.endsWith ".json" then + let path := e.path.toString + try + let src ← IO.FS.readFile path + match Json.parse src with + | .error _ => pure () + | .ok j => + match summaryFromJson j with + | some s => summaries := summaries.push s + | none => pure () + catch _ => pure () + /- Sort by `updated` descending. -/ + return summaries.qsort (fun a b => a.updated > b.updated) + +/-- Rename a session. Returns the updated session, or `none` if not found. -/ +def rename (dir id newName : String) : IO (Option Session) := do + match ← load dir id with + | none => return none + | some s => + let s' ← save dir { s with name := newName } + return some s' + +/-! ## Summary JSON for the Web UI -/ + +def summaryToJson (s : Summary) : Json := + Json.mkObj [ + ("id", Json.str s.id), + ("name", Json.str s.name), + ("created", Json.num (Int.ofNat s.created)), + ("updated", Json.num (Int.ofNat s.updated)), + ("count", Json.num (Int.ofNat s.count)) + ] + +end LeanTea.Llm.ChatStore diff --git a/LeanTea/Llm/McpOrchestrator.lean b/LeanTea/Llm/McpOrchestrator.lean new file mode 100644 index 0000000..a0b0479 --- /dev/null +++ b/LeanTea/Llm/McpOrchestrator.lean @@ -0,0 +1,656 @@ +import LeanTea.Llm.Openai +import LeanTea.Llm.Policy +import LeanTea.Net.HttpClient +import Lean.Data.Json + +/-! # LeanTea.Llm.McpOrchestrator — chat-loop driver over N MCP servers + +Generalised version of the `BrowserAgent` loop: spawn an arbitrary +list of MCP servers (each as a stdio child or an HTTP endpoint), +aggregate their tool catalogues into one OpenAI `tools[]` array, +and run the standard tool-call chat loop against an OpenAI-compatible +backend (LM Studio with a local Gemma, OpenAI proper, etc.). + +The three demo apps under `examples/LlmChat{Cli,Tui,Web}/` all sit +on top of this; the orchestrator owns: + + * MCP child lifecycle (spawn / initialize / list-tools / dispatch / close) + * Name-prefix routing — tool names become `__` so two + servers can't collide + * The chat history shape and the LLM call (`runTurn`) + +The UI layers only deal with rendering and input. + +## Wire shape + +We talk to the LLM via the OpenAI Chat-Completions API (`/chat/completions` +with `tools[]` + `tool_choice:"auto"`). LM Studio speaks the same dialect, +so a local Gemma works without changing this code. The MCP servers speak +JSON-RPC 2.0 framed line-by-line over stdio (one request, one newline, +one response, one newline) — same wire `Claude Desktop` uses. + +## Config shape + +```json +{ + "model": "google/gemma-4-e4b", + "baseUrl": "http://127.0.0.1:11211/v1", + "system": "You are a helpful assistant. Reply in the user's language.", + "servers": [ + { "name": "chrome", "bin": "./.lake/build/bin/chrome_cdp_mcp_serve", "args": [] }, + { "name": "gemini", "bin": "./.lake/build/bin/gemini_mcp_serve", + "args": ["--workspace", "."] } + ] +} +``` + +The `name` becomes the tool-name prefix shown to the LLM: a tool +called `chrome_navigate` on the `chrome` server becomes +`chrome__chrome_navigate` in the catalogue. (We keep the original +tool name as the second half; some MCP servers already prefix their +tools — that's fine, the extra prefix just disambiguates.) -/ + +namespace LeanTea.Llm.McpOrchestrator + +open Lean (Json) +open LeanTea.Llm.Openai (Config) + +/-! ## Server spec / wire client -/ + +structure ServerSpec where + /-- Short, unique handle. Becomes the tool-name prefix. -/ + name : String + /-- Either an absolute / relative path to an executable (stdio + transport) or an `http://…/mcp` URL (HTTP transport). -/ + bin : String + /-- CLI args for the stdio binary. Ignored for HTTP. -/ + args : Array String := #[] + deriving Inhabited, Repr + +/-- Two transports — same shape as `BrowserAgent.Mcp` but exposed + publicly so the orchestrator can drive any MCP server. -/ +inductive McpKind where + | stdio (child : IO.Process.Child { stdin := .piped, stdout := .piped, stderr := .piped }) + | http (url : String) + +structure Mcp where + kind : McpKind + nextId : IO.Ref Nat + +private def Mcp.spawnStdio (bin : String) (args : Array String) : IO Mcp := do + /- Unset PORT in the child env. Several LeanTEA MCP servers + (gemini, tmux, browser, …) switch themselves into HTTP mode + when PORT is set, which silently breaks the stdio handshake + and steals the parent's port. -/ + let child ← IO.Process.spawn { + cmd := bin, args, + env := #[("PORT", none)], + stdin := .piped, stdout := .piped, stderr := .piped + } + let nextId ← IO.mkRef 1 + return { kind := .stdio child, nextId } + +private def Mcp.connectHttp (url : String) : IO Mcp := do + let nextId ← IO.mkRef 1 + return { kind := .http url, nextId } + +private def Mcp.spawn (spec : ServerSpec) : IO Mcp := do + if spec.bin.startsWith "http://" || spec.bin.startsWith "https://" then + Mcp.connectHttp spec.bin + else + Mcp.spawnStdio spec.bin spec.args + +private def Mcp.close (m : Mcp) : IO Unit := do + match m.kind with + | .stdio child => + let (_, child') ← child.takeStdin + let _ ← child'.wait + | .http _ => return () + +private def Mcp.sendReq (m : Mcp) (method : String) (params : Json) : IO Json := do + let id ← m.nextId.get + m.nextId.set (id + 1) + let req := Json.mkObj [ + ("jsonrpc", Json.str "2.0"), + ("id", Json.num (Int.ofNat id)), + ("method", Json.str method), + ("params", params) + ] + let raw ← match m.kind with + | .stdio child => + child.stdin.putStr (req.compress ++ "\n") + child.stdin.flush + let line ← child.stdout.getLine + if line.isEmpty then + throw <| IO.userError s!"mcp: server closed mid-request (method={method})" + pure line + | .http url => + LeanTea.Net.HttpClient.postJsonText url req.compress + match Json.parse raw with + | .error e => throw <| IO.userError s!"mcp: bad JSON: {e}\n{raw}" + | .ok j => return j + +private def Mcp.initialize (m : Mcp) : IO Unit := do + let _ ← m.sendReq "initialize" (Json.mkObj [ + ("protocolVersion", Json.str "2024-11-05"), + ("capabilities", Json.mkObj []), + ("clientInfo", Json.mkObj [ + ("name", Json.str "leantea-llm-chat"), + ("version", Json.str "0.1.0") + ]) + ]) + +private def Mcp.listTools (m : Mcp) : IO (Array Json) := do + let resp ← m.sendReq "tools/list" (Json.mkObj []) + let result := (resp.getObjVal? "result").toOption.getD Json.null + let tools := (result.getObjVal? "tools").toOption.getD (Json.arr #[]) + match tools.getArr? with + | .ok arr => return arr + | .error _ => return #[] + +private def Mcp.callTool (m : Mcp) (name : String) (args : Json) : IO Json := do + let resp ← m.sendReq "tools/call" (Json.mkObj [ + ("name", Json.str name), + ("arguments", args) + ]) + match resp.getObjVal? "result" with + | .ok r => return r + | .error _ => + let err := (resp.getObjVal? "error").toOption.getD Json.null + return Json.mkObj [ + ("isError", Json.bool true), + ("content", Json.arr #[ + Json.mkObj [ + ("type", Json.str "text"), + ("text", Json.str err.compress) + ] + ]) + ] + +/-! ## Tool-name prefixing + +Each tool gets the form `__`. We pick `__` as the +separator because: it's two ASCII chars (one round trip through the +LLM's tokenizer = 1 token), it's never legal in an MCP tool name +(which match `[a-zA-Z_][a-zA-Z0-9_]*`-ish), and round-tripping is +trivial via `splitOn`. -/ + +private def prefixSep : String := "__" + +private def prefixedToolName (server tool : String) : String := + server ++ prefixSep ++ tool + +private def splitPrefixedName (full : String) : Option (String × String) := + match full.splitOn prefixSep with + | server :: rest@(_ :: _) => some (server, String.intercalate prefixSep rest) + | _ => none + +/-! ## OpenAI tool conversion -/ + +/-- Convert one MCP tool entry to the OpenAI `tools[]` shape. The + `name` is rewritten to its prefixed form so collisions across + MCP servers can't happen, and so we can route on dispatch. -/ +private def mcpToolToOpenAi (serverName : String) (t : Json) : Json := + let name := (t.getObjVal? "name").toOption.bind (·.getStr?.toOption) |>.getD "" + let desc := (t.getObjVal? "description").toOption.bind (·.getStr?.toOption) |>.getD "" + let params := (t.getObjVal? "inputSchema").toOption.getD + (Json.mkObj [("type", Json.str "object"), ("properties", Json.mkObj [])]) + Json.mkObj [ + ("type", Json.str "function"), + ("function", Json.mkObj [ + ("name", Json.str (prefixedToolName serverName name)), + ("description", Json.str s!"[{serverName}] {desc}"), + ("parameters", params) + ]) + ] + +/-! ## Orchestrator -/ + +structure RunningServer where + spec : ServerSpec + mcp : Mcp + /-- Raw MCP tool entries (unprefixed names). Kept for diagnostics. -/ + tools : Array Json + /-- Tool entries already converted to OpenAI shape, with prefixed names. -/ + openAiTools : Array Json + +structure Orchestrator where + cfg : Config + model : String + systemPrompt : String + servers : Array RunningServer + /-- Per-LLM-call upper bound on max_tokens. -/ + maxTokens : Nat := 2000 + /-- Per-turn upper bound on tool-call rounds. -/ + maxRounds : Nat := 12 + +/-- Spawn every server, run the MCP handshake, and gather tools. -/ +def spawnAll (cfg : Config) (model systemPrompt : String) + (specs : Array ServerSpec) : IO Orchestrator := do + let mut running : Array RunningServer := #[] + for spec in specs do + let mcp ← Mcp.spawn spec + try + mcp.initialize + let rawTools ← mcp.listTools + let openAi := rawTools.map (mcpToolToOpenAi spec.name) + running := running.push { spec, mcp, tools := rawTools, openAiTools := openAi } + catch e => + mcp.close + throw <| IO.userError s!"mcp server `{spec.name}` ({spec.bin}): {e}" + return { cfg, model, systemPrompt, servers := running } + +def Orchestrator.shutdown (o : Orchestrator) : IO Unit := do + for s in o.servers do + try s.mcp.close catch _ => pure () + +/-- The aggregated OpenAI tool catalogue across every server. -/ +def Orchestrator.openAiTools (o : Orchestrator) : Array Json := Id.run do + let mut all : Array Json := #[] + for s in o.servers do + all := all ++ s.openAiTools + return all + +/-- Route a prefixed tool name back to the owning server, then call it. -/ +def Orchestrator.callTool (o : Orchestrator) (fullName : String) (args : Json) + : IO Json := do + match splitPrefixedName fullName with + | none => + throw <| IO.userError s!"orchestrator: tool name `{fullName}` is not prefixed" + | some (serverName, toolName) => + match o.servers.find? (·.spec.name == serverName) with + | none => + throw <| IO.userError s!"orchestrator: no server named `{serverName}`" + | some s => s.mcp.callTool toolName args + +/-! ## LLM call (OpenAI chat-completions with tools) -/ + +/-- POST one round to the LLM. Returns the parsed top-level response. -/ +private def chatOnce (o : Orchestrator) (messages : Array Json) : IO Json := do + let body := Json.mkObj [ + ("model", Json.str o.model), + ("temperature", Json.num 0), + ("max_tokens", Json.num (Int.ofNat o.maxTokens)), + ("messages", Json.arr messages), + ("tools", Json.arr o.openAiTools), + ("tool_choice", Json.str "auto") + ] + let url := s!"{o.cfg.baseUrl}/chat/completions" + let raw ← LeanTea.Net.HttpClient.postJsonText url body.compress + match Json.parse raw with + | .error e => throw <| IO.userError s!"openai: bad JSON\n{e}\n{raw}" + | .ok j => return j + +/-! ## Tool result rendering + +A tool result is an MCP `{content: [...]}` object where each item is +either `{type:"text",text:...}` or `{type:"image",mimeType:...,data:}`. +We split these: + + * `renderToolResult` joins all text parts into one string (which goes + back to the LLM via the `role:"tool"` message — OpenAI tool messages + only carry strings, not multimodal blocks). + * `extractToolImages` pulls every image block out as a `data:` URL, + ready for the UI to render and (for vision models) for the next + LLM round to actually look at via a synthetic user message. -/ + +private def renderToolResult (result : Json) : String := + let isErr := (result.getObjVal? "isError").toOption.bind (fun j => + match j with | .bool b => some b | _ => none) |>.getD false + let content := (result.getObjVal? "content").toOption.getD (Json.arr #[]) + let arr := (content.getArr?).toOption.getD #[] + let parts := arr.map fun item => + let typ := (item.getObjVal? "type").toOption.bind (·.getStr?.toOption) |>.getD "" + match typ with + | "text" => + (item.getObjVal? "text").toOption.bind (·.getStr?.toOption) |>.getD "" + | "image" => + let mime := (item.getObjVal? "mimeType").toOption.bind (·.getStr?.toOption) |>.getD "image/png" + s!"(image: {mime} — shown to the user; also attached as the next vision message)" + | _ => item.compress + let joined := String.intercalate "\n" parts.toList + if isErr then s!"ERROR: {joined}" else joined + +/-- Pull every `image` content block out of a tool result as + `data:;base64,<…>` URLs. -/ +private def extractToolImages (result : Json) : Array String := + let content := (result.getObjVal? "content").toOption.getD (Json.arr #[]) + let arr := (content.getArr?).toOption.getD #[] + arr.filterMap fun item => + let typ := (item.getObjVal? "type").toOption.bind (·.getStr?.toOption) |>.getD "" + if typ != "image" then none else + let mime := (item.getObjVal? "mimeType").toOption.bind (·.getStr?.toOption) |>.getD "image/png" + let data := (item.getObjVal? "data").toOption.bind (·.getStr?.toOption) |>.getD "" + if data.isEmpty then none else some s!"data:{mime};base64,{data}" + +/-! ## Public chat shape + +A higher-level message representation that the UI layers render. We +keep both this and the raw Json (`raw`) so the UI can show pretty +bubbles while the next LLM round still gets the exact OpenAI shape. -/ + +inductive Role where + | system | user | assistant | tool + deriving DecidableEq, Repr + +instance : Inhabited Role := ⟨.user⟩ + +def Role.toString : Role → String + | .system => "system" | .user => "user" + | .assistant => "assistant" | .tool => "tool" + +instance : ToString Role := ⟨Role.toString⟩ + +structure ChatMsg where + role : Role + /-- Human-visible text. For `assistant` messages that are pure + tool calls this can be empty; the call list lives in `toolCalls`. -/ + text : String := "" + /-- Attached images as `data:;base64,…` URLs. Populated for + user messages (uploads) and for tool messages (tool results that + included image content blocks). The renderer for both UI and + LLM uses this to surface images. -/ + images : Array String := #[] + /-- For `assistant` messages that called tools — the raw call list + from OpenAI. Each entry has `id` / `function.name` / `function.arguments`. -/ + toolCalls : Array Json := #[] + /-- For `tool` messages — which call id this is responding to. -/ + toolCallId : String := "" + /-- For `tool` messages — the prefixed tool name (so the UI can + render `chrome__chrome_navigate` etc.). -/ + toolName : String := "" + deriving Inhabited + +/-- Build an OpenAI multi-block `content` array from a text part and + a list of image data URLs. Used for `user` messages that carry + attachments (file uploads, paste, drag-drop). -/ +private def multiBlockContent (text : String) (images : Array String) : Json := + let textBlock := Json.mkObj [ + ("type", Json.str "text"), + ("text", Json.str text) + ] + let imgBlocks := images.map fun url => + Json.mkObj [ + ("type", Json.str "image_url"), + ("image_url", Json.mkObj [("url", Json.str url)]) + ] + Json.arr (#[textBlock] ++ imgBlocks) + +/-- Serialise a `ChatMsg` back to the OpenAI message Json shape. User + messages with attached images use the multi-block content shape + (`[{type:"text",...}, {type:"image_url",...}]`); everything else + is a plain string content. -/ +def ChatMsg.toJson (m : ChatMsg) : Json := + match m.role with + | .assistant => + let base : List (String × Json) := [ + ("role", Json.str "assistant"), + ("content", Json.str m.text) + ] + if m.toolCalls.isEmpty then Json.mkObj base + else Json.mkObj (base ++ [("tool_calls", Json.arr m.toolCalls)]) + | .tool => + Json.mkObj [ + ("role", Json.str "tool"), + ("tool_call_id", Json.str m.toolCallId), + ("name", Json.str m.toolName), + ("content", Json.str m.text) + ] + | r => + let content : Json := + if m.images.isEmpty then Json.str m.text + else multiBlockContent m.text m.images + Json.mkObj [ + ("role", Json.str r.toString), + ("content", content) + ] + +/-- Build the OpenAI messages array for a request, prepending the + system prompt automatically. -/ +private def Orchestrator.toRequestMessages (o : Orchestrator) + (history : Array ChatMsg) : Array Json := + let sys := Json.mkObj [ + ("role", Json.str "system"), + ("content", Json.str o.systemPrompt) + ] + #[sys] ++ history.map ChatMsg.toJson + +/-! ## One turn of the chat loop + +Append the user's new message, then loop until the LLM stops asking +for tools. Returns the appended messages so the UI can render the +intermediate tool calls + results, not just the final answer. -/ + +/-- What the user / policy decided about a specific tool invocation. + `allowOnce` / `denyOnce` apply only to this one call; + `allowAlways` / `denyAlways` also persist a new rule. -/ +inductive UserDecision where + | allowOnce + | denyOnce + | allowAlways + | denyAlways + deriving DecidableEq, Repr, Inhabited + +/-- Optional callbacks so a UI can stream visible progress (typing + indicator, tool-call notifications). All optional — pass `{}` to + suppress. -/ +structure ProgressHooks where + /-- Called just before each LLM round. `round` is 0-indexed. -/ + onLlmStart : Nat → IO Unit := fun _ => pure () + /-- Called as soon as the LLM responds, before any tool dispatch. -/ + onLlmEnd : Nat → IO Unit := fun _ => pure () + /-- Called for each tool call, before dispatch. `(name, argsJson)`. -/ + onToolCall : String → Json → IO Unit := fun _ _ => pure () + /-- Called after a tool call returns. `(name, rendered)`. -/ + onToolResult : String → String → IO Unit := fun _ _ => pure () + /-- Called when policy says `ask` — UI prompts the user and returns + the decision. The default rejects (`denyOnce`) so a hookless + caller can't be exploited by a tool that hasn't been allowed. + `(toolName, args)`. -/ + onAsk : String → Json → IO UserDecision := fun _ _ => pure .denyOnce + deriving Inhabited + +/-- Bundle of the live policy ref + a hook for asking the user. When + `policy` is `none` the orchestrator skips the check (legacy + behaviour — same as the old `runTurnFull` signature). -/ +structure PolicyConfig where + policy : Option LeanTea.Llm.Policy.LiveRef := none + deriving Inhabited + +/-- Run one user turn. `history` is the existing conversation (without + the new user message). `userImages` are `data:` URLs attached + alongside the prompt (file uploads, drag-drop, clipboard paste — + callers do the base64 conversion). Returns the new messages + appended to history. + + Tool results that include image content blocks are surfaced in + two places: the tool `ChatMsg` carries them as `images` (so the + UI can render them inline), and a synthetic `user` message with + those images is inserted right after the tool message so a + vision-capable LLM can actually look at them next round. + + On the LLM/MCP side, errors are surfaced via `throw` — let the + UI decide whether to render a red banner or rethrow. -/ +partial def Orchestrator.runTurnFull (o : Orchestrator) (history : Array ChatMsg) + (userInput : String) (userImages : Array String := #[]) + (hooks : ProgressHooks := {}) (policy : PolicyConfig := {}) + : IO (Array ChatMsg) := do + let userMsg : ChatMsg := { + role := .user, text := userInput, images := userImages + } + let mut acc : Array ChatMsg := #[userMsg] + let mut working : Array ChatMsg := history.push userMsg + let mut round : Nat := 0 + /- Bounded loop: each iteration is one LLM round-trip. We exit when + the assistant returns no tool_calls. -/ + for _ in [0:o.maxRounds] do + if round ≥ o.maxRounds then break + hooks.onLlmStart round + let resp ← chatOnce o (o.toRequestMessages working) + hooks.onLlmEnd round + /- Error envelope. -/ + if let .ok err := resp.getObjVal? "error" then + let msg := (err.getObjVal? "message").toOption.bind (·.getStr?.toOption) |>.getD err.compress + throw <| IO.userError s!"llm: {msg}" + let choices := (resp.getObjVal? "choices").toOption.bind (fun j => + (j.getArr?).toOption) |>.getD #[] + let choice := choices[0]?.getD Json.null + let message := (choice.getObjVal? "message").toOption.getD Json.null + let content := (message.getObjVal? "content").toOption.bind (·.getStr?.toOption) |>.getD "" + let toolCalls := (message.getObjVal? "tool_calls").toOption.bind (fun j => + (j.getArr?).toOption) |>.getD #[] + let assistantMsg : ChatMsg := { + role := .assistant, + text := content, + toolCalls + } + acc := acc.push assistantMsg + working := working.push assistantMsg + if toolCalls.isEmpty then + /- Final answer for this turn. -/ + return acc + /- Dispatch each tool call, append a tool message, loop. -/ + for call in toolCalls do + let id := (call.getObjVal? "id").toOption.bind (·.getStr?.toOption) |>.getD "" + let fn := (call.getObjVal? "function").toOption.getD Json.null + let name := (fn.getObjVal? "name").toOption.bind (·.getStr?.toOption) |>.getD "" + let argsS := (fn.getObjVal? "arguments").toOption.bind (·.getStr?.toOption) |>.getD "{}" + let argsJ := match Json.parse argsS with + | .ok j => j + | .error _ => Json.mkObj [] + hooks.onToolCall name argsJ + /- Policy gate. If no live policy is configured, we skip this + and just dispatch (legacy behaviour). Otherwise: check the + rules; on `ask` invoke the UI hook; on `deny` synthesize a + tool error rather than throwing so the LLM can recover. -/ + let decision : LeanTea.Llm.Policy.Decision ← + match policy.policy with + | none => pure LeanTea.Llm.Policy.Decision.allow + | some lr => + match ← lr.check name with + | .allow => pure LeanTea.Llm.Policy.Decision.allow + | .deny => pure LeanTea.Llm.Policy.Decision.deny + | .ask => + match ← hooks.onAsk name argsJ with + | .allowOnce => pure LeanTea.Llm.Policy.Decision.allow + | .denyOnce => pure LeanTea.Llm.Policy.Decision.deny + | .allowAlways => + lr.append { pattern := name, action := LeanTea.Llm.Policy.Action.allow } + pure LeanTea.Llm.Policy.Decision.allow + | .denyAlways => + lr.append { pattern := name, action := LeanTea.Llm.Policy.Action.deny } + pure LeanTea.Llm.Policy.Decision.deny + let result ← match decision with + | LeanTea.Llm.Policy.Decision.deny => + pure <| Json.mkObj [ + ("isError", Json.bool true), + ("content", Json.arr #[Json.mkObj [ + ("type", Json.str "text"), + ("text", Json.str s!"policy: tool `{name}` was denied by the user") + ]]) + ] + | _ => + try o.callTool name argsJ + catch e => pure <| Json.mkObj [ + ("isError", Json.bool true), + ("content", Json.arr #[Json.mkObj [ + ("type", Json.str "text"), + ("text", Json.str s!"orchestrator: {e}") + ]]) + ] + let rendered := renderToolResult result + let truncated := + if rendered.length > 4000 then + (rendered.take 4000).toString ++ s!"\n…[{rendered.length - 4000} more chars omitted]" + else rendered + let images := extractToolImages result + hooks.onToolResult name truncated + let toolMsg : ChatMsg := { + role := .tool, + text := truncated, + images, + toolCallId := id, + toolName := name + } + acc := acc.push toolMsg + working := working.push toolMsg + /- For vision-capable LLMs, append a synthetic user message with + the images so they're visible next round. Tool messages + carry only strings in the OpenAI spec; vision content has to + live on a user message. We hide this from the UI via + `synthetic := true`-style intent — the role stays `.user` but + the empty text + non-empty images keeps it visually unobtrusive + when the UI checks `m.text.isEmpty && m.images.isEmpty`. -/ + unless images.isEmpty do + let visionMsg : ChatMsg := { + role := .user, + text := s!"(image attached above is the result of `{name}` — \ +look at it to decide what to do next.)", + images + } + working := working.push visionMsg + /- We don't push the synthetic message into `acc` because the + UI already renders the tool message's images; surfacing it + twice would be confusing. The next round's request still + uses `working`, so the LLM sees the images. -/ + round := round + 1 + /- Hit maxRounds without a tool-free reply. Surface as an error + so the UI can show "agent gave up" rather than silently + truncating. -/ + throw <| IO.userError s!"orchestrator: hit maxRounds={o.maxRounds} \ +without a final answer" + +/-- Image-less convenience wrapper. The three demo apps default to + this when the caller doesn't attach images. -/ +def Orchestrator.runTurn (o : Orchestrator) (history : Array ChatMsg) + (userInput : String) (hooks : ProgressHooks := {}) + : IO (Array ChatMsg) := + o.runTurnFull history userInput #[] hooks + +/-! ## Config JSON parsing + +The three demo apps all accept `--config path/to/cfg.json`. We parse +it here so the UI binaries don't repeat the same code. -/ + +private def parseServerSpec (j : Json) : Except String ServerSpec := do + let name ← (j.getObjVal? "name").bind (·.getStr?) + let bin ← (j.getObjVal? "bin").bind (·.getStr?) + let args := + match (j.getObjVal? "args").toOption.bind (·.getArr?.toOption) with + | some a => a.filterMap (·.getStr?.toOption) + | none => #[] + return { name, bin, args } + +structure FileConfig where + model : String + baseUrl : String + systemPrompt : String + servers : Array ServerSpec + deriving Inhabited + +def loadConfig (path : String) : IO FileConfig := do + let src ← IO.FS.readFile path + match Json.parse src with + | .error e => throw <| IO.userError s!"config {path}: bad JSON: {e}" + | .ok j => + let model := (j.getObjVal? "model").toOption.bind (·.getStr?.toOption) + |>.getD "google/gemma-4-e4b" + let baseUrl := (j.getObjVal? "baseUrl").toOption.bind (·.getStr?.toOption) + |>.getD "http://127.0.0.1:11211/v1" + let systemPrompt := (j.getObjVal? "system").toOption.bind (·.getStr?.toOption) + |>.getD "You are a helpful assistant. Reply in the user's language." + let serversJ := (j.getObjVal? "servers").toOption.bind (·.getArr?.toOption) + |>.getD #[] + let mut specs : Array ServerSpec := #[] + for sj in serversJ do + match parseServerSpec sj with + | .ok s => specs := specs.push s + | .error e => throw <| IO.userError s!"config {path}: bad server entry: {e}" + return { model, baseUrl, systemPrompt, servers := specs } + +/-- Convenience: load config + spawn everything. -/ +def fromConfig (fc : FileConfig) : IO Orchestrator := do + let cfg : Config := { baseUrl := fc.baseUrl, timeoutSec := some 300 } + spawnAll cfg fc.model fc.systemPrompt fc.servers + +end LeanTea.Llm.McpOrchestrator diff --git a/LeanTea/Llm/Policy.lean b/LeanTea/Llm/Policy.lean new file mode 100644 index 0000000..262e0c6 --- /dev/null +++ b/LeanTea/Llm/Policy.lean @@ -0,0 +1,191 @@ +import Lean.Data.Json + +/-! # LeanTea.Llm.Policy — interactive allow/deny rules for MCP tool calls + +A simple persistent policy engine sitting between the orchestrator +and the MCP layer. Each rule is `{pattern: glob, action: allow|deny}` +matched against the prefixed tool name (`__`). The +first matching rule wins; if no rule matches the call is `Decision.ask` +and the orchestrator asks the user via a callback. + +The user's answer can be one-shot or persistent. When persistent, +we append a new rule and rewrite the policy file so the next call +to the same tool resolves without a prompt. + +Glob support is intentionally tiny: `*` matches any run of +characters (no `?`, no character classes, no recursive `**`). +That covers the common cases (`chrome__*`, `*__shell_run`, +`gemini__gemini_list_*`). + +Storage: one JSON file (`/policy.json`) shaped as: + +```json +{ + "rules": [ + {"pattern": "gemini__gemini_list_*", "action": "allow"}, + {"pattern": "chrome__*", "action": "allow"}, + {"pattern": "*__shell_*", "action": "deny"} + ] +} +``` -/ + +namespace LeanTea.Llm.Policy + +open Lean (Json) + +/-! ## Types -/ + +inductive Action where + | allow + | deny + deriving DecidableEq, Repr, Inhabited + +def Action.toString : Action → String + | .allow => "allow" + | .deny => "deny" + +instance : ToString Action := ⟨Action.toString⟩ + +def Action.ofString? : String → Option Action + | "allow" => some .allow + | "deny" => some .deny + | _ => none + +structure Rule where + pattern : String + action : Action + deriving Repr, Inhabited + +/-- Outcome of `check` on a tool name. `ask` means no rule fired — + the orchestrator should prompt the user. -/ +inductive Decision where + | allow + | deny + | ask + deriving DecidableEq, Repr, Inhabited + +/-! ## Glob matching + +`*` matches any (possibly empty) run of characters. Implemented +greedy with simple backtracking — fine for the dozens of rules a +human would ever write. -/ + +private partial def globMatch (pat str : String) : Bool := + go pat.toList str.toList +where + go : List Char → List Char → Bool + | [], [] => true + | [], _ => false + | '*' :: ps, [] => go ps [] + | '*' :: ps, s@(_::ss) => go ps s || go ('*' :: ps) ss + | p :: ps, c :: cs => p == c && go ps cs + | _ :: _, [] => false + +/-- Apply the rules in order; first match wins. -/ +def check (rules : List Rule) (toolName : String) : Decision := + let rec go : List Rule → Decision + | [] => .ask + | r :: rest => + if globMatch r.pattern toolName then + match r.action with + | .allow => .allow + | .deny => .deny + else go rest + go rules + +/-! ## JSON shape -/ + +def Rule.toJson (r : Rule) : Json := + Json.mkObj [ + ("pattern", Json.str r.pattern), + ("action", Json.str r.action.toString) + ] + +def Rule.fromJson? (j : Json) : Option Rule := do + let pat ← (j.getObjVal? "pattern").toOption.bind (·.getStr?.toOption) + let act ← (j.getObjVal? "action").toOption.bind (·.getStr?.toOption) + let action ← Action.ofString? act + return { pattern := pat, action } + +def rulesToJson (rs : List Rule) : Json := + Json.mkObj [("rules", Json.arr (rs.toArray.map Rule.toJson))] + +def rulesFromJson (j : Json) : List Rule := + let arr := (j.getObjVal? "rules").toOption.bind (·.getArr?.toOption) |>.getD #[] + arr.toList.filterMap Rule.fromJson? + +/-! ## Disk I/O -/ + +private def filePath (storeDir : String) : String := s!"{storeDir}/policy.json" + +private def ensureDir (dir : String) : IO Unit := do + unless (← System.FilePath.pathExists dir) do + IO.FS.createDirAll dir + +/-- Load rules from the policy file. Returns `[]` when the file + doesn't exist (fresh install) or is unreadable. -/ +def load (storeDir : String) : IO (List Rule) := do + let path := filePath storeDir + unless ← System.FilePath.pathExists path do return [] + try + let src ← IO.FS.readFile path + match Json.parse src with + | .error _ => return [] + | .ok j => return rulesFromJson j + catch _ => return [] + +/-- Persist a rule list. Overwrites the file. -/ +def save (storeDir : String) (rules : List Rule) : IO Unit := do + ensureDir storeDir + IO.FS.writeFile (filePath storeDir) (rulesToJson rules).pretty + +/-- Append a rule, skipping duplicates of `(pattern, action)`. + Returns the new rule list. -/ +def append (storeDir : String) (r : Rule) : IO (List Rule) := do + let rs ← load storeDir + let dup := rs.any fun x => x.pattern == r.pattern && x.action == r.action + let rs' := if dup then rs else rs ++ [r] + save storeDir rs' + return rs' + +/-- Drop the rule at the given index. No-op when out of range. + Returns the new rule list. -/ +def deleteAt (storeDir : String) (idx : Nat) : IO (List Rule) := do + let rs ← load storeDir + let rs' := rs.toArray.zipIdx + |>.filterMap (fun (r, i) => if i == idx then none else some r) + |>.toList + save storeDir rs' + return rs' + +/-! ## Live policy ref + +Hot-reloadable in-memory cache for the orchestrator. UIs and the +orchestrator share the same `IO.Ref` so editing rules from the UI +takes effect on the next tool call without a restart. -/ + +structure LiveRef where + ref : IO.Ref (List Rule) + storeDir : String + +def LiveRef.fromDisk (storeDir : String) : IO LiveRef := do + let rs ← load storeDir + let ref ← IO.mkRef rs + return { ref, storeDir } + +def LiveRef.get (lr : LiveRef) : IO (List Rule) := lr.ref.get + +def LiveRef.check (lr : LiveRef) (toolName : String) : IO Decision := do + let rs ← lr.ref.get + return Policy.check rs toolName + +/-- Add a rule and persist. Idempotent on duplicates. -/ +def LiveRef.append (lr : LiveRef) (r : Rule) : IO Unit := do + let rs ← Policy.append lr.storeDir r + lr.ref.set rs + +def LiveRef.deleteAt (lr : LiveRef) (idx : Nat) : IO Unit := do + let rs ← Policy.deleteAt lr.storeDir idx + lr.ref.set rs + +end LeanTea.Llm.Policy diff --git a/LeanTea/Net/Server.lean b/LeanTea/Net/Server.lean index c298f49..e65dca2 100644 --- a/LeanTea/Net/Server.lean +++ b/LeanTea/Net/Server.lean @@ -60,6 +60,19 @@ partial def serveLoop (server : Socket.Server) (handler : Handler) : IO Unit := handleConn handler client serveLoop server handler +/-- Concurrent variant: each accepted connection is handled in its + own `IO.asTask` so a long-running handler doesn't stall the rest + of the server. Use this for apps where one request can block + waiting on another (e.g. a chat UI that asks the user to approve + a tool call via a separate API endpoint). The single-threaded + `serveLoop` above is otherwise preferable — simpler lifetimes, + no task-spawn overhead. -/ +partial def serveLoopConcurrent (server : Socket.Server) (handler : Handler) + : IO Unit := do + let client ← (server.accept).block + let _ ← IO.asTask (handleConn handler client) + serveLoopConcurrent server handler + /-- Parse a dotted IPv4 string ("127.0.0.1") into a `Std.Net.IPv4Addr`. Returns `0.0.0.0` if parsing fails. -/ def parseIPv4 (s : String) : IPv4Addr := @@ -68,6 +81,21 @@ def parseIPv4 (s : String) : IPv4Addr := | [a, b, c, d] => ⟨#v[a.toUInt8, b.toUInt8, c.toUInt8, d.toUInt8]⟩ | _ => zero +/-- Same as `serve`, but accept loop hands each connection to a + concurrent task. Use when a handler can block on a user / external + event that has to be resolved via another HTTP request. -/ +def serveConcurrent (port : UInt16 := 8001) (host : String := "0.0.0.0") + (handler : Handler) : IO Unit := do + let server ← Socket.Server.mk + let addr : SocketAddress := .v4 { + addr := parseIPv4 host, + port := port + } + server.bind addr + server.listen 64 + IO.eprintln s!"serving (concurrent) on http://{host}:{port}/" + serveLoopConcurrent server handler + def serve (port : UInt16 := 8001) (host : String := "0.0.0.0") (handler : Handler) : IO Unit := do let server ← Socket.Server.mk diff --git a/LeanTea/Tui.lean b/LeanTea/Tui.lean new file mode 100644 index 0000000..247709b --- /dev/null +++ b/LeanTea/Tui.lean @@ -0,0 +1,5 @@ +import LeanTea.Tui.Core +import LeanTea.Tui.Combinator +import LeanTea.Tui.Element +import LeanTea.Tui.App +import LeanTea.Tui.Test diff --git a/LeanTea/Tui/App.lean b/LeanTea/Tui/App.lean new file mode 100644 index 0000000..e527ab4 --- /dev/null +++ b/LeanTea/Tui/App.lean @@ -0,0 +1,205 @@ +import LeanTea.Tui.Core +import LeanTea.Tui.Combinator + +/-! # LeanTea.Tui.App — the IO side: terminal setup, repaint, key read loop + +The pure widget tree from `Core` + `Combinator` + `Element` only +becomes a running app via `App.run`. The app: + + 1. Switches the terminal into raw mode + alt-screen + hides cursor. + 2. Renders the root widget to a `Box`, emits ANSI to repaint. + 3. Reads one keystroke (`Key`), routes it: + - Tab / Shift-Tab cycle focus across `RunOpts.focusOrder`. + - Anything else is passed to the root widget's `onKey`. If it + returns a `msg`, the app calls `update` and re-renders. + 4. Repeats until `App.quitWhen state` returns true. + +Terminal teardown lives in a `try ... finally`, so a crash leaves +the user's shell intact. + +## Known wart of v0 + +Focus traversal asks the caller to pass `RunOpts.focusOrder` +explicitly. Combinators (`vbox`, `hbox`) don't expose children, so +the framework can't auto-detect focusable ids from the tree. A +future revision will introduce a `Widget.focusables` visitor and +drop the manual list. +-/ + +namespace LeanTea.Tui + +/-! ## App definition -/ + +structure App (s : Type) (m : Type) where + init : s + view : s → Widget m + update : m → s → s + /-- The app exits cleanly the first time `quitWhen state` is true. -/ + quitWhen : s → Bool := fun _ => false + /-- Widget id to focus initially. Empty = first id in `RunOpts.focusOrder`. -/ + initialFocus : String := "" + +structure RunOpts where + width : Nat := 100 + height : Nat := 30 + /-- Explicit focus order, in tab cycle order. Must include every + `focusId` you want Tab to reach. -/ + focusOrder : List String := [] + +/-! ## ANSI primitives -/ + +private def esc (s : String) : String := s!"\x1b[{s}" + +private def clearScreen : String := esc "2J" ++ esc "H" +private def cursorHome : String := esc "H" +private def hideCursor : String := esc "?25l" +private def showCursor : String := esc "?25h" +private def altScreenOn : String := esc "?1049h" +private def altScreenOff : String := esc "?1049l" +private def reset : String := esc "0m" + +private def colorCode (c : Color) (bg : Bool) : String := + let base := if bg then 40 else 30 + match c with + | .default => esc s!"{base + 9}m" + | .black => esc s!"{base + 0}m" + | .red => esc s!"{base + 1}m" + | .green => esc s!"{base + 2}m" + | .yellow => esc s!"{base + 3}m" + | .blue => esc s!"{base + 4}m" + | .magenta => esc s!"{base + 5}m" + | .cyan => esc s!"{base + 6}m" + | .white => esc s!"{base + 7}m" + | .bright cc => + let bbase := if bg then 100 else 90 + match cc with + | .black => esc s!"{bbase + 0}m" + | .red => esc s!"{bbase + 1}m" + | .green => esc s!"{bbase + 2}m" + | .yellow => esc s!"{bbase + 3}m" + | .blue => esc s!"{bbase + 4}m" + | .magenta => esc s!"{bbase + 5}m" + | .cyan => esc s!"{bbase + 6}m" + | .white => esc s!"{bbase + 7}m" + | _ => esc s!"{base + 9}m" + +private def styleCode (s : Style) : String := Id.run do + let mut out := reset + out := out ++ colorCode s.fg false + out := out ++ colorCode s.bg true + if s.bold then out := out ++ esc "1m" + if s.dim then out := out ++ esc "2m" + if s.inverse then out := out ++ esc "7m" + return out + +/-- Render a Box to one ANSI string, batching consecutive cells of + the same style so we don't emit a fresh escape per character. -/ +def renderBoxAnsi (box : Box) : String := Id.run do + let mut out : String := cursorHome ++ reset + let mut curStyle : Style := { fg := .red } -- non-default so first cell forces an emit + for r in [:box.height] do + let row := box.cells[r]! + for c in [:box.width] do + let cell := row[c]! + if cell.style != curStyle then + out := out ++ styleCode cell.style + curStyle := cell.style + out := out.push cell.ch + out := out ++ reset ++ "\n" + curStyle := {} + return out ++ reset + +/-! ## Focus cycling -/ + +private def cycleNext (order : List String) (cur : String) : String := + let rec loop : List String → Option String → String + | [], _ => order.head?.getD "" + | x :: xs, none => loop xs (if x == cur then some x else none) + | x :: _, some _ => x + match order with + | [] => "" + | first :: _ => + if cur.isEmpty then first + else loop order none + +private def cyclePrev (order : List String) (cur : String) : String := + cycleNext order.reverse cur + +/-! ## Raw terminal mode (POSIX, via stty) -/ + +private def sttyRawOn : IO Unit := do + let _ ← IO.Process.run { cmd := "sh", args := #["-c", "stty raw -echo < /dev/tty"] } + pure () + +private def sttyRawOff : IO Unit := do + let _ ← IO.Process.run { cmd := "sh", args := #["-c", "stty -raw echo < /dev/tty"] } + pure () + +/-! ## Key reading -/ + +partial def readKey (stdin : IO.FS.Stream) : IO Key := do + let b0 ← stdin.read 1 + if b0.size == 0 then return Key.esc + let c0 := b0[0]! + if c0 == 0x1b then + let b1 ← stdin.read 1 + if b1.size == 0 then return Key.esc + let c1 := b1[0]! + if c1 == 0x5b /- [ -/ then + let b2 ← stdin.read 1 + if b2.size == 0 then return Key.esc + match b2[0]! with + | 0x41 /- A -/ => return Key.up + | 0x42 /- B -/ => return Key.down + | 0x43 /- C -/ => return Key.right + | 0x44 /- D -/ => return Key.left + | 0x46 /- F -/ => return Key.end_ + | 0x48 /- H -/ => return Key.home + | 0x5a /- Z -/ => return Key.shiftTab + | _ => return Key.esc + else + return Key.esc + else if c0 == 0x09 then return Key.tab + else if c0 == 0x0d || c0 == 0x0a then return Key.enter + else if c0 == 0x7f || c0 == 0x08 then return Key.backspace + else if c0 < 0x20 then + let letter := Char.ofNat (c0.toNat + 0x60) + return Key.ctrl letter + else + return Key.char (Char.ofNat c0.toNat) + +/-! ## Main loop -/ + +partial def App.runWith (app : App s m) (opts : RunOpts) : IO Unit := do + let stdout ← IO.getStdout + let stdin ← IO.getStdin + stdout.putStr (altScreenOn ++ hideCursor ++ clearScreen) + stdout.flush + sttyRawOn + let mut state := app.init + let mut focused := + if app.initialFocus.isEmpty then opts.focusOrder.head?.getD "" else app.initialFocus + try + while !(app.quitWhen state) do + let w := app.view state + let box := w.render opts.width opts.height (focused == w.focusId) + stdout.putStr (renderBoxAnsi box) + stdout.flush + let key ← readKey stdin + match key with + | .ctrl 'c' => break + | .tab => focused := cycleNext opts.focusOrder focused + | .shiftTab => focused := cyclePrev opts.focusOrder focused + | _ => + match w.dispatchKey focused key with + | none => pure () + | some msg => state := app.update msg state + finally + sttyRawOff + stdout.putStr (showCursor ++ altScreenOff ++ reset) + stdout.flush + +def App.run (app : App s m) : IO Unit := + App.runWith app {} + +end LeanTea.Tui diff --git a/LeanTea/Tui/Combinator.lean b/LeanTea/Tui/Combinator.lean new file mode 100644 index 0000000..44dcc72 --- /dev/null +++ b/LeanTea/Tui/Combinator.lean @@ -0,0 +1,199 @@ +import LeanTea.Tui.Core + +/-! # LeanTea.Tui.Combinator — vbox / hbox / border / padding / withStyle + +Layout combinators that take widgets and produce widgets. Each +combinator's `render` lays out children inside the allocated +`width × height`. Each combinator's `onKey` is dispatched by the +app — combinators themselves don't consume keys (children do). + +`vbox` and `hbox` allocate space: + + * Children with a `prefHeight` (vbox) or `prefWidth` (hbox) get + exactly that much (clamped to remaining space). + * Whatever's left is split evenly among unconstrained children. + +This is not flex-grow / flex-shrink — too much machinery for a TUI +of this scale. Use `prefHeight` for headers/footers and let the +single "content area" eat the remainder. +-/ + +namespace LeanTea.Tui + +/-! ## vbox -/ + +/-- Allocate space (height for vbox, width for hbox) to children. + Children with a `Some n` pref get exactly that much (clamped to + remaining); the rest is split evenly among `None` children. -/ +private def splitV (total : Nat) (prefs : List (Option Nat)) : List Nat := Id.run do + let mut allocated : Array Nat := #[] + let mut used : Nat := 0 + let mut unspecified : Nat := 0 + for p in prefs do + match p with + | some n => + let take := if used + n > total then total - used else n + allocated := allocated.push take + used := used + take + | none => + allocated := allocated.push 0 + unspecified := unspecified + 1 + let remaining := if used > total then 0 else total - used + let each := if unspecified > 0 then remaining / unspecified else 0 + let mut extra := if unspecified > 0 then remaining % unspecified else 0 + let mut out : Array Nat := #[] + let mut i : Nat := 0 + for p in prefs do + match p with + | some _ => + out := out.push allocated[i]! + | none => + let bonus := if extra > 0 then 1 else 0 + if extra > 0 then extra := extra - 1 + out := out.push (each + bonus) + i := i + 1 + return out.toList + +/-- Pad / crop a Box to exactly (`width × height`). Used by the box + layouts to enforce that each child occupies its allocation even + when the child's own renderer ignores the size hint. -/ +private def fitBox (w h : Nat) (b : Box) : Box := Id.run do + -- Truncate / pad height + let mut rows := b.cells + if rows.size > h then rows := rows.extract 0 h + while rows.size < h do + rows := rows.push (Array.replicate b.width ({} : Cell)) + -- Truncate / pad each row's width + let mut fixed : Array (Array Cell) := #[] + for row in rows do + let mut r := row + if r.size > w then r := r.extract 0 w + while r.size < w do r := r.push {} + fixed := fixed.push r + return { width := w, height := h, cells := fixed } + +def vbox (children : List (Widget m)) : Widget m := { + render := fun w h _focused => Id.run do + let heights := splitV h (children.map (·.prefHeight)) + let mut acc : Box := Box.empty w 0 + let mut idx := 0 + for child in children do + let ch := heights[idx]! + if ch > 0 then + let raw := child.render w ch false + let fitted := fitBox w ch raw + acc := Box.vcat acc fitted + idx := idx + 1 + if acc.height < h then + acc := Box.vcat acc (Box.empty w (h - acc.height)) + return acc, + onKey := fun _ => none, + children := children +} + +/-! ## hbox -/ + +def hbox (children : List (Widget m)) : Widget m := { + render := fun w h _focused => Id.run do + let widths := splitV w (children.map (·.prefWidth)) + let mut acc : Box := Box.empty 0 h + let mut idx := 0 + for child in children do + let cw := widths[idx]! + if cw > 0 then + let raw := child.render cw h false + let fitted := fitBox cw h raw + acc := Box.hcat acc fitted + idx := idx + 1 + if acc.width < w then + acc := Box.hcat acc (Box.empty (w - acc.width) h) + return acc, + onKey := fun _ => none, + children := children +} + +/-! ## padding -/ + +def padding (p : Nat) (child : Widget m) : Widget m := { + render := fun w h focused => Id.run do + if w < 2*p || h < 2*p then + return Box.empty w h + else + let innerW := w - 2*p + let innerH := h - 2*p + let inner := child.render innerW innerH focused + let frame := Box.empty w h + return Box.overlay frame inner p p, + onKey := child.onKey, + focusId := child.focusId, + children := [child] +} + +/-! ## border -/ + +inductive BorderStyle where + | none | line | doubleLine | dashed + deriving BEq + +private def borderChars (s : BorderStyle) : Char × Char × Char × Char × Char × Char := + match s with + | .none => (' ', ' ', ' ', ' ', ' ', ' ') + | .line => ('┌', '┐', '└', '┘', '─', '│') + | .doubleLine => ('╔', '╗', '╚', '╝', '═', '║') + | .dashed => ('+', '+', '+', '+', '-', '|') + +def border (style : BorderStyle := .line) (child : Widget m) : Widget m := { + render := fun w h focused => Id.run do + if w < 2 || h < 2 then + return child.render w h focused + let (tl, tr, bl, br, hChar, vChar) := borderChars style + let lineStyle : Style := if focused then { fg := .cyan, bold := true } else {} + -- Top row: tl + hChar*(w-2) + tr + let topRow := #[{ ch := tl, style := lineStyle : Cell }] + ++ Array.replicate (w - 2) ({ ch := hChar, style := lineStyle } : Cell) + ++ #[{ ch := tr, style := lineStyle : Cell }] + let botRow := #[{ ch := bl, style := lineStyle : Cell }] + ++ Array.replicate (w - 2) ({ ch := hChar, style := lineStyle } : Cell) + ++ #[{ ch := br, style := lineStyle : Cell }] + -- Middle rows: vChar + (w-2) cells + vChar (cells from child) + let inner := child.render (w - 2) (h - 2) focused + let mut rows : Array (Array Cell) := #[topRow] + for r in [:h - 2] do + let innerRow := if r < inner.height then inner.cells[r]! else Array.replicate (w - 2) ({} : Cell) + let row := #[{ ch := vChar, style := lineStyle : Cell }] + ++ innerRow + ++ #[{ ch := vChar, style := lineStyle : Cell }] + rows := rows.push row + rows := rows.push botRow + return { width := w, height := h, cells := rows }, + onKey := child.onKey, + focusId := child.focusId, + children := [child] +} + +/-! ## withStyle — paint every cell the child produces -/ + +def withStyle (s : Style) (child : Widget m) : Widget m := { + render := fun w h focused => + let inner := child.render w h focused + let cells := inner.cells.map (fun row => row.map (fun c => { c with style := s })) + { inner with cells := cells }, + onKey := child.onKey, + focusId := child.focusId, + children := [child] +} + +/-! ## text — non-interactive label (1 row) -/ + +def text (s : String) (style : Style := {}) : Widget m := { + render := fun w _h _focused => Box.text w s style, + prefHeight := some 1 +} + +/-! ## blank — empty filler -/ + +def blank : Widget m := { + render := fun w h _ => Box.empty w h +} + +end LeanTea.Tui diff --git a/LeanTea/Tui/Core.lean b/LeanTea/Tui/Core.lean new file mode 100644 index 0000000..a9c6226 --- /dev/null +++ b/LeanTea/Tui/Core.lean @@ -0,0 +1,253 @@ +/-! # LeanTea.Tui.Core — Box, Cell, Style, Key, Widget + +A tiny brick-style TUI substrate. The whole rendering model is: + + * `Box` — a width × height grid of `Cell`s, where each `Cell` is a + `Char` + a `Style`. + * `Widget m` — a renderer `Nat → Nat → Bool → Box` (width, height, + focused?) plus an `onKey` that translates a `Key` into a message + of type `m` (or `none` to ignore). + * Layout combinators (`vbox`, `hbox`, `border`, `padding`, + `withStyle`) and elements (`button`, `input`, `panel`, `text`) + live in sibling files. + +Everything is pure — the IO side (read keystroke, write to terminal, +clear screen) lives only in `LeanTea.Tui.App`. That separation is +what lets the same widget tree be rendered and key-driven by +`LeanTea.Tui.Test` without a real TTY. + +## Why not just hand-roll ANSI + +We tried that in `examples/LlmChatTui` and ran into the same trap +the ChuHan `button` kit fixed: every UI element wires render + +event handling separately and you eventually forget one. With +widgets you compose `button("primary", .quit, "/quit")` and the +key handling, style, and focus highlight come along for free. +-/ + +namespace LeanTea.Tui + +/-! ## Style — color + attribute bits -/ + +inductive Color where + | default + | black | red | green | yellow | blue | magenta | cyan | white + | bright (c : Color) + deriving Inhabited, BEq, Repr + +structure Style where + fg : Color := .default + bg : Color := .default + bold : Bool := false + dim : Bool := false + inverse : Bool := false + deriving Inhabited, BEq, Repr + +def Style.default : Style := {} + +/-! ## Cell + Box + +`Box.cells[r][c]` is row r, column c (both 0-indexed). Box has fixed +size; combinators that change size return a new Box. Out-of-range +access via `cellAt` returns a default ' ' cell so renderers can +overlay without bounds checks at every step. -/ + +structure Cell where + ch : Char := ' ' + style : Style := {} + deriving Inhabited, BEq + +structure Box where + width : Nat + height : Nat + /-- `cells[r][c]` for 0 ≤ r < height, 0 ≤ c < width. -/ + cells : Array (Array Cell) + deriving Inhabited + +namespace Box + +/-- An empty box of given size filled with the default cell. -/ +def empty (width height : Nat) : Box := { + width, height, + cells := Array.replicate height (Array.replicate width ({} : Cell)) +} + +/-- A box filled with one character + style. -/ +def filled (width height : Nat) (ch : Char) (style : Style := {}) : Box := { + width, height, + cells := Array.replicate height (Array.replicate width { ch, style }) +} + +/-- A single row from a string. Truncates or pads to `width`. -/ +def textRow (width : Nat) (s : String) (style : Style := {}) : Array Cell := Id.run do + let chars := s.toList + let mut row : Array Cell := #[] + for c in chars do + if row.size < width then row := row.push { ch := c, style := style } + while row.size < width do + row := row.push { ch := ' ', style := style } + return row + +/-- One-line text box. -/ +def text (width : Nat) (s : String) (style : Style := {}) : Box := { + width, height := 1, + cells := #[textRow width s style] +} + +/-- Get cell at (r, c). Out of range → default cell. -/ +def cellAt (b : Box) (r c : Nat) : Cell := + match b.cells[r]? with + | none => {} + | some row => row[c]?.getD {} + +/-- Overlay `over` onto `under` starting at (top, left). Cells of + `over` whose `ch` is the substitute-character `'\u0000'` mean + "transparent" and don't paint. -/ +def overlay (under over : Box) (top left : Nat) : Box := Id.run do + let mut cells := under.cells + for r in [:over.height] do + let dstRow := top + r + if dstRow ≥ under.height then break + let row := cells[dstRow]! + let mut newRow := row + for c in [:over.width] do + let dstCol := left + c + if dstCol ≥ under.width then break + let cell := (over.cells[r]!)[c]! + if cell.ch != '\u0000' then + newRow := newRow.set! dstCol cell + cells := cells.set! dstRow newRow + return { under with cells := cells } + +/-- Glue two boxes side by side. Heights are unified by padding the + shorter one at the bottom with default cells. -/ +def hcat (a b : Box) : Box := Id.run do + let h := if a.height > b.height then a.height else b.height + let w := a.width + b.width + let mut rows : Array (Array Cell) := #[] + for r in [:h] do + let aRow := if r < a.height then a.cells[r]! else Array.replicate a.width ({} : Cell) + let bRow := if r < b.height then b.cells[r]! else Array.replicate b.width ({} : Cell) + rows := rows.push (aRow ++ bRow) + return { width := w, height := h, cells := rows } + +/-- Stack two boxes vertically. Widths are unified by padding the + narrower one on the right with default cells. -/ +def vcat (a b : Box) : Box := Id.run do + let w := if a.width > b.width then a.width else b.width + let pad := fun (row : Array Cell) (rowW : Nat) => + if rowW < w then row ++ Array.replicate (w - rowW) ({} : Cell) else row + let mut rows : Array (Array Cell) := #[] + for row in a.cells do rows := rows.push (pad row a.width) + for row in b.cells do rows := rows.push (pad row b.width) + return { width := w, height := a.height + b.height, cells := rows } + +/-- Flatten the box to a list of plain text rows (no style info). + Used by the test framework for substring assertions. -/ +def toRows (b : Box) : List String := + b.cells.toList.map (fun row => + String.mk (row.toList.map (fun c => c.ch))) + +/-- All rows joined with '\n'. -/ +def toString (b : Box) : String := + String.intercalate "\n" b.toRows + +/-- Does any row contain `needle` as a substring? Test convenience. -/ +def contains (b : Box) (needle : String) : Bool := + b.toRows.any (fun row => (row.splitOn needle).length > 1) + +end Box + +/-! ## Key — what `onKey` receives -/ + +inductive Key where + | char (c : Char) + | enter + | tab + | shiftTab + | esc + | backspace + | delete + | up | down | left | right + | home | end_ + | ctrl (c : Char) + deriving Inhabited, BEq, Repr + +/-! ## Widget + +A widget is fully described by its renderer + key handler. `focusId` +is `none` when the widget can't receive focus; otherwise the app's +focus traversal will visit it. -/ + +structure Widget (msg : Type) where + /-- Render to a Box that fits exactly the given width / height. + `focused` is whether this widget currently holds focus, so it + can paint itself differently (e.g. inverse colour highlight). -/ + render : Nat → Nat → Bool → Box + /-- Key handling. `none` = "I don't consume this key, let the app + route it (e.g. Tab to next widget)". -/ + onKey : Key → Option msg := fun _ => none + /-- Used by the app's focus traversal. `""` = not focusable. -/ + focusId : String := "" + /-- Natural size hint. The app honours it inside vbox/hbox if it + can — otherwise it stretches to fit. `none` = "as big as you'll + give me". -/ + prefWidth : Option Nat := none + prefHeight : Option Nat := none + /-- Direct child widgets, in render order. Combinators (`vbox`, + `hbox`, `border`, …) set this so the App's focus dispatch can + walk the tree to locate the widget that owns the current focus + id. Leaf widgets leave it empty. -/ + children : List (Widget msg) := [] + +namespace Widget + +/-- Walk the tree depth-first and apply `key` to the widget whose + `focusId` matches `focused`. Returns the first `some msg` found, + `none` if no focused widget consumes the key. -/ +partial def dispatchKey (w : Widget m) (focused : String) (key : Key) : Option m := + if w.focusId == focused && !focused.isEmpty then + w.onKey key + else + let rec go : List (Widget m) → Option m + | [] => none + | c :: rest => + match dispatchKey c focused key with + | some m => some m + | none => go rest + go w.children + +/-- All focus ids in the tree, depth-first, in render order. + Used by the app to derive the Tab cycle order without the + caller having to declare it. -/ +partial def focusables (w : Widget m) : List String := + let here := if w.focusId.isEmpty then [] else [w.focusId] + here ++ (w.children.flatMap focusables) + +/-- Render with focus propagation: each focusable child gets + `focused = true` only if its id matches `focusId`. Combinators' + `render` only sees `focused = false` since combinators themselves + aren't focusable. -/ +partial def renderFocused (w : Widget m) (width height : Nat) (focusId : String) : Box := + if w.children.isEmpty then + -- Leaf widget: just render with the right focused flag. + w.render width height (w.focusId == focusId && !focusId.isEmpty) + else + -- Combinator: render via its layout but with children that + -- propagate focus themselves. We do this by building a + -- "shadow" widget tree where each child's render closure is + -- replaced with one that knows about focus. Simpler: just + -- call the combinator's render which already calls child + -- renders — but with no awareness of focus. So we'd need + -- the combinator's render to accept a focus-resolver. + -- + -- For v0 we cheat: combinators (vbox/hbox/border) call their + -- children's `render` directly without focus. Until that's + -- refactored, we approximate by walking the children and + -- compositing in App.runWith. So this function is mostly a + -- placeholder for future use. + w.render width height (w.focusId == focusId && !focusId.isEmpty) + +end Widget + +end LeanTea.Tui diff --git a/LeanTea/Tui/Element.lean b/LeanTea/Tui/Element.lean new file mode 100644 index 0000000..c15fda7 --- /dev/null +++ b/LeanTea/Tui/Element.lean @@ -0,0 +1,154 @@ +import LeanTea.Tui.Core +import LeanTea.Tui.Combinator + +/-! # LeanTea.Tui.Element — button / input / panel / listView + +Concrete focusable widgets. Each follows the same kind-driven idea +as the ChuHan button kit: + + * `kind` selects style (and, for buttons, the SFX in the GUI case) + * `msg` is what's emitted on activation + * `label` / `value` / `items` is the payload + +A widget's `focusId` is what the app's focus traversal sees; we +use a structured `prefix-id` string so it stays unique even when +the same widget kind appears multiple times in the same view. + +## Why a single Element file (not one-per-widget) + +Each widget is ~25 lines; the count is bounded. Splitting per file +adds 4-5 imports per app build. Once we cross ~6 widgets I'll +re-split. +-/ + +namespace LeanTea.Tui + +/-! ## button — single-line activation -/ + +inductive ButtonKind where + | primary | secondary | danger | ghost + deriving BEq + +private def buttonStyle (kind : ButtonKind) (focused : Bool) : Style := + match kind, focused with + | .primary, true => { fg := .black, bg := .cyan, bold := true } + | .primary, false => { fg := .cyan, bold := true } + | .secondary, true => { fg := .black, bg := .white } + | .secondary, false => { fg := .white } + | .danger, true => { fg := .black, bg := .red, bold := true } + | .danger, false => { fg := .red, bold := true } + | .ghost, true => { fg := .yellow, inverse := true } + | .ghost, false => { fg := .yellow, dim := true } + +def button (id : String) (kind : ButtonKind) (label : String) (msg : m) : Widget m := + let labelLen := label.length + let render := fun w _h focused => + let chrome := if focused then "▶ " else " " + let inner := chrome ++ label + Box.text w inner (buttonStyle kind focused) + { + render := render, + onKey := fun + | .enter => some msg + | .char ' ' => some msg + | _ => none, + focusId := id, + prefHeight := some 1, + prefWidth := some (labelLen + 2) + } + +/-! ## input — single-line text entry + +The widget renders the current value and the caret when focused. It +fires one `msg` per keystroke; the parent state holds the actual +string and is the source of truth — this keeps focus & state +ownership clean. + +Common pattern: parent decides how to wire each key via `dispatch`. -/ + +structure InputBindings (m : Type) where + onChar : Char → m + onBackspace : m + onSubmit : m + onEsc : Option m := none + +def input (id : String) (value : String) (bindings : InputBindings m) : Widget m := + let render := fun w _h focused => + let caret := if focused then "│" else " " + let visible := value ++ caret + -- truncate from the left if value overflows so caret stays on screen + let display := + if visible.length > w then + let drop := visible.length - w + (visible.drop drop).toString + else visible + let style : Style := if focused then { bg := .blue, fg := .white } else { dim := true } + Box.text w display style + { + render := render, + onKey := fun + | .char c => some (bindings.onChar c) + | .backspace => some bindings.onBackspace + | .enter => some bindings.onSubmit + | .esc => bindings.onEsc + | _ => none, + focusId := id, + prefHeight := some 1 + } + +/-! ## panel — bordered container with a title -/ + +def panel (title : String) (style : BorderStyle := .line) (body : Widget m) : Widget m := + let titleBar : Widget m := { + render := fun w _h focused => + let mark := if focused then " ◆ " else " · " + let line := mark ++ title + let s : Style := if focused then { fg := .cyan, bold := true } else { fg := .white, bold := true } + Box.text w line s, + prefHeight := some 1 + } + border style (vbox [titleBar, body]) + +/-! ## listView — scrollable item list with cursor + +Items are plain strings (formatted by the caller). The selected +index is owned by the parent state. The list renders a window of +items around the selection. -/ + +def listView (id : String) (items : List String) (selected : Nat) + (onUp onDown : m) (onSelect : m) : Widget m := + let render := fun w h focused => Id.run do + if items.isEmpty || h == 0 then + return Box.empty w h + -- Window: keep selected in view. + let total := items.length + let start := + if selected >= h then min (selected - h + 1) (total - 1) else 0 + let window := (items.drop start).take h + let mut rows : Array (Array Cell) := #[] + let mut idx := start + for itm in window do + let isSel := idx == selected + let chrome := if isSel then "▶ " else " " + let line := chrome ++ itm + let s : Style := + if isSel && focused then { fg := .black, bg := .cyan } + else if isSel then { bold := true } + else {} + rows := rows.push (Box.textRow w line s) + idx := idx + 1 + -- Pad bottom. + while rows.size < h do + rows := rows.push (Array.replicate w ({} : Cell)) + return { width := w, height := h, cells := rows } + { + render := render, + onKey := fun + | .up => some onUp + | .down => some onDown + | .enter => some onSelect + | _ => none, + focusId := id + } + +end LeanTea.Tui diff --git a/LeanTea/Tui/Test.lean b/LeanTea/Tui/Test.lean new file mode 100644 index 0000000..3d7eb0a --- /dev/null +++ b/LeanTea/Tui/Test.lean @@ -0,0 +1,116 @@ +import LeanTea.Tui.Core +import LeanTea.Tui.Combinator +import LeanTea.Tui.App + +/-! # LeanTea.Tui.Test — pure render + key dispatch, no TTY + +The widget kit's main payoff: every part of `App.run`'s loop except +the IO bits (terminal modes, stdin reads, stdout writes) is a pure +function. This file exposes those pure pieces as a testable harness: + + * `mkSession init view update` — wrap an app + * `Session.render` — produce the current `Box` for inspection + * `Session.sendKey` — fire a synthetic `Key` and return the new state + * `Session.containsText` / `Session.rowAt` — text assertions + +The same widgets can be unit-tested without running a real terminal. +Combined with LSpec we can keep regression tests of layout + +keybindings cheap. + +## Pattern + +``` +let app : App Counter Msg := { init, view, update } +let s := mkSession app { width := 20, height := 3 } +let s' := s.sendKey .enter +assert (s'.state.count == 1) +let s'' := s'.sendKey (.char 'q') +assert (s''.box.contains "1") +``` +-/ + +namespace LeanTea.Tui + +/-! ## Session — the test harness -/ + +structure Session (s : Type) (m : Type) where + app : App s m + width : Nat := 80 + height : Nat := 24 + state : s + focused : String := "" + /-- Per-session focus traversal order. Same shape as `RunOpts.focusOrder`. -/ + focusOrder : List String := [] + +/-- Build a Session from an App. -/ +def mkSession (app : App s m) (width : Nat := 80) (height : Nat := 24) + (focusOrder : List String := []) : Session s m := { + app := app, + width := width, + height := height, + state := app.init, + focused := if app.initialFocus.isEmpty then focusOrder.head?.getD "" else app.initialFocus, + focusOrder := focusOrder +} + +namespace Session + +/-- Render the current state to a `Box`. -/ +def render (s : Session a m) : Box := + let w := s.app.view s.state + w.render s.width s.height (s.focused == w.focusId) + +/-- Flatten the rendered Box to a single string with rows separated + by '\n'. Convenient for snapshot tests. -/ +def text (s : Session a m) : String := + s.render.toString + +/-- Does any rendered row contain `needle`? -/ +def containsText (s : Session a m) (needle : String) : Bool := + s.render.contains needle + +/-- Get row `r` as plain text. Useful when ordering matters. -/ +def rowAt (s : Session a m) (r : Nat) : String := + (s.render.toRows[r]?).getD "" + +/-- Cycle one position forward through a focus list. Returns the + first element when `cur` isn't found or is empty. -/ +private partial def cycleFwd (order : List String) (cur : String) : String := + match order with + | [] => "" + | first :: _ => + if cur.isEmpty then first + else + let rec go : List String → String + | [] => first + | [_] => first + | a :: b :: rest => + if a == cur then b else go (b :: rest) + go order + +/-- Send a single key and return the new Session. Same dispatch + logic as `App.runWith` but pure — no IO. -/ +def sendKey (s : Session a m) (key : Key) : Session a m := + match key with + | .tab => { s with focused := cycleFwd s.focusOrder s.focused } + | .shiftTab => { s with focused := cycleFwd s.focusOrder.reverse s.focused } + | _ => + let w := s.app.view s.state + match w.dispatchKey s.focused key with + | none => s + | some msg => { s with state := s.app.update msg s.state } + +/-- Send a list of keys in sequence. -/ +def sendKeys (s : Session a m) (keys : List Key) : Session a m := + keys.foldl (fun acc k => acc.sendKey k) s + +/-- Type a literal string as character keys. -/ +def typeString (s : Session a m) (text : String) : Session a m := + s.sendKeys (text.toList.map Key.char) + +/-- Has the app's quit condition fired? -/ +def isQuit (s : Session a m) : Bool := s.app.quitWhen s.state + +end Session + +end LeanTea.Tui diff --git a/examples/AgentDashboard/README.md b/examples/AgentDashboard/README.md new file mode 100644 index 0000000..9f2a9eb --- /dev/null +++ b/examples/AgentDashboard/README.md @@ -0,0 +1,97 @@ +# agent_dashboard_serve + +Visual control + telemetry for `LeanTea.Agent.Conductor`. Pair with +`browser_mcp_serve` and a small JSON playbook collection to play a +browser game. + +## Try it against the bundled toy game + +The toy at `examples/AgentDashboard/game/index.html` is a 3-button +mini-game with reward hierarchy `Big Quest (+10) > Coin (+1) > Rest +(+0.1)`. It exposes `window.__screen` / `__coins` / `__energy` so the +observer can read state cheaply. + +```bash +# 1. Serve the toy game on some port (any static file server works) +python3 -m http.server 8090 --directory examples/AgentDashboard/game & + +# 2. Launch Chrome with remote debugging (one-time per profile dir) +mkdir -p /tmp/cdp-profile +/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \ + --remote-debugging-port=9222 --user-data-dir=/tmp/cdp-profile \ + http://localhost:8090/ & + +# 3. Boot the dashboard pointed at our example playbooks +mkdir -p examples/AgentDashboard/state/playbooks +cp examples/AgentDashboard/playbooks/*.json \ + examples/AgentDashboard/state/playbooks/ + +./.lake/build/bin/agent_dashboard_serve \ + --config examples/AgentDashboard/dashboard-config.json \ + --store examples/AgentDashboard/state \ + --port 8040 + +# 4. Open the dashboard +open http://localhost:8040/ +``` + +The conductor will: +1. Observe `window.__screen` every 500 ms. +2. Filter playbooks whose `pre.whenScreen` matches the observation. +3. UCB1-rank candidates by `Stats.avgReward + sqrt(2 ln N / n)`, + using `estReward` as an optimistic prior on cold starts. +4. Run the picked playbook's `script` via the orchestrator's MCP calls. +5. Record outcome + accumulated reward; persist stats to disk. + +After ~20 runs the bandit should converge to **Big Quest** when +energy ≥ 3 and **Rest** otherwise — Coin gets squeezed out because +its reward density is dominated by Big Quest. + +## Smoke without a browser + +```bash +mkdir -p /tmp/dash-smoke/playbooks +cp examples/AgentDashboard/playbooks/*.json /tmp/dash-smoke/playbooks/ +./.lake/build/bin/agent_dashboard_serve \ + --config examples/AgentDashboard/dashboard-config-empty.json \ + --store /tmp/dash-smoke --port 8041 +``` + +The observer returns `"unknown"` every tick, no playbook matches, the +escalator prints `⚠ escalation: reason=no_match` and skips ahead 2 s. +`/api/state`, `/api/playbooks`, `/api/live` all return sane JSON — +useful for verifying wiring without booting Chrome. + +## Adding playbooks + +A playbook is one JSON file under `/playbooks/`: + +```json +{ + "id": "my_routine", + "name": "human-readable name", + "description": "...", + "pre": {"whenScreen": "hub"}, + "estReward": 5.0, + "maxBurst": 3, + "timeoutMs": 6000, + "enabled": true, + "script": { + "name": "my_routine", + "steps": [ + {"act": "click_xy", "x": 100, "y": 200}, + {"act": "wait", "ms": 500}, + {"act": "tool_call", + "tool": "browser__browser_evaluate", + "args": {"expression": "({reward: 5})"}} + ] + } +} +``` + +The conductor hot-reloads the playbook directory every tick — drop a +new file in and it shows up in the dashboard on the next refresh. + +Step shapes are the same as `LeanTea.Agent.Script.Action`: +`click_xy`, `wait`, `screenshot`, `wait_for_screen`, `tool_call`. +`tool_call` is the escape hatch — invoke any MCP tool with any args. diff --git a/examples/AgentDashboard/Serve.lean b/examples/AgentDashboard/Serve.lean new file mode 100644 index 0000000..d5b1c6c --- /dev/null +++ b/examples/AgentDashboard/Serve.lean @@ -0,0 +1,579 @@ +import LeanTea +import Lean.Data.Json + +/-! # agent_dashboard_serve — visual control + telemetry for the Conductor + +Wraps a `LeanTea.Agent.Conductor` in a single-page Web UI: + + * **Live** — current playbook, current screenshot, last N runs. + * **Playbooks** — every defined playbook with bandit stats + (runs / win-rate / avg reward / last run) and a + toggle to enable/disable. + * **Rewards** — cumulative reward chart over time. + +The dashboard owns the orchestrator and the conductor loop (boots +them at startup, spawns the loop in `IO.asTask`). Browser-side state +is read-only polling — every 1s the page re-fetches `/api/live` and +`/api/playbooks`. + +``` +agent_dashboard_serve \\ + --config examples/AgentDashboard/dashboard-config.json \\ + --store examples/AgentDashboard/state \\ + --port 8040 +``` + +The config JSON declares: the MCP servers to spawn (typically just +`browser_mcp_serve`), and the **observer** — a JS expression +evaluated against the page each tick whose return value becomes the +screen tag matched against playbook preconditions. -/ + +open Lean (Json) +open LeanTea LeanTea.Net.Http LeanTea.Net.Server +open LeanTea.Llm.McpOrchestrator +open LeanTea.Agent.Playbook +open LeanTea.Agent.Conductor + +namespace AgentDashboard + +/-! ## Config -/ + +structure FileConfig where + /-- Orchestrator config (model + servers + baseUrl + systemPrompt). + Reuses the same JSON shape as `llm_chat_*`. -/ + orchestrator : LeanTea.Llm.McpOrchestrator.FileConfig + /-- JS expression — must return a string the conductor uses as the + screen tag. Empty falls back to constant `"unknown"`. -/ + observerJs : String + /-- Loop tick in ms. -/ + tickMs : Nat := 250 + +private def loadFileConfig (path : String) : IO FileConfig := do + let src ← IO.FS.readFile path + match Json.parse src with + | .error e => throw <| IO.userError s!"config {path}: bad JSON: {e}" + | .ok j => + let orchestrator ← LeanTea.Llm.McpOrchestrator.loadConfig path + let observerJs := + (j.getObjVal? "observerJs").toOption.bind (·.getStr?.toOption) |>.getD "" + let tickMs := + match (j.getObjVal? "tickMs").toOption with + | some (.num n) => n.mantissa.toNat + | _ => 250 + return { orchestrator, observerJs, tickMs } + +/-! ## Observer + +We support exactly one kind of observer in v1: evaluate a JS expression +via the browser MCP and treat its return value as the screen tag. Also +take a screenshot every tick so the dashboard can show what the agent +sees. -/ + +/-- Pull the first `image` content block out of an MCP tool response + and return it as a `data:` URL, or `""` when there's none. -/ +private def extractScreenshot (resp : Json) : String := + let content := (resp.getObjVal? "content").toOption.getD (Json.arr #[]) + let arr := (content.getArr?).toOption.getD #[] + let firstImg := arr.findSome? fun item => + let typ := (item.getObjVal? "type").toOption.bind (·.getStr?.toOption) |>.getD "" + if typ != "image" then none else + let mime := (item.getObjVal? "mimeType").toOption.bind (·.getStr?.toOption) |>.getD "image/png" + let data := (item.getObjVal? "data").toOption.bind (·.getStr?.toOption) |>.getD "" + if data.isEmpty then none else some s!"data:{mime};base64,{data}" + firstImg.getD "" + +/-- Pull the text from a `browser_evaluate` response. -/ +private def extractEvalText (resp : Json) : Option String := + let content := (resp.getObjVal? "content").toOption.getD (Json.arr #[]) + let arr := (content.getArr?).toOption.getD #[] + arr.findSome? fun item => + let typ := (item.getObjVal? "type").toOption.bind (·.getStr?.toOption) |>.getD "" + if typ != "text" then none else + (item.getObjVal? "text").toOption.bind (·.getStr?.toOption) + +private def jsObserver (jsExpr : String) : Observer := fun orch => do + let shot ← (try orch.callTool "browser__browser_screenshot" (Json.mkObj []) + catch _ => pure Json.null) + let screenshot := extractScreenshot shot + let screen ← + if jsExpr.isEmpty then pure "unknown" + else do + let r ← (try orch.callTool "browser__browser_evaluate" (Json.mkObj [ + ("expression", Json.str jsExpr)]) + catch _ => pure Json.null) + pure ((extractEvalText r).getD "unknown").trimAscii.toString + return { screen, screenshot, extra := Json.null } + +/-! ## Server context -/ + +private structure Ctx where + cfg : Config + state : IO.Ref LiveState + storeDir : String + /-- The configured observer JS — surfaced read-only on /api/state. -/ + observerJs : String + +/-! ## JSON serialisation -/ + +private def statsToUiJson (s : Stats) : Json := + Json.mkObj [ + ("runs", Json.num (Int.ofNat s.runs)), + ("wins", Json.num (Int.ofNat s.wins)), + ("losses", Json.num (Int.ofNat s.losses)), + ("totalReward", Json.str (toString s.totalReward)), + ("avgReward", Json.str (toString s.avgReward)), + ("winRate", Json.str (toString s.winRate)), + ("lastRunMs", Json.num (Int.ofNat s.lastRunMs)), + ("recentWins", Json.arr (s.recentWins.map Json.bool)) + ] + +private def playbookToUiJson (pb : Playbook) (s : Stats) : Json := + Json.mkObj [ + ("id", Json.str pb.id), + ("name", Json.str pb.name), + ("description", Json.str pb.description), + ("whenScreen", Json.str pb.pre.whenScreen), + ("estReward", Json.str (toString pb.estReward)), + ("enabled", Json.bool pb.enabled), + ("maxBurst", Json.num (Int.ofNat pb.maxBurst)), + ("stats", statsToUiJson s) + ] + +private def historyEntryJson (h : HistoryEntry) : Json := + Json.mkObj [ + ("ts", Json.num (Int.ofNat h.ts)), + ("playbookId", Json.str h.playbookId), + ("success", Json.bool h.success), + ("reward", Json.str (toString h.reward)), + ("durationMs", Json.num (Int.ofNat h.durationMs)) + ] + +/-! ## Endpoints -/ + +private def handleState (ctx : Ctx) : IO Response := do + let paused ← ctx.cfg.paused.get + let aborted ← ctx.cfg.aborted.get + let body := Json.mkObj [ + ("paused", Json.bool paused), + ("aborted", Json.bool aborted), + ("tickMs", Json.num (Int.ofNat ctx.cfg.tickMs)), + ("observerJs", Json.str ctx.observerJs), + ("storeDir", Json.str ctx.storeDir), + ("serverCount", Json.num (Int.ofNat ctx.cfg.orch.servers.size)), + ("toolCount", Json.num (Int.ofNat ctx.cfg.orch.openAiTools.size)) + ] + return Response.text 200 body.compress + +private def handlePlaybooks (ctx : Ctx) : IO Response := do + let pbs ← listPlaybooks ctx.storeDir + let st ← ctx.state.get + let arr := pbs.map fun pb => + let s := (st.stats.get? pb.id).getD {} + playbookToUiJson pb s + let body := Json.mkObj [("playbooks", Json.arr arr)] + return Response.text 200 body.compress + +private def handleLive (ctx : Ctx) : IO Response := do + let st ← ctx.state.get + let body := Json.mkObj [ + ("current", Json.str st.current), + ("screen", Json.str st.observation.screen), + ("screenshot", Json.str st.observation.screenshot), + ("cumReward", Json.str (toString st.cumReward)), + ("burstId", Json.str st.burstId), + ("burstCount", Json.num (Int.ofNat st.burstCount)), + ("history", Json.arr (st.history.map historyEntryJson)) + ] + return Response.text 200 body.compress + +private def handleControl (ctx : Ctx) (req : Request) : IO Response := do + match Json.parse (String.fromUTF8! req.body) with + | .error e => return Response.text 400 (Json.mkObj [("error", Json.str s!"bad json: {e}")]).compress + | .ok j => + let action := (j.getObjVal? "action").toOption.bind (·.getStr?.toOption) |>.getD "" + match action with + | "pause" => ctx.cfg.paused.set true; return Response.text 200 "{\"ok\":true}" + | "resume" => ctx.cfg.paused.set false; return Response.text 200 "{\"ok\":true}" + | "abort" => ctx.cfg.aborted.set true; return Response.text 200 "{\"ok\":true}" + | "reset-stats" => + ctx.state.modify (fun s => { s with stats := ∅, cumReward := 0.0, history := #[] }) + saveAllStats ctx.storeDir ∅ + return Response.text 200 "{\"ok\":true}" + | _ => return Response.text 400 (Json.mkObj [("error", Json.str s!"unknown action: {action}")]).compress + +private def handleToggle (ctx : Ctx) (req : Request) : IO Response := do + match Json.parse (String.fromUTF8! req.body) with + | .error e => return Response.text 400 (Json.mkObj [("error", Json.str s!"bad json: {e}")]).compress + | .ok j => + let id := (j.getObjVal? "id").toOption.bind (·.getStr?.toOption) |>.getD "" + let enabled := + match (j.getObjVal? "enabled").toOption with + | some (.bool b) => b + | _ => true + match ← loadPlaybook ctx.storeDir id with + | none => + let body := (Json.mkObj [("error", Json.str s!"playbook {id} not found")]).compress + return Response.text 404 body + | some pb => + let pb' := { pb with enabled } + pb'.save ctx.storeDir + return Response.text 200 "{\"ok\":true}" + +/-! ## Inline HTML page -/ + +private def pageHtml : String := " + + + + +agent-dashboard + + + +
+

agent-dashboard

+ loading… + running + + + + +
+
+
live
+
playbooks
+
rewards
+
+
+
+
+

screen

+
+
current playbook
+
observed screen
+
cum reward
0
+
burst
+
+
+
+

screenshot

+ \"(no +
+
+
+

recent runs

+ + + +
whenplaybookokrewardms
+
+
+
+ + + + + + +
idscreenestrunswinswin-rateavg rewardrecentenabled
+
+
+ +
+ + + +" + +/-! ## Path matching + handler -/ + +private def matchPath (path : String) : Option String := + if path == "/api/playbooks/toggle" then some "toggle" else none + +private def handler (ctx : Ctx) : Handler := fun req => do + match req.path, req.method with + | "/", _ => return Response.html 200 pageHtml + | "/favicon.ico", _ => return { status := 204, headers := #[], body := .empty } + | "/api/state", "GET" => handleState ctx + | "/api/playbooks", "GET" => handlePlaybooks ctx + | "/api/playbooks/toggle", "POST" => handleToggle ctx req + | "/api/live", "GET" => handleLive ctx + | "/api/control", "POST" => handleControl ctx req + | _, _ => return Response.notFound + +/-! ## CLI -/ + +private structure Args where + port : UInt16 := 8040 + host : String := "0.0.0.0" + configPath : String := "" + storeDir : String := "" + +private partial def parseArgs (xs : List String) (a : Args) : Args := + match xs with + | "--port" :: v :: rest => parseArgs rest { a with port := (v.toNat?.getD 8040).toUInt16 } + | "--host" :: v :: rest => parseArgs rest { a with host := v } + | "--config" :: v :: rest => parseArgs rest { a with configPath := v } + | "--store" :: v :: rest => parseArgs rest { a with storeDir := v } + | _ :: rest => parseArgs rest a + | [] => a + +def serveMain (args : List String) : IO Unit := do + let mut a := parseArgs args {} + if let some p ← IO.getEnv "PORT" then + if let some n := p.toNat? then a := { a with port := n.toUInt16 } + if a.configPath.isEmpty then + IO.eprintln "usage: agent_dashboard_serve --config [--store DIR] [--port N] [--host H]" + IO.Process.exit 2 + let storeDir := + if a.storeDir.isEmpty then "examples/AgentDashboard/state" else a.storeDir + let fc ← loadFileConfig a.configPath + IO.eprintln s!"agent-dashboard: spawning {fc.orchestrator.servers.size} MCP server(s)…" + let orch ← LeanTea.Llm.McpOrchestrator.fromConfig fc.orchestrator + IO.eprintln s!"agent-dashboard: loaded {orch.openAiTools.size} tool(s)" + /- Restore stats from disk so a restart picks up where it left off. -/ + let stats0 ← loadAllStats storeDir + let now0 ← IO.monoMsNow + let state ← IO.mkRef ({ + current := "", observation := { screen := "(unknown)" }, + history := #[], stats := stats0, startedAtMs := now0, + cumReward := 0.0, burstId := "", burstCount := 0 + } : LiveState) + let cfg ← Config.mk' orch (jsObserver fc.observerJs) storeDir + defaultEscalator fc.tickMs + let ctx : Ctx := { cfg, state, storeDir, observerJs := fc.observerJs } + /- Spawn the conductor loop. -/ + let _ ← IO.asTask (runLoop cfg state) + IO.eprintln s!"agent-dashboard: conductor running; http://{a.host}:{a.port}/" + serveConcurrent a.port a.host (handler ctx) + +end AgentDashboard + +def main (args : List String) : IO Unit := AgentDashboard.serveMain args diff --git a/examples/AgentDashboard/dashboard-config-empty.json b/examples/AgentDashboard/dashboard-config-empty.json new file mode 100644 index 0000000..d37085a --- /dev/null +++ b/examples/AgentDashboard/dashboard-config-empty.json @@ -0,0 +1,8 @@ +{ + "model": "(unused)", + "baseUrl": "http://127.0.0.1:11211/v1", + "system": "(unused)", + "servers": [], + "observerJs": "", + "tickMs": 500 +} diff --git a/examples/AgentDashboard/dashboard-config.json b/examples/AgentDashboard/dashboard-config.json new file mode 100644 index 0000000..26d006b --- /dev/null +++ b/examples/AgentDashboard/dashboard-config.json @@ -0,0 +1,14 @@ +{ + "model": "(unused — the dashboard doesn't talk to an LLM)", + "baseUrl": "http://127.0.0.1:11211/v1", + "system": "(unused)", + "servers": [ + { + "name": "browser", + "bin": "./.lake/build/bin/browser_mcp_serve", + "args": [] + } + ], + "observerJs": "window.__screen || 'unknown'", + "tickMs": 500 +} diff --git a/examples/AgentDashboard/game/index.html b/examples/AgentDashboard/game/index.html new file mode 100644 index 0000000..d633961 --- /dev/null +++ b/examples/AgentDashboard/game/index.html @@ -0,0 +1,143 @@ + + + + +toy game — agent target + + + +
+

toy game

+ hub +
+
+
coins
0
+
energy
5
+
+
+
runs
0
+
+
+ + + +
+
+ + + diff --git a/examples/AgentDashboard/playbooks/big_quest.json b/examples/AgentDashboard/playbooks/big_quest.json new file mode 100644 index 0000000..de73040 --- /dev/null +++ b/examples/AgentDashboard/playbooks/big_quest.json @@ -0,0 +1,21 @@ +{ + "id": "big_quest", + "name": "big quest", + "description": "Click Big Quest. +10 reward, costs 3 energy, blocks for 1.5s on the battle screen.", + "pre": {"whenScreen": "hub"}, + "estReward": 10.0, + "maxBurst": 3, + "timeoutMs": 6000, + "enabled": true, + "script": { + "name": "big_quest", + "description": "click big quest", + "steps": [ + {"act": "click_xy", "x": 240, "y": 256, "note": "Big Quest button"}, + {"act": "wait", "ms": 1700, "note": "wait for battle screen to clear"}, + {"act": "tool_call", "tool": "browser__browser_evaluate", + "args": {"expression": "({reward: 10})"}, + "note": "report 10 coins"} + ] + } +} diff --git a/examples/AgentDashboard/playbooks/coin.json b/examples/AgentDashboard/playbooks/coin.json new file mode 100644 index 0000000..1b9d7b8 --- /dev/null +++ b/examples/AgentDashboard/playbooks/coin.json @@ -0,0 +1,21 @@ +{ + "id": "coin", + "name": "collect coin", + "description": "Click the Collect Coin button. Cheap, always available when energy>0.", + "pre": {"whenScreen": "hub"}, + "estReward": 1.0, + "maxBurst": 5, + "timeoutMs": 5000, + "enabled": true, + "script": { + "name": "coin", + "description": "click coin", + "steps": [ + {"act": "click_xy", "x": 80, "y": 256, "note": "Collect Coin button"}, + {"act": "wait", "ms": 300}, + {"act": "tool_call", "tool": "browser__browser_evaluate", + "args": {"expression": "({reward: 1})"}, + "note": "report 1 coin"} + ] + } +} diff --git a/examples/AgentDashboard/playbooks/rest.json b/examples/AgentDashboard/playbooks/rest.json new file mode 100644 index 0000000..c87c7aa --- /dev/null +++ b/examples/AgentDashboard/playbooks/rest.json @@ -0,0 +1,21 @@ +{ + "id": "rest", + "name": "rest (refill energy)", + "description": "Click Rest when no quest is doable. No reward, but unblocks the others.", + "pre": {"whenScreen": "hub"}, + "estReward": 0.1, + "maxBurst": 1, + "timeoutMs": 3000, + "enabled": true, + "script": { + "name": "rest", + "description": "click rest", + "steps": [ + {"act": "click_xy", "x": 400, "y": 256, "note": "Rest button"}, + {"act": "wait", "ms": 200}, + {"act": "tool_call", "tool": "browser__browser_evaluate", + "args": {"expression": "({reward: 0.1})"}, + "note": "minor reward so bandit doesn't shun rest entirely"} + ] + } +} diff --git a/examples/AgentDashboard/playbooks/wait_battle.json b/examples/AgentDashboard/playbooks/wait_battle.json new file mode 100644 index 0000000..1be031c --- /dev/null +++ b/examples/AgentDashboard/playbooks/wait_battle.json @@ -0,0 +1,20 @@ +{ + "id": "wait_battle", + "name": "wait out battle", + "description": "When the toy game is on the battle screen, just wait — clicks don't do anything until it returns to hub.", + "pre": {"whenScreen": "battle"}, + "estReward": 0.0, + "maxBurst": 0, + "timeoutMs": 3000, + "enabled": true, + "script": { + "name": "wait_battle", + "description": "sleep through the battle animation", + "steps": [ + {"act": "wait", "ms": 1500, "note": "battle screen auto-clears after 1.5s"}, + {"act": "tool_call", "tool": "browser__browser_evaluate", + "args": {"expression": "({reward: 0})"}, + "note": "no-op reward — but successful run"} + ] + } +} diff --git a/examples/CoderMcp/Serve.lean b/examples/CoderMcp/Serve.lean new file mode 100644 index 0000000..b3876c8 --- /dev/null +++ b/examples/CoderMcp/Serve.lean @@ -0,0 +1,397 @@ +import LeanTea +import Lean.Data.Json + +/-! # coder_mcp_serve — MCP server for code-editing agents + +Seven tools, all workspace-bound via `LeanTea.Net.SafePath`: + +| tool | what it does | typical policy | +|---|---|---| +| `coder_read_file` | read one file in the workspace | allow | +| `coder_list_dir` | one-level listing of a directory | allow | +| `coder_glob` | recursive suffix match (e.g. `.lean`) | allow | +| `coder_grep` | shell out to `grep -rn` | allow | +| `coder_write_file` | write the whole file contents | **ask** | +| `coder_edit_file` | exact search-and-replace | **ask** | +| `coder_run` | `sh -c CMD` inside the workspace cwd | **ask** | + +Pair with `LeanTea.Llm.Policy` in the chat app: allow the read-only +tools globally, leave the mutating ones (`write_file`, `edit_file`, +`run`) to be ask'd. That gives a Claude-Code-style flow where the +LLM proposes, the human approves. + +### Why `edit_file` requires unique-match + +`coder_write_file` rewrites the file from scratch — fine for new +files, blunt for tiny edits. `coder_edit_file` is a search-and- +replace with **exactly-one** match enforced: if the search string +appears zero times the call errors (model has the wrong spelling), +and if it appears more than once the call errors (model needs to +add surrounding context to disambiguate). This is the same +invariant Claude Code's Edit tool uses and the failure modes are +easy for the LLM to recover from. + +### Workspace boundary + +Every path argument is resolved through `SafePath.under workspace`. +Absolute paths outside the workspace are rejected before any IO +happens. `..` segments are normalised. NUL bytes in paths are +rejected. The `coder_run` tool sets `cwd := workspace` and `env := +[]` overrides — it can still shell out to `cd /etc; cat shadow` +because we don't sandbox `/bin/sh` here, but that's exactly the +class of thing the policy `ask` gate is for. -/ + +open LeanTea LeanTea.Net.Http LeanTea.Net.Server +open Lean (Json) + +namespace CoderMcp + +open LeanTea.Mcp (jsonOk jsonErr textContent errContent + argSchema toolDef defaultInitializeResult) + +/-! ## Argument helpers (mirror TmuxMcp / GeminiMcp) -/ + +private def getStr (args : Json) (k : String) : Except String String := + match args.getObjVal? k with + | .ok v => v.getStr? + | .error e => .error e + +private def getStrOpt (args : Json) (k : String) (default : String := "") : String := + match args.getObjVal? k with + | .ok v => match v.getStr? with + | .ok s => s + | _ => default + | _ => default + +/-! ## Workspace state -/ + +private initialize wsRef : IO.Ref String ← IO.mkRef "" + +private def getWs : IO String := do + let ws ← wsRef.get + if ws.isEmpty then + throw <| IO.userError "coder-mcp: --workspace not set; refusing to operate" + else + return ws + +/-! ## SafePath wrapper + +Every tool that takes a path runs it through `SafePath.under ws` +and returns the resolved absolute path. Pull the path arg out, +resolve it, and either dispatch the tool with the safe path or +surface a clear error block. -/ + +private def withSafePath + (args : Json) (key : String) + (k : String → IO Json) : IO Json := do + match getStr args key with + | .error e => return errContent s!"{key}: {e}" + | .ok raw => + let ws ← getWs + match LeanTea.Net.SafePath.under ws raw with + | .error msg => return errContent s!"{key}: {msg}" + | .ok p => k p.value + +/-! ## Output-size caps + +LLMs can't reason over a 200 KB read result and the next round's +prompt blows the context window. Cap at 64 KB / 800 lines per +read tool. The caller can ask for more via a follow-up read of a +narrower slice — that's intentional pressure to keep prompts small. -/ + +private def maxReadBytes : Nat := 64 * 1024 +private def maxLines : Nat := 800 + +private def truncateBody (body : String) : String × Bool := + let lines := body.splitOn "\n" + if lines.length > maxLines then + let head := lines.take maxLines + (String.intercalate "\n" head, true) + else if body.utf8ByteSize > maxReadBytes then + /- Split on bytes, not chars — UTF-8 safe? we use `take` on + String which is char-based; close enough for size-bound. -/ + ((body.take maxReadBytes).toString, true) + else + (body, false) + +/-! ## Tool implementations -/ + +private def readFile (path : String) : IO Json := do + try + let body ← IO.FS.readFile path + let (text, truncated) := truncateBody body + let suffix := + if truncated then + s!"\n…[output truncated; original was {body.utf8ByteSize} bytes / \ +{body.splitOn "\n" |>.length} lines]" + else "" + return textContent (text ++ suffix) + catch e => + return errContent s!"read failed: {e}" + +private def writeFile (path : String) (body : String) : IO Json := do + try + /- Create parent dir if missing — typical agent workflow is to + generate new test / scaffold files in subdirs that don't yet + exist. -/ + let dir := (System.FilePath.mk path).parent + if let some d := dir then + unless ← System.FilePath.pathExists d.toString do + IO.FS.createDirAll d.toString + IO.FS.writeFile path body + return textContent s!"wrote {body.utf8ByteSize} bytes to {path}" + catch e => + return errContent s!"write failed: {e}" + +/-- Count occurrences of `needle` in `hay`. `splitOn` produces N+1 + pieces for N matches, so `length - 1` is the count. -/ +private def countOccurrences (hay needle : String) : Nat := + if needle.isEmpty then 0 + else (hay.splitOn needle).length - 1 + +private def editFile (path search replace : String) : IO Json := do + if search.isEmpty then + return errContent "edit_file: `search` must not be empty" + try + let body ← IO.FS.readFile path + let n := countOccurrences body search + if n == 0 then + return errContent s!"edit_file: `search` not found in {path}" + if n > 1 then + return errContent s!"edit_file: `search` matches {n} places in {path} — \ +add surrounding context so it's unique" + let updated := body.replace search replace + IO.FS.writeFile path updated + return textContent s!"edited {path} (1 replacement)" + catch e => + return errContent s!"edit failed: {e}" + +private def listDir (path : String) : IO Json := do + try + let entries ← System.FilePath.readDir path + let lines := entries.map fun e => + let nm := e.fileName + /- We can't cheaply tell directory from file without an extra + stat per entry. Keep it lean — the agent can call us back + on any entry. -/ + s!" {nm}" + let header := s!"{entries.size} entry(ies) in {path}:" + return textContent (header ++ "\n" ++ String.intercalate "\n" lines.toList) + catch e => + return errContent s!"list failed: {e}" + +/-- Recursive directory walk filtering by filename suffix. We + deliberately do **not** support full glob (`?`, `[a-z]`, `**`) + because the typical agent query is just `.py` / `*.lean` + suffix-match. Skips `.git`, `node_modules`, and `.lake` — the + "I never want these" set for code search. -/ +private partial def globSuffix (root : String) (suffix : String) (cap : Nat := 500) + : IO (Array String) := do + let acc ← IO.mkRef (#[] : Array String) + let rec walk (dir : String) : IO Unit := do + let cur ← acc.get + if cur.size ≥ cap then return () + let entries ← (try System.FilePath.readDir dir catch _ => pure #[]) + for e in entries do + let cur ← acc.get + if cur.size ≥ cap then break + let nm := e.fileName + if nm == ".git" || nm == "node_modules" || nm == ".lake" then continue + let p := e.path.toString + let isDir ← (try (System.FilePath.mk p).isDir catch _ => pure false) + if isDir then + walk p + else if nm.endsWith suffix then + acc.modify (·.push p) + walk root + acc.get + +private def glob (suffix : String) : IO Json := do + let ws ← getWs + let files ← globSuffix ws suffix + let capNote := + if files.size ≥ 500 then "\n[truncated at 500 matches]" + else "" + return textContent <| + s!"{files.size} match(es) for *{suffix}:\n" ++ + String.intercalate "\n" files.toList ++ capNote + +private def grep (pattern : String) (pathFilter : String) : IO Json := do + let ws ← getWs + /- We shell out to `grep -rn` because its output format (`file:line:text`) + is exactly what LLMs are trained on. Add `--include` when the caller + gives a path glob. -/ + let mut args : Array String := #["-rnI", "--color=never", "--exclude-dir=.git", + "--exclude-dir=node_modules", "--exclude-dir=.lake"] + unless pathFilter.isEmpty do args := args ++ #["--include", pathFilter] + args := args ++ #[pattern, ws] + let out ← (try IO.Process.output { cmd := "grep", args } + catch e => pure { exitCode := 1, stdout := "", stderr := s!"{e}" }) + if out.exitCode ≥ 2 then + return errContent s!"grep failed: {out.stderr}" + let stdout := out.stdout + let lines := stdout.splitOn "\n" + let capped := + if lines.length > maxLines then + String.intercalate "\n" (lines.take maxLines) ++ + s!"\n[truncated at {maxLines} lines; got {lines.length}]" + else stdout + let body := + if capped.isEmpty then s!"(no match for {pattern})" + else capped + return textContent body + +private def run (cmd : String) : IO Json := do + let ws ← getWs + if cmd.isEmpty then + return errContent "run: empty command" + let out ← (try IO.Process.output { + cmd := "sh", args := #["-c", cmd], cwd := some ws + } catch e => pure { exitCode := 127, stdout := "", stderr := s!"{e}" }) + /- Combine stdout + stderr with markers — LLMs need both. -/ + let stdoutPart := + if out.stdout.isEmpty then "" else s!"--- stdout ---\n{out.stdout}" + let stderrPart := + if out.stderr.isEmpty then "" else s!"--- stderr ---\n{out.stderr}" + let exitLine := s!"--- exit {out.exitCode} ---" + let joined := String.intercalate "\n" <| + (#[stdoutPart, stderrPart, exitLine].filter (!·.isEmpty)).toList + let (text, truncated) := truncateBody joined + let suffix := if truncated then "\n…[output truncated]" else "" + if out.exitCode == 0 then + return textContent (text ++ suffix) + else + return errContent (text ++ suffix) + +/-! ## Tool catalogue -/ + +def toolsList : Json := + Json.mkObj [ + ("tools", Json.arr #[ + toolDef "coder_read_file" + "Read one file inside the workspace. Returns up to 64 KB / 800 lines." + #[ argSchema "path" "string" "workspace-relative path" ] + #["path"], + toolDef "coder_list_dir" + "List entries in a directory (one level)." + #[ argSchema "path" "string" "workspace-relative directory" ] + #["path"], + toolDef "coder_glob" + ("Recursive suffix match across the workspace. Pass `.py`, " + ++ "`.lean`, etc. Skips .git / node_modules / .lake.") + #[ argSchema "suffix" "string" "filename suffix (typically `.`)" ] + #["suffix"], + toolDef "coder_grep" + ("`grep -rnI` over the workspace. Optional `pathFilter` " + ++ "narrows by --include (e.g. `*.go`).") + #[ argSchema "pattern" "string" "regex (basic POSIX)", + argSchema "pathFilter" "string" "(optional) glob like `*.lean`" ] + #["pattern"], + toolDef "coder_write_file" + ("Overwrite a file with the given contents. Creates parent " + ++ "directories if missing.") + #[ argSchema "path" "string" "workspace-relative path", + argSchema "content" "string" "full file contents" ] + #["path", "content"], + toolDef "coder_edit_file" + ("Exact search-and-replace inside a file. Fails if `search` " + ++ "isn't present or matches more than once — add surrounding " + ++ "context to disambiguate.") + #[ argSchema "path" "string" "workspace-relative path", + argSchema "search" "string" "exact text to find (one match required)", + argSchema "replace" "string" "replacement text" ] + #["path", "search", "replace"], + toolDef "coder_run" + ("`sh -c CMD` in the workspace cwd. Returns stdout / stderr / " + ++ "exit code. Use this for tests, build, git, etc.") + #[ argSchema "cmd" "string" "shell command (e.g. `lake build`)" ] + #["cmd"] + ]) + ] + +def initializeResult : Json := defaultInitializeResult "lean-elm-coder-mcp" + +/-! ## Dispatch -/ + +def callTool (name : String) (args : Json) : IO Json := do + try + match name with + | "coder_read_file" => + withSafePath args "path" readFile + | "coder_list_dir" => + withSafePath args "path" listDir + | "coder_glob" => + match getStr args "suffix" with + | .error e => return errContent s!"suffix: {e}" + | .ok suf => glob suf + | "coder_grep" => + match getStr args "pattern" with + | .error e => return errContent s!"pattern: {e}" + | .ok pat => + let pf := getStrOpt args "pathFilter" + grep pat pf + | "coder_write_file" => + match getStr args "content" with + | .error e => return errContent s!"content: {e}" + | .ok body => + withSafePath args "path" (fun p => writeFile p body) + | "coder_edit_file" => + match getStr args "search", getStr args "replace" with + | .error e, _ => return errContent s!"search: {e}" + | _, .error e => return errContent s!"replace: {e}" + | .ok srch, .ok repl => + withSafePath args "path" (fun p => editFile p srch repl) + | "coder_run" => + match getStr args "cmd" with + | .error e => return errContent s!"cmd: {e}" + | .ok cmd => run cmd + | _ => return errContent s!"unknown tool: {name}" + catch e => + return errContent s!"{name}: {e}" + +/-! ## Transport -/ + +private structure Args where + mode : String := "stdio" + port : UInt16 := 8021 + host : String := "0.0.0.0" + workspace : String := "" + +private partial def parseArgs (xs : List String) (a : Args) : Args := + match xs with + | "--stdio" :: rest => parseArgs rest { a with mode := "stdio" } + | "--http" :: rest => parseArgs rest { a with mode := "http" } + | "--port" :: v :: rest => + parseArgs rest { a with mode := "http", port := (v.toNat?.getD 8021).toUInt16 } + | "--host" :: v :: rest => parseArgs rest { a with host := v } + | "--workspace" :: v :: rest => parseArgs rest { a with workspace := v } + | _ :: rest => parseArgs rest a + | [] => a + +def serveMain (args : List String) : IO Unit := do + let mut a := parseArgs args {} + if let some p ← IO.getEnv "PORT" then + if let some n := p.toNat? then a := { a with mode := "http", port := n.toUInt16 } + if let some w ← IO.getEnv "CODER_MCP_WORKSPACE" then + if a.workspace.isEmpty then a := { a with workspace := w } + if a.workspace.isEmpty then + IO.eprintln "coder-mcp: --workspace is required (or set CODER_MCP_WORKSPACE)" + IO.Process.exit 2 + wsRef.set a.workspace + IO.eprintln s!"coder-mcp: workspace = {a.workspace}" + let mcpHandler : LeanTea.Mcp.Handler := { + initializeResult := initializeResult, + toolsList := toolsList, + callTool := callTool + } + match a.mode with + | "http" => + IO.eprintln s!"coder-mcp: POST http://{a.host}:{a.port}/mcp" + mcpHandler.serveHttp a.port a.host + | _ => + IO.eprintln "coder-mcp: stdio mode" + mcpHandler.serveStdio + +end CoderMcp + +def main (args : List String) : IO Unit := CoderMcp.serveMain args diff --git a/examples/GeminiMcp/Serve.lean b/examples/GeminiMcp/Serve.lean new file mode 100644 index 0000000..2e33cab --- /dev/null +++ b/examples/GeminiMcp/Serve.lean @@ -0,0 +1,374 @@ +import LeanTea +import Lean.Data.Json + +/-! # gemini_mcp_serve — MCP server fronting the Google Gemini API + +Same shape as the other LeanTEA MCP servers (`tmux_mcp_serve`, +`chrome_cdp_mcp_serve`, …): one binary, stdio + HTTP transports, +tools named with a common `gemini_` prefix. + +The point of going through MCP rather than calling Gemini directly +from each agent is twofold: + +1. **One key, one boundary.** The `GEMINI_API_KEY` lives in this + process only. Clients (Claude / Cursor / any MCP runner) see + only a stable tool catalogue. +2. **Workspace-bound file access.** `gemini_review_files` and + `gemini_review_diff` always go through `LeanTea.Net.SafePath` + against a `--workspace DIR` root, so a buggy or adversarial + LLM client can't make the MCP read `/etc/shadow` and ship it + to Google. + +### Why these five tools + +`gemini_ask` is the one-shot prompt. `gemini_chat` is the +multi-turn variant; the history is provided as a JSON array each +call (MCP servers are stateless by spec — keeping turn history on +the server would break that). `gemini_review_files` is the +flagship: it bundles many files into one prompt to exploit Pro's +2M-token context for cross-file architectural review. +`gemini_review_diff` is the small-bites variant — quick PR review +without bundling the whole codebase. `gemini_list_models` returns +the supported model catalogue so clients can advertise the choices +without hard-coding. + +### Model selection + +Default model is `gemini-2.5-pro` (per `LeanTea.Cloud.Gemini`). +Every tool accepts an optional `model` argument so callers can +pick `gemini-2.5-flash` for speed or `gemini-2.5-flash-lite` for +cost. Override at server level via `GEMINI_MODEL` env or +`--model NAME` flag. + +``` +gemini_mcp_serve --port 8020 --workspace /path/to/repo +gemini_mcp_serve --stdio --workspace /path/to/repo --model gemini-2.5-flash +``` + +stdio mode is what MCP-Lite / Claude Desktop / Cursor speak; +`--port` is for plain `curl` testing. -/ + +open LeanTea LeanTea.Net.Http LeanTea.Net.Server +open Lean (Json) + +namespace GeminiMcp + +open LeanTea.Mcp (jsonOk jsonErr textContent errContent + argSchema toolDef defaultInitializeResult) + +/-! ## Argument extraction helpers (mirrors TmuxMcp) -/ + +private def getStr (args : Json) (k : String) : Except String String := + match args.getObjVal? k with + | .ok v => v.getStr? + | .error e => .error e + +private def getStrOpt (args : Json) (k : String) (default : String := "") : String := + match args.getObjVal? k with + | .ok v => match v.getStr? with + | .ok s => s + | _ => default + | _ => default + +private def getNatOpt (args : Json) (k : String) (default : Nat := 0) : Nat := + match args.getObjVal? k with + | .ok (.num n) => n.mantissa.toNat + | _ => default + +private def getFloatOpt (args : Json) (k : String) : Option Float := + match args.getObjVal? k with + | .ok (.num n) => some n.toFloat + | _ => none + +/-- Pull a JSON array of strings; non-string entries are skipped. -/ +private def getStrArr (args : Json) (k : String) : Array String := + match args.getObjVal? k with + | .ok (.arr a) => + a.filterMap fun j => match j.getStr? with + | .ok s => some s + | .error _ => none + | _ => #[] + +/-! ## Server-side state — the Gemini Config + workspace root. + +The Config carries `apiKey` (which we never want to log), so it +goes through a top-level `IO.Ref` rather than being threaded +through `callTool`. -/ + +private initialize cfgRef : IO.Ref (Option LeanTea.Cloud.Gemini.Config) ← + IO.mkRef none + +private initialize workspaceRef : IO.Ref String ← IO.mkRef "" + +private def getCfg : IO LeanTea.Cloud.Gemini.Config := do + match ← cfgRef.get with + | some c => return c + | none => + throw <| IO.userError + "gemini_mcp: Config not initialised (set GEMINI_API_KEY)" + +/-- Pull a `CallOpts` from the JSON args: `model` / `system` / + `temperature` / `maxTokens`. All optional. -/ +private def parseOpts (args : Json) : LeanTea.Cloud.Gemini.CallOpts := + let model := getStrOpt args "model" + let system := getStrOpt args "system" + let temperature := getFloatOpt args "temperature" + let maxTokens := getNatOpt args "maxTokens" + { + model := if model.isEmpty then none else some model, + system := if system.isEmpty then none else some system, + temperature := temperature, + maxOutputTokens := if maxTokens == 0 then none else some maxTokens + } + +/-- Common reply formatter: surface text + usage + finish reason + + resolved model name, separated by markers so callers can + parse them out if they want. -/ +private def renderResponse (r : LeanTea.Cloud.Gemini.Response) + (extra : String := "") : Json := + let header := + s!"[model={r.model} tokens in={r.usage.input} out={r.usage.output} \ +finish={r.finishReason}]" + let extraLine := if extra.isEmpty then "" else extra ++ "\n" + textContent (header ++ "\n" ++ extraLine ++ r.text) + +/-! ## Tool catalogue -/ + +def toolsList : Json := + Json.mkObj [ + ("tools", Json.arr #[ + toolDef "gemini_ask" + ("Single-turn Gemini prompt. Returns the model's text reply " + ++ "prefixed with `[model=… tokens in=… out=… finish=…]` so the " + ++ "caller can track cost. Default model `gemini-2.5-pro`; " + ++ "override with `model` (try `gemini-2.5-flash` for speed).") + #[ argSchema "prompt" "string" "the user prompt", + argSchema "model" "string" "(optional) override default model", + argSchema "system" "string" "(optional) system prompt", + argSchema "temperature" "number" "(optional) 0.0=deterministic", + argSchema "maxTokens" "number" "(optional) maxOutputTokens cap" ] + #["prompt"], + toolDef "gemini_review_files" + ("Bundle multiple workspace-relative files into a Markdown " + ++ "prompt and ask Gemini for a holistic review. Exploits " + ++ "Pro's 2M-token context; pair with `model=gemini-2.5-flash` " + ++ "for cheaper passes. Paths are validated via `SafePath` " + ++ "against the server's `--workspace` — `..` / NUL escape " + ++ "attempts are rejected. The response is prefixed with the " + ++ "usual `[model=… tokens…]` header followed by `Files: N " + ++ "(M bytes)` then the review prose.") + #[ argSchema "paths" "array" "workspace-relative paths to bundle", + argSchema "prompt" "string" "(optional) custom review prompt; default = holistic review", + argSchema "model" "string" "(optional) override default model", + argSchema "system" "string" "(optional) custom system prompt", + argSchema "temperature" "number" "(optional)", + argSchema "maxTokens" "number" "(optional)" ] + #["paths"], + toolDef "gemini_review_diff" + ("Run `git diff BASE HEAD` inside the workspace, pipe the diff " + ++ "to Gemini, return a focused review (intent + defect " + ++ "candidates + suggestions + approve/changes/nit verdict).") + #[ argSchema "base" "string" "git base ref", + argSchema "head" "string" "git head ref", + argSchema "prompt" "string" "(optional) custom review prompt", + argSchema "model" "string" "(optional) override default model", + argSchema "system" "string" "(optional) custom system prompt", + argSchema "temperature" "number" "(optional)", + argSchema "maxTokens" "number" "(optional)" ] + #["base", "head"], + toolDef "gemini_chat" + ("Multi-turn chat. `history` is a JSON array of " + ++ "`{role: \"user\"|\"model\", text: \"...\"}` objects " + ++ "alternating user/model turns; `message` is the new user " + ++ "input. MCP servers are stateless so callers must keep " + ++ "history themselves between calls.") + #[ argSchema "message" "string" "new user message", + argSchema "history" "array" "prior turns (alternating user/model)", + argSchema "model" "string" "(optional) override default model", + argSchema "system" "string" "(optional) system prompt", + argSchema "temperature" "number" "(optional)", + argSchema "maxTokens" "number" "(optional)" ] + #["message"], + toolDef "gemini_list_models" + ("Return the catalogue of supported models with rough use-case " + ++ "hints and context-window sizes. Pricing is informational " + ++ "only — check ai.google.dev for current rates.") + #[] #[] + ]) + ] + +def initializeResult : Json := defaultInitializeResult "lean-elm-gemini-mcp" + +/-! ## Static model catalogue -/ + +private def modelsCatalogue : Json := + let mkRow (id desc : String) (ctxK : Nat) : Json := + Json.mkObj [ + ("id", Json.str id), + ("desc", Json.str desc), + ("contextK", Json.num (Int.ofNat ctxK)) + ] + Json.mkObj [ + ("default", Json.str "gemini-2.5-pro"), + ("models", Json.arr #[ + mkRow "gemini-2.5-pro" + "heavy code / architecture review; 2M context" + 2097, + mkRow "gemini-2.5-flash" + "fast + cheap general-purpose; 1M context" + 1048, + mkRow "gemini-2.5-flash-lite" + "smallest + cheapest, summarisation / lint; 1M context" + 1048 + ]) + ] + +/-! ## Tool dispatch -/ + +def callTool (name : String) (args : Json) : IO Json := do + try + match name with + | "gemini_list_models" => + return textContent modelsCatalogue.pretty + | "gemini_ask" => + match getStr args "prompt" with + | .error e => return errContent s!"prompt: {e}" + | .ok prompt => + let cfg ← getCfg + let opts := parseOpts args + let r ← LeanTea.Cloud.Gemini.ask cfg prompt opts + return renderResponse r + | "gemini_chat" => + match getStr args "message" with + | .error e => return errContent s!"message: {e}" + | .ok message => + let cfg ← getCfg + let opts := parseOpts args + let hist : List LeanTea.Cloud.Gemini.Message := + match args.getObjVal? "history" with + | .ok (.arr a) => + (a.toList.filterMap fun j => + match j.getObjVal? "role", j.getObjVal? "text" with + | .ok jr, .ok jt => + match jr.getStr?, jt.getStr? with + | .ok role, .ok text => some ({ role, text } : LeanTea.Cloud.Gemini.Message) + | _, _ => none + | _, _ => none) + | _ => [] + let r ← LeanTea.Cloud.Gemini.chat cfg hist message opts + return renderResponse r + | "gemini_review_files" => + let paths := getStrArr args "paths" + if paths.isEmpty then + return errContent "paths: empty (need at least one workspace-relative path)" + else + let cfg ← getCfg + let ws ← workspaceRef.get + if ws.isEmpty then + return errContent "gemini_mcp: --workspace not set; can't safely read files" + else + let prompt := getStrOpt args "prompt" + let opts := parseOpts args + /- Pre-check SafePath: if *every* path is rejected we'd + otherwise spend a Gemini call on an empty bundle. -/ + let firstOk := paths.findSome? fun p => + match LeanTea.Net.SafePath.under ws p with + | .ok _ => some p + | .error _ => none + if firstOk.isNone then + let reasons := String.intercalate "\n " <| paths.toList.map fun p => + match LeanTea.Net.SafePath.under ws p with + | .error msg => s!"{p}: {msg}" + | .ok _ => s!"{p}: (ok but somehow filtered)" + return errContent s!"gemini_review_files: all paths rejected\n {reasons}" + let (r, stats) ← LeanTea.Cloud.Gemini.reviewMany cfg ws paths prompt opts + let skipNote := + if stats.skipped.isEmpty then "" + else + "Skipped:\n" ++ String.intercalate "\n" + (stats.skipped.map fun (p, reason) => s!" - {p}: {reason}") + let extra := s!"Files: {stats.fileCount} ({stats.totalBytes} bytes)" + ++ (if skipNote.isEmpty then "" else "\n" ++ skipNote) + return renderResponse r extra + | "gemini_review_diff" => + match getStr args "base", getStr args "head" with + | .error e, _ => return errContent s!"base: {e}" + | _, .error e => return errContent s!"head: {e}" + | .ok base, .ok head => + let cfg ← getCfg + let ws ← workspaceRef.get + if ws.isEmpty then + return errContent "gemini_mcp: --workspace not set; can't run git diff" + else + let prompt := getStrOpt args "prompt" + let opts := parseOpts args + let r ← LeanTea.Cloud.Gemini.reviewDiff cfg ws base head prompt opts + return renderResponse r + | _ => return errContent s!"unknown tool: {name}" + catch e => + return errContent s!"{name}: {e}" + +/-! ## CLI argument parsing -/ + +private structure Args where + mode : String := "stdio" + port : UInt16 := 8020 + host : String := "0.0.0.0" + workspace : String := "" + model : String := "" + +private partial def parseArgs (xs : List String) (a : Args) : Args := + match xs with + | "--stdio" :: rest => parseArgs rest { a with mode := "stdio" } + | "--http" :: rest => parseArgs rest { a with mode := "http" } + | "--port" :: v :: rest => + parseArgs rest { a with mode := "http", port := (v.toNat?.getD 8020).toUInt16 } + | "--host" :: v :: rest => parseArgs rest { a with host := v } + | "--workspace" :: v :: rest => parseArgs rest { a with workspace := v } + | "--model" :: v :: rest => parseArgs rest { a with model := v } + | _ :: rest => parseArgs rest a + | [] => a + +def serveMain (args : List String) : IO Unit := do + let mut a := parseArgs args {} + if let some p ← IO.getEnv "PORT" then + if let some n := p.toNat? then a := { a with mode := "http", port := n.toUInt16 } + if let some w ← IO.getEnv "GEMINI_MCP_WORKSPACE" then + if a.workspace.isEmpty then a := { a with workspace := w } + if let some m ← IO.getEnv "GEMINI_MODEL" then + if a.model.isEmpty then a := { a with model := m } + /- Initialise the global config from env + CLI overrides. We still + return successfully without an API key — the tools just fail + with a clear "Config not initialised" message when called. + This keeps the catalogue (`tools/list`, `gemini_list_models`) + reachable for clients that just want to discover capabilities. -/ + match ← LeanTea.Cloud.Gemini.Config.fromEnv? with + | none => + IO.eprintln "gemini-mcp: GEMINI_API_KEY not set — \ +tools/list works but gemini_ask / chat / review will error until you set it" + | some baseCfg => + let cfg := if a.model.isEmpty then baseCfg else { baseCfg with model := a.model } + cfgRef.set (some cfg) + IO.eprintln s!"gemini-mcp: ready (default model = {cfg.model})" + workspaceRef.set a.workspace + if a.workspace.isEmpty then + IO.eprintln "gemini-mcp: --workspace not set — \ +gemini_review_files and gemini_review_diff will refuse to run" + else + IO.eprintln s!"gemini-mcp: workspace = {a.workspace}" + let mcpHandler : LeanTea.Mcp.Handler := { + initializeResult := initializeResult, + toolsList := toolsList, + callTool := callTool + } + match a.mode with + | "http" => + IO.eprintln s!"gemini-mcp: POST http://{a.host}:{a.port}/mcp" + mcpHandler.serveHttp a.port a.host + | _ => + IO.eprintln "gemini-mcp: stdio mode" + mcpHandler.serveStdio + +end GeminiMcp + +def main (args : List String) : IO Unit := GeminiMcp.serveMain args diff --git a/examples/LlmChat/coder-config.json b/examples/LlmChat/coder-config.json new file mode 100644 index 0000000..29ed32c --- /dev/null +++ b/examples/LlmChat/coder-config.json @@ -0,0 +1,12 @@ +{ + "model": "qwen/qwen2.5-coder-32b-instruct", + "baseUrl": "http://127.0.0.1:11211/v1", + "system": "You are a senior software engineer working in a Lean 4 + Elm-style codebase. Use the `coder_*` tools to read, search, and edit files. Always read before editing. Prefer `coder_edit_file` (exact search-and-replace) for surgical changes over `coder_write_file` (full rewrite). Use `coder_grep` first when looking for symbols. Run `lake build` after non-trivial edits and fix any errors before claiming done. Reply in the user's language.", + "servers": [ + { + "name": "coder", + "bin": "./.lake/build/bin/coder_mcp_serve", + "args": ["--workspace", "."] + } + ] +} diff --git a/examples/LlmChat/coder-policy.json b/examples/LlmChat/coder-policy.json new file mode 100644 index 0000000..ea2cfed --- /dev/null +++ b/examples/LlmChat/coder-policy.json @@ -0,0 +1,8 @@ +{ + "rules": [ + {"pattern": "coder__coder_read_file", "action": "allow"}, + {"pattern": "coder__coder_list_dir", "action": "allow"}, + {"pattern": "coder__coder_glob", "action": "allow"}, + {"pattern": "coder__coder_grep", "action": "allow"} + ] +} diff --git a/examples/LlmChat/example-config.json b/examples/LlmChat/example-config.json new file mode 100644 index 0000000..95411ac --- /dev/null +++ b/examples/LlmChat/example-config.json @@ -0,0 +1,17 @@ +{ + "model": "google/gemma-4-e4b", + "baseUrl": "http://127.0.0.1:11211/v1", + "system": "You are a helpful assistant with access to MCP tools. When the user asks you to do something, decide whether a tool call is needed before calling one. Reply in the user's language. Keep replies concise.", + "servers": [ + { + "name": "chrome", + "bin": "./.lake/build/bin/chrome_cdp_mcp_serve", + "args": [] + }, + { + "name": "gemini", + "bin": "./.lake/build/bin/gemini_mcp_serve", + "args": ["--workspace", "."] + } + ] +} diff --git a/examples/LlmChatCli/Main.lean b/examples/LlmChatCli/Main.lean new file mode 100644 index 0000000..49f93e7 --- /dev/null +++ b/examples/LlmChatCli/Main.lean @@ -0,0 +1,239 @@ +import LeanTea +import Lean.Data.Json + +/-! # llm_chat_cli — thinnest possible REPL on top of `McpOrchestrator` + +``` +llm_chat_cli --config llm-chat.json +> open github.com and tell me the page title +[chrome::chrome_navigate]({"url":"https://github.com"}) +[chrome::chrome_get_text]({}) +The GitHub homepage shows the headline "Let's build from here". +> ^D +``` + +ANSI colors on stderr-style progress lines; the final assistant +text goes to stdout so the binary composes with other CLI tools +(`llm_chat_cli ... < script.txt`). -/ + +open Lean (Json) +open LeanTea.Llm.McpOrchestrator + +namespace LlmChatCli + +private def cYellow (s : String) : String := s!"\x1b[33m{s}\x1b[0m" +private def cGrey (s : String) : String := s!"\x1b[2m{s}\x1b[0m" +private def cCyan (s : String) : String := s!"\x1b[36m{s}\x1b[0m" +private def cRed (s : String) : String := s!"\x1b[31m{s}\x1b[0m" +private def cBold (s : String) : String := s!"\x1b[1m{s}\x1b[0m" + +private def truncate (s : String) (n : Nat) : String := + if s.length ≤ n then s + else (s.take n).toString ++ "…" + +/-- Interactive prompt: ask y/n/a/d for a tool call. Loops until a + valid key is given. Reads from stdin. -/ +private def askDecision (name : String) (args : Json) : IO UserDecision := do + let stdin ← IO.getStdin + let stdout ← IO.getStdout + let argsPreview := truncate args.compress 200 + stdout.putStrLn (cYellow s!" ⚠ tool call needs approval: {name}({argsPreview})") + let mut decided : Option UserDecision := none + while decided.isNone do + stdout.putStr (cBold " [y=allow once / n=deny once / a=always allow / d=always deny] > ") + stdout.flush + let line ← stdin.getLine + let key := line.trimAscii.toString.toLower + match key with + | "y" => decided := some .allowOnce + | "n" => decided := some .denyOnce + | "a" => decided := some .allowAlways + | "d" => decided := some .denyAlways + | _ => stdout.putStrLn (cRed " (please type one of: y / n / a / d)") + return decided.get! + +private def cliHooks : ProgressHooks := { + onLlmStart := fun n => IO.eprintln (cGrey s!" ⏳ round {n + 1} — asking LLM"), + onLlmEnd := fun _ => pure (), + onToolCall := fun name args => + IO.eprintln (cYellow s!" → {name}({truncate args.compress 200})"), + onToolResult := fun _ result => + let oneLine := result.replace "\n" " " + IO.eprintln (cGrey s!" ← {truncate oneLine 200}"), + onAsk := askDecision +} + +/-- Guess the image mime type from a file extension. -/ +private def guessImageMime (path : String) : String := + let lc := path.toLower + if lc.endsWith ".png" then "image/png" + else if lc.endsWith ".jpg" || lc.endsWith ".jpeg" then "image/jpeg" + else if lc.endsWith ".webp" then "image/webp" + else if lc.endsWith ".gif" then "image/gif" + else "image/png" + +/-- Read an image file and return a `data:…` URL. Errors propagate. -/ +private def imageToDataUrl (path : String) : IO String := + LeanTea.Llm.Openai.imageDataUrlFromFile path (guessImageMime path) + +private structure Args where + configPath : String := "" + +private partial def parseArgs (xs : List String) (a : Args) : Args := + match xs with + | "--config" :: v :: rest => parseArgs rest { a with configPath := v } + | _ :: rest => parseArgs rest a + | [] => a + +def main (rawArgs : List String) : IO Unit := do + let a := parseArgs rawArgs {} + if a.configPath.isEmpty then + IO.eprintln "usage: llm_chat_cli --config " + IO.Process.exit 2 + let fc ← loadConfig a.configPath + IO.eprintln (cCyan s!"model={fc.model} via {fc.baseUrl}") + IO.eprintln (cCyan s!"system={truncate fc.systemPrompt 100}") + let serverList := String.intercalate ", " <| fc.servers.toList.map (·.name) + IO.eprintln (cCyan s!"servers=[{serverList}]") + let o ← fromConfig fc + IO.eprintln (cCyan s!"loaded {o.openAiTools.size} tools across {o.servers.size} server(s)") + let storeDir ← (·.getD (← LeanTea.Llm.ChatStore.defaultDir)) <$> IO.getEnv "LLM_CHAT_STORE" + let policy ← LeanTea.Llm.Policy.LiveRef.fromDisk storeDir + let policyCfg : PolicyConfig := { policy := some policy } + IO.eprintln (cCyan s!"store={storeDir}") + IO.eprintln (cGrey "commands: /attach /clear-attach /history /clear \ +/sessions /save [name] /load /new /policy /policy-rm /quit") + let stdin ← IO.getStdin + let stdout ← IO.getStdout + let mut history : Array ChatMsg := #[] + let mut activeId : String := "" + let mut pendingImages : Array String := #[] + let mut pendingPaths : Array String := #[] + let mut loop := true + while loop do + let attachNote := + if pendingPaths.isEmpty then "" + else cYellow s!" [📎 {pendingPaths.size} attachment(s)]" + stdout.putStr (cBold s!"\n>{attachNote} ") + stdout.flush + let line ← stdin.getLine + if line.isEmpty then + IO.eprintln (cGrey "\n(eof — bye)") + loop := false + else + let user := line.trimAscii.toString + if user.isEmpty then + pure () + else if user == "/quit" then + loop := false + else if user == "/clear" then + history := #[] + IO.eprintln (cGrey "(history cleared)") + else if user == "/clear-attach" then + pendingImages := #[] + pendingPaths := #[] + IO.eprintln (cGrey "(attachments cleared)") + else if user == "/sessions" then + let summaries ← LeanTea.Llm.ChatStore.list storeDir + if summaries.isEmpty then + IO.eprintln (cGrey "(no saved sessions)") + else + for s in summaries do + let mark := if s.id == activeId then "*" else " " + IO.eprintln (cGrey s!" {mark} {s.id} ({s.count} msgs) {s.name}") + else if user == "/new" then + history := #[] + activeId := "" + pendingImages := #[] + pendingPaths := #[] + IO.eprintln (cGrey "(new session)") + else if user.startsWith "/save" then + let nameArg := (user.drop "/save".length).trimAscii.toString + let session : LeanTea.Llm.ChatStore.Session ← + if activeId.isEmpty then LeanTea.Llm.ChatStore.newSession nameArg + else pure { + id := activeId, name := nameArg, + created := 0, updated := 0, + messages := history + } + let session := { session with messages := history } + let saved ← LeanTea.Llm.ChatStore.save storeDir session + activeId := saved.id + IO.eprintln (cGrey s!"(saved {saved.id} as `{saved.name}`)") + else if user.startsWith "/load " then + let id := (user.drop "/load ".length).trimAscii.toString + match ← LeanTea.Llm.ChatStore.load storeDir id with + | none => + IO.eprintln (cRed s!"no such session: {id}") + | some s => + history := s.messages + activeId := s.id + IO.eprintln (cGrey s!"(loaded {s.id} `{s.name}` — {s.messages.size} msgs)") + else if user == "/policy" then + let rules ← policy.get + if rules.isEmpty then + IO.eprintln (cGrey "(no policy rules — every tool call is asked)") + else + for (r, i) in rules.toArray.zipIdx do + IO.eprintln (cGrey s!" [{i}] {r.action} {r.pattern}") + else if user.startsWith "/policy-rm " then + let n := (user.drop "/policy-rm ".length).trimAscii.toString.toNat?.getD 0 + policy.deleteAt n + IO.eprintln (cGrey s!"(removed rule at index {n})") + else if user == "/history" then + IO.eprintln (cGrey s!"history: {history.size} message(s)") + for m in history do + let role := toString m.role + let preview := truncate (m.text.replace "\n" " ") 80 + let imgs := if m.images.isEmpty then "" else s!" [📎 {m.images.size}]" + IO.eprintln (cGrey s!" [{role}]{imgs} {preview}") + else if user.startsWith "/attach " then + let path := (user.drop "/attach ".length).trimAscii.toString + try + let url ← imageToDataUrl path + pendingImages := pendingImages.push url + pendingPaths := pendingPaths.push path + IO.eprintln (cYellow s!" 📎 attached {path} ({(url.length * 3 / 4 / 1024)} kB)") + catch e => + IO.eprintln (cRed s!"attach failed: {e}") + else + try + let newMsgs ← o.runTurnFull history user pendingImages cliHooks policyCfg + history := history ++ newMsgs + pendingImages := #[] + pendingPaths := #[] + /- Auto-persist after each turn so closing the terminal + never loses the conversation. Create a session on the + first turn if one isn't active yet. -/ + let session : LeanTea.Llm.ChatStore.Session ← + if activeId.isEmpty then + let fresh ← LeanTea.Llm.ChatStore.newSession + pure { fresh with messages := history } + else pure { + id := activeId, name := "", + created := 0, updated := 0, + messages := history + } + let saved ← LeanTea.Llm.ChatStore.save storeDir session + activeId := saved.id + /- Final assistant message in this turn = the last message + that is `.assistant` with no tool_calls. -/ + let finalText := + (newMsgs.filter (fun m => m.role == .assistant && m.toolCalls.isEmpty)) + |>.back?.map (·.text) |>.getD "" + /- Surface any tool-generated images by file path. -/ + let toolImgs := newMsgs.filter (fun m => m.role == .tool && !m.images.isEmpty) + unless toolImgs.isEmpty do + for m in toolImgs do + IO.eprintln (cYellow s!" 🖼 {m.toolName} returned {m.images.size} image(s)") + stdout.putStrLn "" + stdout.putStrLn (cBold "assistant:") + stdout.putStrLn finalText + IO.eprintln (cGrey s!" (session {saved.id})") + catch e => + IO.eprintln (cRed s!"error: {e}") + o.shutdown + +end LlmChatCli + +def main (args : List String) : IO Unit := LlmChatCli.main args diff --git a/examples/LlmChatTui/Main.lean b/examples/LlmChatTui/Main.lean new file mode 100644 index 0000000..48e2889 --- /dev/null +++ b/examples/LlmChatTui/Main.lean @@ -0,0 +1,399 @@ +import LeanTea +import LeanTea.Tui +import Lean.Data.Json + +/-! # llm_chat_tui — chat UX built on the LeanTea.Tui widget kit + +Full-screen chat: coloured message bubbles above a `>` prompt, +whole-screen repaint on every turn. Cooked-mode line input +(`getLine`) — no raw-mode keystroke handling. Progress hooks paint +a one-line status under the prompt while the LLM is busy, then a +full repaint on completion. + +## What changed vs. the pre-widget-kit revision + +The old file (353 lines) rolled its own ANSI helpers — `dim`, +`bold`, `cyan`, `saveCursor`, `restoreCursor`, bespoke soft-wrap ++ divider drawing. This one leans on `LeanTea.Tui`: `Style` +records instead of escape helpers, `vbox` / `hbox` / `text` +instead of `String.intercalate` layout math, `renderBoxAnsi` as +the single point that serialises the widget tree to the +terminal. + +Progress hooks still write direct ANSI (save/restore cursor + +status line) because they fire *during* a blocking LLM turn, when +we can't repaint the widget tree. + +``` +llm_chat_tui --config llm-chat.json +``` +-/ + +open Lean (Json) +open LeanTea.Llm.McpOrchestrator +open LeanTea.Tui + +namespace LlmChatTui + +/-! ## Style palette — one place to tweak the look. -/ + +private def styleTitle : Style := { fg := .magenta, bold := true } +private def styleDim : Style := { dim := true } +private def styleUser : Style := { fg := .cyan, bold := true } +private def styleAi : Style := { fg := .green, bold := true } +private def styleTool : Style := { fg := .yellow } +private def styleToolBody : Style := { dim := true } +private def styleError : Style := { fg := .red } +private def stylePrompt : Style := { bold := true } +private def styleAttach : Style := { fg := .blue } + +/-! ## Soft wrap — split long strings without breaking wide-char escapes. + +Trivial line-based wrap; we don't try to word-break — a chat UX +already reads a mix of natural prose and code / URLs, and word +splits at arbitrary widths look worse than a hard cut. -/ + +private partial def softWrapLine (width : Nat) (line : String) : List String := + if line.length ≤ width then [line] + else + let head := (line.take width).toString + let tail := (line.drop width).toString + head :: softWrapLine width tail + +private def softWrap (width : Nat) (text : String) : List String := + text.splitOn "\n" |>.flatMap (softWrapLine width) + +private def truncate (s : String) (n : Nat) : String := + if s.length ≤ n then s + else (s.take (if n == 0 then 0 else n - 1)).toString ++ "…" + +/-! ## Message bubbles as widget rows + +Each `ChatMsg` produces a list of `Widget Unit` rows — one per +displayed line — so the vbox that holds the history can `vbox` +them all in order. The tag prefix (`you`, `ai`, ` ↳`) is a +separate coloured widget; continuation lines are indented so the +prefix column stays visually clean. -/ + +private def imgBadgeText (imgs : Array String) : String := + if imgs.isEmpty then "" + else + let kb := (imgs.foldl (fun acc u => acc + u.length * 3 / 4) 0) / 1024 + s!" [📎 {imgs.size} img, {kb} kB]" + +private def messageRows (width : Nat) (m : ChatMsg) : List (Widget Unit) := + match m.role with + | .user => + let body := softWrap (width - 4) m.text + let badge := imgBadgeText m.images + match body with + | [] => [text s!"you {badge}" styleUser] + | l :: ls => + text s!"you {l}{badge}" styleUser + :: ls.map (fun x => text s!" {x}" {}) + | .assistant => + if m.text.isEmpty && !m.toolCalls.isEmpty then + [text "(assistant requested tool calls)" styleDim] + else + let body := softWrap (width - 4) m.text + match body with + | [] => [text "ai" styleAi] + | l :: ls => + text s!"ai {l}" styleAi + :: ls.map (fun x => text s!" {x}" {}) + | .tool => + let preview := truncate (m.text.replace "\n" "↵") (width - 8) + [text s!" ↳ {m.toolName}{imgBadgeText m.images}" styleTool, + text s!" {preview}" styleToolBody] + | .system => [] + +private def toolCallRows (calls : Array Json) : List (Widget Unit) := + calls.toList.map fun c => + let fn := (c.getObjVal? "function").toOption.getD Json.null + let name := (fn.getObjVal? "name").toOption.bind (·.getStr?.toOption) |>.getD "" + let argsS := (fn.getObjVal? "arguments").toOption.bind (·.getStr?.toOption) |>.getD "{}" + text s!" → {name}({argsS})" styleTool + +private def historyView (width : Nat) (history : Array ChatMsg) : Widget Unit := + let rows := history.toList.flatMap fun m => + messageRows width m ++ toolCallRows m.toolCalls + vbox rows + +/-! ## Full screen -/ + +private def bannerLine (model : String) (nServers nTools : Nat) : Widget Unit := + { text s!" llm-chat — model={model} — {nServers} server(s), {nTools} tool(s)" styleTitle + with prefHeight := some 1 } + +private def dividerRow (width : Nat) : Widget Unit := + { text (String.ofList (List.replicate width '─')) styleDim + with prefHeight := some 1 } + +private def statusRow (status : String) : Widget Unit := + { text status styleDim with prefHeight := some 1 } + +private def promptRow : Widget Unit := + { text "> " stylePrompt with prefHeight := some 1 } + +structure Screen where + width : Nat := 100 + height : Nat := 40 + model : String + nServers : Nat + nTools : Nat + history : Array ChatMsg + status : String + +def screen (s : Screen) : Widget Unit := + vbox [ + bannerLine s.model s.nServers s.nTools, + dividerRow s.width, + historyView s.width s.history, + dividerRow s.width, + statusRow s.status, + promptRow + ] + +/-! ## Repaint & progress-line ANSI + +`clearScreen` erases the terminal (which is a scrollback-loss but +matches the old behaviour), then we ship the widget tree as one +ANSI blob via `renderBoxAnsi`. The progress line for the LLM/tool +hooks writes underneath the prompt using save/restore-cursor so +the user's typed input isn't clobbered mid-turn. -/ + +private def esc (s : String) : String := s!"\x1b[{s}" +private def clearScreen : String := esc "2J" ++ esc "H" +private def clearToEnd : String := esc "0J" +private def saveCursor : String := "\x1b7" +private def restoreCursor : String := "\x1b8" + +private def paint (s : Screen) : IO Unit := do + let box := (screen s).render s.width s.height false + let stdout ← IO.getStdout + stdout.putStr clearScreen + stdout.putStr (renderBoxAnsi box) + stdout.flush + +private def styleAnsi (st : Style) : String := Id.run do + -- Small helper for the progress line, which doesn't go through + -- the widget tree. Assembles the SGR sequence for `st.fg` + + -- optional dim/bold. Reset with esc "0m". + let mut out := esc "0m" + out := out ++ ( + match st.fg with + | .cyan => esc "36m" + | .green => esc "32m" + | .yellow => esc "33m" + | .red => esc "31m" + | .blue => esc "34m" + | .magenta => esc "35m" + | _ => "") + if st.bold then out := out ++ esc "1m" + if st.dim then out := out ++ esc "2m" + return out + +private def writeStatus (s : String) : IO Unit := do + let stdout ← IO.getStdout + stdout.putStr saveCursor + stdout.putStr "\n" + stdout.putStr clearToEnd + stdout.putStr (styleAnsi styleDim ++ s ++ esc "0m") + stdout.putStr restoreCursor + stdout.flush + +/-- Synchronous y/n/a/d prompt on the TUI's input stream. -/ +private def askDecision (name : String) (args : Json) : IO UserDecision := do + let stdin ← IO.getStdin + let stdout ← IO.getStdout + let preview := truncate args.compress 80 + let mut decided : Option UserDecision := none + while decided.isNone do + stdout.putStr saveCursor + stdout.putStr "\n" + stdout.putStr clearToEnd + stdout.putStr (styleAnsi styleTool) + stdout.putStrLn s!"⚠ approve `{name}({preview})`?" + stdout.putStr (styleAnsi stylePrompt) + stdout.putStr " [y allow once / n deny once / a always allow / d always deny] > " + stdout.putStr (esc "0m") + stdout.putStr restoreCursor + stdout.flush + let line ← stdin.getLine + match line.trimAscii.toString.toLower with + | "y" => decided := some .allowOnce + | "n" => decided := some .denyOnce + | "a" => decided := some .allowAlways + | "d" => decided := some .denyAlways + | _ => pure () + return decided.get! + +private def tuiHooks : ProgressHooks := { + onLlmStart := fun n => writeStatus s!"⏳ round {n + 1} — thinking…", + onLlmEnd := fun _ => writeStatus "", + onToolCall := fun n a => writeStatus s!"→ {n}({truncate a.compress 80})", + onToolResult := fun _ r => + let oneLine := r.replace "\n" " " + writeStatus s!"← {truncate oneLine 80}", + onAsk := askDecision +} + +/-! ## CLI args -/ + +private structure Args where + configPath : String := "" + +private partial def parseArgs (xs : List String) (a : Args) : Args := + match xs with + | "--config" :: v :: rest => parseArgs rest { a with configPath := v } + | _ :: rest => parseArgs rest a + | [] => a + +private def guessImageMime (path : String) : String := + let lc := path.toLower + if lc.endsWith ".png" then "image/png" + else if lc.endsWith ".jpg" || lc.endsWith ".jpeg" then "image/jpeg" + else if lc.endsWith ".webp" then "image/webp" + else if lc.endsWith ".gif" then "image/gif" + else "image/png" + +private def imageToDataUrl (path : String) : IO String := + LeanTea.Llm.Openai.imageDataUrlFromFile path (guessImageMime path) + +def main (rawArgs : List String) : IO Unit := do + let a := parseArgs rawArgs {} + if a.configPath.isEmpty then + IO.eprintln "usage: llm_chat_tui --config " + IO.Process.exit 2 + let fc ← loadConfig a.configPath + IO.eprintln s!"loading {fc.servers.size} MCP server(s)…" + let o ← fromConfig fc + let nServers := o.servers.size + let nTools := o.openAiTools.size + let storeDir ← (·.getD (← LeanTea.Llm.ChatStore.defaultDir)) <$> IO.getEnv "LLM_CHAT_STORE" + let policy ← LeanTea.Llm.Policy.LiveRef.fromDisk storeDir + let policyCfg : PolicyConfig := { policy := some policy } + let stdin ← IO.getStdin + let mut history : Array ChatMsg := #[] + let mut activeId : String := "" + let mut pendingImages : Array String := #[] + let mut screenState : Screen := { + model := o.model, nServers := nServers, nTools := nTools, + history := history, + status := "commands: /attach /clear-attach /clear /sessions /save /load /new /policy /policy-rm /quit" + } + paint screenState + let mut loop := true + while loop do + let line ← stdin.getLine + if line.isEmpty then + loop := false + else + let user := line.trimAscii.toString + if user.isEmpty then pure () + else if user == "/quit" then loop := false + else if user == "/clear" then + history := #[] + screenState := { screenState with history := history, status := "(history cleared)" } + paint screenState + else if user == "/clear-attach" then + pendingImages := #[] + screenState := { screenState with status := "(attachments cleared)" } + paint screenState + else if user == "/sessions" then + let summaries ← LeanTea.Llm.ChatStore.list storeDir + let lines := summaries.toList.map fun s => + let mark := if s.id == activeId then "*" else " " + s!"{mark} {s.id} ({s.count} msgs) {s.name}" + let joined := if lines.isEmpty then "(no saved sessions)" + else String.intercalate " · " lines + screenState := { screenState with status := joined } + paint screenState + else if user == "/new" then + history := #[] + activeId := "" + pendingImages := #[] + screenState := { screenState with history := history, status := "(new session)" } + paint screenState + else if user.startsWith "/save" then + let nameArg := (user.drop "/save".length).trimAscii.toString + let session : LeanTea.Llm.ChatStore.Session ← + if activeId.isEmpty then LeanTea.Llm.ChatStore.newSession nameArg + else pure { + id := activeId, name := nameArg, + created := 0, updated := 0, + messages := history + } + let saved ← LeanTea.Llm.ChatStore.save storeDir + { session with messages := history } + activeId := saved.id + screenState := { screenState with status := s!"(saved {saved.id} `{saved.name}`)" } + paint screenState + else if user.startsWith "/load " then + let id := (user.drop "/load ".length).trimAscii.toString + match ← LeanTea.Llm.ChatStore.load storeDir id with + | none => + screenState := { screenState with status := s!"no such session: {id}" } + paint screenState + | some s => + history := s.messages + activeId := s.id + screenState := { screenState with history := history, + status := s!"(loaded {s.id} `{s.name}`)" } + paint screenState + else if user == "/policy" then + let rules ← policy.get + let lines := rules.toArray.zipIdx.toList.map fun (r, i) => + s!"[{i}] {r.action} {r.pattern}" + let joined := if lines.isEmpty then "(no policy rules)" + else String.intercalate " · " lines + screenState := { screenState with status := joined } + paint screenState + else if user.startsWith "/policy-rm " then + let n := (user.drop "/policy-rm ".length).trimAscii.toString.toNat?.getD 0 + policy.deleteAt n + screenState := { screenState with status := s!"(removed policy rule {n})" } + paint screenState + else if user.startsWith "/attach " then + let path := (user.drop "/attach ".length).trimAscii.toString + try + let url ← imageToDataUrl path + pendingImages := pendingImages.push url + screenState := { screenState with + status := s!"📎 attached {path} ({url.length * 3 / 4 / 1024} kB) — {pendingImages.size} pending" } + paint screenState + catch e => + screenState := { screenState with status := s!"attach failed: {e}" } + paint screenState + else + try + let newMsgs ← o.runTurnFull history user pendingImages tuiHooks policyCfg + history := history ++ newMsgs + pendingImages := #[] + /- Auto-save the session each turn. -/ + let session : LeanTea.Llm.ChatStore.Session ← + if activeId.isEmpty then + let fresh ← LeanTea.Llm.ChatStore.newSession + pure { fresh with messages := history } + else pure { + id := activeId, name := "", + created := 0, updated := 0, + messages := history + } + let saved ← LeanTea.Llm.ChatStore.save storeDir session + activeId := saved.id + screenState := { screenState with history := history, + status := s!"session {saved.id}" } + paint screenState + catch e => + screenState := { screenState with status := s!"error: {e}" } + paint screenState + o.shutdown + let stdout ← IO.getStdout + stdout.putStrLn "" + stdout.putStr (styleAnsi styleDim ++ "(bye)" ++ esc "0m") + stdout.putStrLn "" + +end LlmChatTui + +def main (args : List String) : IO Unit := LlmChatTui.main args diff --git a/examples/LlmChatWeb/Serve.lean b/examples/LlmChatWeb/Serve.lean new file mode 100644 index 0000000..8b1eeee --- /dev/null +++ b/examples/LlmChatWeb/Serve.lean @@ -0,0 +1,892 @@ +import LeanTea +import Lean.Data.Json + +/-! # llm_chat_web — single-page chat with sessions sidebar + images + + * Left sidebar — past sessions (newest first); `+ new` button. + * Main pane — bubbles per role; tool calls collapsed; inline images. + * Input area — paste / drag-drop / file-picker images. + +Routes: + + * `GET /` single HTML page + * `GET /api/state` current model + servers + tools + active session id + * `GET /api/sessions` list every saved session (newest first) + * `POST /api/sessions/new` create a fresh session, return its id + * `GET /api/sessions/:id` load full history + * `DELETE /api/sessions/:id` remove + * `POST /api/sessions/:id/send` body `{message, images:[dataUrl,...]}` + +Sessions are persisted to `~/.cache/leantea-chat/` (override with +`--store DIR` / `LLM_CHAT_STORE`). Each session is a single JSON +file with the history inlined including base64 image data — fine +for the single-user developer scale this app is built for. -/ + +open Lean (Json) +open LeanTea LeanTea.Net.Http LeanTea.Net.Server +open LeanTea.Llm.McpOrchestrator + +namespace LlmChatWeb + +/-! ## ChatMsg → JSON for the UI -/ + +private def msgToUiJson (m : ChatMsg) : Json := + let callsJson := Json.arr <| m.toolCalls.map fun c => + let fn := (c.getObjVal? "function").toOption.getD Json.null + let name := (fn.getObjVal? "name").toOption.bind (·.getStr?.toOption) |>.getD "" + let args := (fn.getObjVal? "arguments").toOption.bind (·.getStr?.toOption) |>.getD "{}" + Json.mkObj [("name", Json.str name), ("args", Json.str args)] + Json.mkObj [ + ("role", Json.str m.role.toString), + ("text", Json.str m.text), + ("images", Json.arr (m.images.map Json.str)), + ("toolCalls", callsJson), + ("toolName", Json.str m.toolName) + ] + +private def historyToJson (h : Array ChatMsg) : Json := + Json.arr (h.map msgToUiJson) + +/-! ## HTML page (inlined) -/ + +private def pageHtml : String := " + + + + +llm-chat + + + + +
+
+

⚠ approve tool call?

+
tool:
+

+    
+ + + + +
+
+
+
+
+

llm-chat

+ loading… +
+
+
+
+
+ + + + +
+
+
+ + + +" + +/-! ## Server state -/ + +/-- One pending tool call awaiting user approval. `decision` is set + by `POST /api/decision` and the orchestrator's `onAsk` hook + polls it until it flips from `none` to a real decision. -/ +private structure PendingCall where + id : String + toolName : String + args : Json + decision : IO.Ref (Option UserDecision) + +private structure Ctx where + orch : Orchestrator + storeDir : String + /-- In-memory cache for the active session — avoids re-reading + the JSON file on every send. -/ + cache : IO.Ref (Std.HashMap String LeanTea.Llm.ChatStore.Session) + /-- Live policy ref (shared with /api/policy endpoints). -/ + policy : LeanTea.Llm.Policy.LiveRef + /-- The one (or zero) pending tool-call decision the browser + is being asked about. We only ever have one at a time — + the orchestrator's onAsk is synchronous per call. -/ + pending : IO.Ref (Option PendingCall) + +private def Ctx.loadSession (ctx : Ctx) (id : String) : IO (Option LeanTea.Llm.ChatStore.Session) := do + let m ← ctx.cache.get + if let some s := m.get? id then return some s + match ← LeanTea.Llm.ChatStore.load ctx.storeDir id with + | none => return none + | some s => + ctx.cache.set (m.insert id s) + return some s + +private def Ctx.saveSession (ctx : Ctx) (s : LeanTea.Llm.ChatStore.Session) + : IO LeanTea.Llm.ChatStore.Session := do + let s' ← LeanTea.Llm.ChatStore.save ctx.storeDir s + let m ← ctx.cache.get + ctx.cache.set (m.insert s.id s') + return s' + +/-- The orchestrator hook the policy flow runs when a rule says + `ask`. We post the pending call into `ctx.pending` and spin + until the browser resolves it via `POST /api/decision`. -/ +private def Ctx.askPolicy (ctx : Ctx) (toolName : String) (args : Json) + : IO UserDecision := do + let id ← do + let r ← IO.rand 0 0xffff_ffff + pure s!"pc-{r}" + let dref ← IO.mkRef (none : Option UserDecision) + let pc : PendingCall := { id, toolName, args, decision := dref } + ctx.pending.set (some pc) + /- Poll every 100ms. Plenty fast for a human-driven UI; tiny + overhead. We never time out — the browser must answer. -/ + let mut decided : Option UserDecision := none + while decided.isNone do + IO.sleep 100 + decided ← dref.get + ctx.pending.set none + return decided.get! + +private def stateJson (ctx : Ctx) : IO Json := do + return Json.mkObj [ + ("model", Json.str ctx.orch.model), + ("servers", Json.num (Int.ofNat ctx.orch.servers.size)), + ("tools", Json.num (Int.ofNat ctx.orch.openAiTools.size)) + ] + +private def sessionsListJson (ctx : Ctx) : IO Json := do + let summaries ← LeanTea.Llm.ChatStore.list ctx.storeDir + return Json.mkObj [ + ("sessions", Json.arr (summaries.map LeanTea.Llm.ChatStore.summaryToJson)) + ] + +private def handleNewSession (ctx : Ctx) : IO Response := do + let s ← LeanTea.Llm.ChatStore.newSession + let _ ← ctx.saveSession s + let body := Json.mkObj [("id", Json.str s.id)] + return Response.text 200 body.compress + +private def handleGetSession (ctx : Ctx) (id : String) : IO Response := do + match ← ctx.loadSession id with + | none => + let body := Json.mkObj [("error", Json.str s!"session {id} not found")] + return Response.text 404 body.compress + | some s => + let body := Json.mkObj [ + ("id", Json.str s.id), + ("name", Json.str s.name), + ("messages", historyToJson s.messages) + ] + return Response.text 200 body.compress + +private def handleDeleteSession (ctx : Ctx) (id : String) : IO Response := do + let _ ← LeanTea.Llm.ChatStore.delete ctx.storeDir id + let m ← ctx.cache.get + ctx.cache.set (m.erase id) + return Response.text 200 "{\"ok\":true}" + +private def parseImages (j : Json) : Array String := + match (j.getObjVal? "images").toOption.bind (·.getArr?.toOption) with + | some a => a.filterMap (·.getStr?.toOption) + | none => #[] + +private def handleSend (ctx : Ctx) (id : String) (req : Request) : IO Response := do + match Json.parse (String.fromUTF8! req.body) with + | .error e => + let body := Json.mkObj [("error", Json.str s!"bad json: {e}")] + return Response.text 400 body.compress + | .ok j => + let msg := (j.getObjVal? "message").toOption.bind (·.getStr?.toOption) |>.getD "" + let images := parseImages j + if msg.isEmpty && images.isEmpty then + let body := Json.mkObj [("error", Json.str "need either message or images")] + return Response.text 400 body.compress + match ← ctx.loadSession id with + | none => + let body := Json.mkObj [("error", Json.str s!"session {id} not found")] + return Response.text 404 body.compress + | some s => + try + let hooks : ProgressHooks := { onAsk := ctx.askPolicy } + let policyCfg : PolicyConfig := { policy := some ctx.policy } + let newMsgs ← ctx.orch.runTurnFull s.messages msg images hooks policyCfg + let updated : LeanTea.Llm.ChatStore.Session := + { s with messages := s.messages ++ newMsgs } + let updated' ← ctx.saveSession updated + let body := Json.mkObj [("messages", historyToJson updated'.messages)] + return Response.text 200 body.compress + catch e => + let body := Json.mkObj [("error", Json.str s!"{e}")] + return Response.text 500 body.compress + +/-! ## Policy endpoints -/ + +private def pendingJson (ctx : Ctx) : IO Json := do + match ← ctx.pending.get with + | none => return Json.mkObj [("pending", Json.null)] + | some pc => + return Json.mkObj [("pending", Json.mkObj [ + ("id", Json.str pc.id), + ("toolName", Json.str pc.toolName), + ("args", pc.args) + ])] + +private def handleDecision (ctx : Ctx) (req : Request) : IO Response := do + match Json.parse (String.fromUTF8! req.body) with + | .error e => + let body := Json.mkObj [("error", Json.str s!"bad json: {e}")] + return Response.text 400 body.compress + | .ok j => + let id := (j.getObjVal? "id").toOption.bind (·.getStr?.toOption) |>.getD "" + let action := (j.getObjVal? "action").toOption.bind (·.getStr?.toOption) |>.getD "" + match ← ctx.pending.get with + | none => + let body := Json.mkObj [("error", Json.str "no pending call")] + return Response.text 409 body.compress + | some pc => + if pc.id != id then + let body := Json.mkObj [("error", Json.str "pending call id mismatch")] + return Response.text 409 body.compress + let decision : Option UserDecision := match action with + | "allow-once" => some .allowOnce + | "deny-once" => some .denyOnce + | "allow-always" => some .allowAlways + | "deny-always" => some .denyAlways + | _ => none + match decision with + | none => + let body := Json.mkObj [("error", Json.str s!"unknown action: {action}")] + return Response.text 400 body.compress + | some d => + pc.decision.set (some d) + return Response.text 200 "{\"ok\":true}" + +private def policyListJson (ctx : Ctx) : IO Json := do + let rules ← ctx.policy.get + let arr := rules.toArray.zipIdx.map fun (r, i) => + Json.mkObj [ + ("idx", Json.num (Int.ofNat i)), + ("pattern", Json.str r.pattern), + ("action", Json.str r.action.toString) + ] + return Json.mkObj [("rules", Json.arr arr)] + +private def handlePolicyDelete (ctx : Ctx) (idx : Nat) : IO Response := do + ctx.policy.deleteAt idx + return Response.text 200 "{\"ok\":true}" + +/-! ## Routing + +We hand-roll path matching for `/api/sessions/` and its `/send` +suffix because the framework's `Handler` is just `Request → IO +Response` — no router. Splitting on `/` and checking the prefix +keeps it minimal. -/ + +private def matchSessionPath (path : String) : Option (String × Option String) := + /- `/api/sessions/ID` → `(ID, none)` + `/api/sessions/ID/send` → `(ID, some "send")` -/ + let prefix_ := "/api/sessions/" + if !path.startsWith prefix_ then none + else + let rest := (path.drop prefix_.length).toString + if rest.isEmpty then none + else + match rest.splitOn "/" with + | [id] => some (id, none) + | [id, sub] => some (id, some sub) + | _ => none + +private def matchPolicyDeletePath (path : String) : Option Nat := + let prefix_ := "/api/policy/" + if !path.startsWith prefix_ then none + else (path.drop prefix_.length).toString.toNat? + +private def handler (ctx : Ctx) : Handler := fun req => do + match req.path, req.method with + | "/", _ => return Response.html 200 pageHtml + | "/favicon.ico", _ => return { status := 204, headers := #[], body := .empty } + | "/api/state", "GET" => + let body := (← stateJson ctx).compress + return Response.text 200 body + | "/api/sessions", "GET" => + let body := (← sessionsListJson ctx).compress + return Response.text 200 body + | "/api/sessions/new", "POST" => handleNewSession ctx + | "/api/pending", "GET" => + let body := (← pendingJson ctx).compress + return Response.text 200 body + | "/api/decision", "POST" => handleDecision ctx req + | "/api/policy", "GET" => + let body := (← policyListJson ctx).compress + return Response.text 200 body + | p, m => + match matchSessionPath p, m with + | some (id, none), "GET" => handleGetSession ctx id + | some (id, none), "DELETE" => handleDeleteSession ctx id + | some (id, some "send"), "POST" => handleSend ctx id req + | _, _ => + match matchPolicyDeletePath p, m with + | some idx, "DELETE" => handlePolicyDelete ctx idx + | _, _ => return Response.notFound + +/-! ## CLI -/ + +private structure Args where + port : UInt16 := 8030 + host : String := "0.0.0.0" + configPath : String := "" + storeDir : String := "" + +private partial def parseArgs (xs : List String) (a : Args) : Args := + match xs with + | "--port" :: v :: rest => parseArgs rest { a with port := (v.toNat?.getD 8030).toUInt16 } + | "--host" :: v :: rest => parseArgs rest { a with host := v } + | "--config" :: v :: rest => parseArgs rest { a with configPath := v } + | "--store" :: v :: rest => parseArgs rest { a with storeDir := v } + | _ :: rest => parseArgs rest a + | [] => a + +def serveMain (args : List String) : IO Unit := do + let mut a := parseArgs args {} + if let some p ← IO.getEnv "PORT" then + if let some n := p.toNat? then a := { a with port := n.toUInt16 } + if let some d ← IO.getEnv "LLM_CHAT_STORE" then + if a.storeDir.isEmpty then a := { a with storeDir := d } + if a.configPath.isEmpty then + IO.eprintln "usage: llm_chat_web --config \ +[--port N] [--host H] [--store DIR]" + IO.Process.exit 2 + let storeDir ← if a.storeDir.isEmpty then LeanTea.Llm.ChatStore.defaultDir + else pure a.storeDir + let fc ← loadConfig a.configPath + IO.eprintln s!"llm-chat-web: spawning {fc.servers.size} MCP server(s)…" + let orch ← fromConfig fc + IO.eprintln s!"llm-chat-web: loaded {orch.openAiTools.size} tool(s); \ +storeDir={storeDir}; serving on http://{a.host}:{a.port}/" + let cache ← IO.mkRef (∅ : Std.HashMap String LeanTea.Llm.ChatStore.Session) + let policy ← LeanTea.Llm.Policy.LiveRef.fromDisk storeDir + let pending ← IO.mkRef (none : Option PendingCall) + let ctx : Ctx := { orch, storeDir, cache, policy, pending } + /- `serveConcurrent` (not `serve`) is required because /api/send + blocks on the policy `ask` hook waiting for /api/decision — + the sequential server would deadlock. -/ + serveConcurrent a.port a.host (handler ctx) + +end LlmChatWeb + +def main (args : List String) : IO Unit := LlmChatWeb.serveMain args diff --git a/examples/Smoke/Gemini.lean b/examples/Smoke/Gemini.lean new file mode 100644 index 0000000..3bcc019 --- /dev/null +++ b/examples/Smoke/Gemini.lean @@ -0,0 +1,63 @@ +import LeanTea + +/-! Smoke test for `LeanTea.Cloud.Gemini`. + +Skips quietly when `GEMINI_API_KEY` is unset so it can sit in CI +without leaking the secret or failing the pipeline. + +When the key is present: + + 1. `ask`: send "ping; reply OK" against `gemini-2.5-flash-lite` + (cheapest; this is a wire-up check, not a reasoning test) and + assert non-empty response text. + 2. `reviewMany`: bundle this very file plus `LeanTea/Cloud/Gemini.lean` + and request a 1-sentence summary, against the same cheap model. + Verifies the SafePath workspace plumbing + the multi-file + bundling shape. + +Override the model used for the smoke via `GEMINI_SMOKE_MODEL` +(default `gemini-2.5-flash-lite`). -/ + +open LeanTea.Cloud.Gemini + +def main : IO Unit := do + match ← Config.fromEnv? with + | none => + IO.println "gemini_smoke: GEMINI_API_KEY not set — skipping (this is fine in CI)" + return + | some baseCfg => + let model := (← IO.getEnv "GEMINI_SMOKE_MODEL").getD "gemini-2.5-flash-lite" + let cfg := { baseCfg with model, timeoutSec := 120 } + IO.println s!"gemini_smoke: model = {cfg.model}" + + IO.println "── ask ──────────────────────────────────────────" + let r1 ← ask cfg "Reply with the single word OK." + { temperature := some 0.0, maxOutputTokens := some 16 } + IO.println s!"text=`{r1.text.trim}` finish={r1.finishReason} \ +tokens in={r1.usage.input} out={r1.usage.output}" + if r1.text.isEmpty then + throw <| IO.userError "gemini_smoke: ask returned empty text" + + IO.println "" + IO.println "── reviewMany ───────────────────────────────────" + /- Find the repo root: the smoke binary runs from anywhere, but + Lake sets cwd to the package root. Bail out cleanly if the + file isn't there (running this binary by hand from somewhere + else shouldn't crash CI). -/ + let ws := (← IO.currentDir).toString + let target := "LeanTea/Cloud/Gemini.lean" + if ! (← System.FilePath.pathExists (ws ++ "/" ++ target)) then + IO.println s!"gemini_smoke: {target} not found under {ws} — \ +skipping reviewMany (run from repo root for full coverage)" + return + let (r2, stats) ← reviewMany cfg ws + #[target, "examples/Smoke/Gemini.lean"] + "1 文で全体構造を要約してください。" + { temperature := some 0.0, maxOutputTokens := some 256 } + IO.println s!"files={stats.fileCount} bytes={stats.totalBytes} \ +finish={r2.finishReason} tokens in={r2.usage.input} out={r2.usage.output}" + IO.println r2.text + if r2.text.isEmpty then + throw <| IO.userError "gemini_smoke: reviewMany returned empty text" + IO.println "" + IO.println "gemini_smoke: ✔ both calls succeeded" diff --git a/examples/Tests/TuiSpec.lean b/examples/Tests/TuiSpec.lean new file mode 100644 index 0000000..12e7f43 --- /dev/null +++ b/examples/Tests/TuiSpec.lean @@ -0,0 +1,264 @@ +import LeanTea +import LeanTea.Tui + +/-! # examples/Tests/TuiSpec.lean — pure tests for the LeanTea.Tui widget kit + +Every widget is a pure function (`Nat → Nat → Bool → Box`) plus a +pure key handler (`Key → Option msg`). This spec exercises the +widget kit end-to-end without spinning up a real terminal: + + * Layout — `Box.text`, `vcat`, `hcat`, `vbox`, `hbox` all produce + the expected sizes and contents. + * Combinators — `border` adds the right corners and edges, + `padding` clears the margins. + * Elements — `button` renders its label, `input` reflects its + value, `listView` highlights the selected row. + * App loop — feeding a sequence of `Key`s into a `Session` walks + the state machine the same way the real `App.run` does. + +The whole file completes in well under a second because there's no +IO. Add a regression case here before changing layout heuristics. +-/ + +open LeanTea LeanTea.LSpec +open LeanTea.Tui + +/-! ## Group 1 — Box primitives -/ + +namespace BoxGroup + +def emptyHasSize : Bool := + let b := Box.empty 5 3 + b.width == 5 && b.height == 3 && b.cells.size == 3 + +def textTruncatesToWidth : Bool := + let b := Box.text 5 "Hello, World" + (b.toRows.head?.getD "") == "Hello" + +def textPadsToWidth : Bool := + let b := Box.text 8 "hi" + (b.toRows.head?.getD "") == "hi " + +def vcatStacks : Bool := + let top := Box.text 6 "top" + let bot := Box.text 6 "bottom" + let s := Box.vcat top bot + s.height == 2 && s.contains "top" && s.contains "bottom" + +def hcatJoins : Bool := + let l := Box.text 4 "abcd" + let r := Box.text 4 "WXYZ" + let s := Box.hcat l r + s.width == 8 && (s.toRows.head?.getD "") == "abcdWXYZ" + +def containsFindsSubstring : Bool := + let b := Box.vcat (Box.text 10 "alpha") (Box.text 10 "beta") + b.contains "alpha" && b.contains "beta" && !b.contains "gamma" + +def run : IO LSpec := do + return group "Tui — Box primitives" [ + it "Box.empty has the requested size" emptyHasSize, + it "Box.text truncates to width" textTruncatesToWidth, + it "Box.text right-pads to width" textPadsToWidth, + it "Box.vcat stacks vertically" vcatStacks, + it "Box.hcat joins horizontally" hcatJoins, + it "Box.contains finds substrings" containsFindsSubstring + ] + +end BoxGroup + +/-! ## Group 2 — Combinators -/ + +namespace CombinatorGroup + +def borderDrawsCorners : Bool := + let inner := Box.empty 8 3 + let widget : Widget Unit := { + render := fun _ _ _ => inner, + prefWidth := some 8, prefHeight := some 3 + } + let bordered := border .line widget + let b := bordered.render 10 5 false + (b.cellAt 0 0).ch == '┌' && + (b.cellAt 0 9).ch == '┐' && + (b.cellAt 4 0).ch == '└' && + (b.cellAt 4 9).ch == '┘' + +def paddingClearsMargin : Bool := + let widget : Widget Unit := text "X" + let padded := padding 1 widget + let b := padded.render 5 3 false + (b.cellAt 1 1).ch == 'X' && (b.cellAt 0 0).ch == ' ' + +def vboxAllocatesPrefHeight : Bool := + let header : Widget Unit := { render := fun w _ _ => Box.text w "HDR", + prefHeight := some 2 } + let body : Widget Unit := { render := fun w h _ => Box.filled w h '.' } + let root := vbox [header, body] + let b := root.render 10 5 false + b.contains "HDR" && ((b.cellAt 4 0).ch == '.') + +def hboxAllocatesPrefWidth : Bool := + let left : Widget Unit := { render := fun _ h _ => Box.filled 3 h 'L', prefWidth := some 3 } + let right : Widget Unit := { render := fun w h _ => Box.filled w h 'R' } + let root := hbox [left, right] + let b := root.render 10 1 false + ((b.cellAt 0 0).ch == 'L') && ((b.cellAt 0 5).ch == 'R') + +def run : IO LSpec := do + return group "Tui — Combinators" [ + it "border draws the right corners" borderDrawsCorners, + it "padding clears the margin" paddingClearsMargin, + it "vbox honours prefHeight" vboxAllocatesPrefHeight, + it "hbox honours prefWidth" hboxAllocatesPrefWidth + ] + +end CombinatorGroup + +/-! ## Group 3 — Elements -/ + +namespace ElementGroup + +inductive Msg where | clicked deriving Inhabited + +def buttonShowsLabel : Bool := + let w : Widget Msg := button "btn" .primary "Save" .clicked + let b := w.render 12 1 false + b.contains "Save" + +def buttonFiresOnEnter : Bool := + let w : Widget Msg := button "btn" .primary "Save" .clicked + match w.onKey .enter with + | some .clicked => true + | _ => false + +def buttonIgnoresOtherKeys : Bool := + let w : Widget Msg := button "btn" .primary "Save" .clicked + match w.onKey .esc with + | none => true + | _ => false + +inductive InpMsg where + | typed (c : Char) | back | submit + deriving Inhabited + +def inputShowsValue : Bool := + let w : Widget InpMsg := input "x" "hello" { + onChar := .typed, onBackspace := .back, onSubmit := .submit + } + let b := w.render 12 1 false + b.contains "hello" + +def inputSendsCharOnKey : Bool := + let w : Widget InpMsg := input "x" "" { + onChar := .typed, onBackspace := .back, onSubmit := .submit + } + match w.onKey (.char 'a') with + | some (.typed 'a') => true + | _ => false + +inductive LMsg where + | up | down | go + deriving Inhabited + +def listShowsItems : Bool := + let w : Widget LMsg := listView "lst" ["one", "two", "three"] 0 + LMsg.up LMsg.down LMsg.go + let b := w.render 20 3 false + b.contains "one" && b.contains "two" && b.contains "three" + +def listHighlightsSelection : Bool := + let w : Widget LMsg := listView "lst" ["one", "two", "three"] 1 + LMsg.up LMsg.down LMsg.go + let b := w.render 20 3 false + -- The selected row should be prefixed with the ▶ marker. + (b.toRows[1]?.getD "").startsWith "▶ two" + +def run : IO LSpec := do + return group "Tui — Elements" [ + it "button renders its label" buttonShowsLabel, + it "button fires onEnter" buttonFiresOnEnter, + it "button ignores unrelated keys" buttonIgnoresOtherKeys, + it "input echoes its value" inputShowsValue, + it "input fires onChar per keystroke" inputSendsCharOnKey, + it "listView renders each item" listShowsItems, + it "listView highlights selection" listHighlightsSelection + ] + +end ElementGroup + +/-! ## Group 4 — App + Session round-trip (the test framework itself) -/ + +namespace AppGroup + +inductive CMsg where + | inc | dec | quit + deriving Inhabited + +structure Counter where + count : Int := 0 + quit : Bool := false + deriving Inhabited + +def view (s : Counter) : Widget CMsg := + vbox [ + text s!"count = {s.count}", + hbox [ + button "minus" .secondary "[-]" .dec, + button "plus" .primary "[+]" .inc, + button "quit" .danger "[q]" .quit + ] + ] + +def update (m : CMsg) (s : Counter) : Counter := + match m with + | .inc => { s with count := s.count + 1 } + | .dec => { s with count := s.count - 1 } + | .quit => { s with quit := true } + +def app : Tui.App Counter CMsg := { + init := {}, + view := view, + update := update, + quitWhen := (·.quit) +} + +def fresh : Session Counter CMsg := + mkSession app 30 4 ["minus", "plus", "quit"] + +def initialRenderShowsZero : Bool := + fresh.containsText "count = 0" + +def enterOnInitialFocusFiresMinus : Bool := + (fresh.sendKey .enter).state.count == -1 + +def tabAdvancesFocus : Bool := + (fresh.sendKey .tab).focused == "plus" + +def enterAfterTabFiresPlus : Bool := + ((fresh.sendKey .tab).sendKey .enter).state.count == 1 + +def quitTriggersQuitWhen : Bool := + (((fresh.sendKey .tab).sendKey .tab).sendKey .enter).isQuit + +def run : IO LSpec := do + return group "Tui — App + Session" [ + it "initial render shows count=0" initialRenderShowsZero, + it "Enter on initial focus fires .dec" enterOnInitialFocusFiresMinus, + it "Tab advances focus" tabAdvancesFocus, + it "Enter after Tab fires the next button" enterAfterTabFiresPlus, + it "the quit button trips quitWhen" quitTriggersQuitWhen + ] + +end AppGroup + +/-! ## main -/ + +def main : IO Unit := do + let b ← BoxGroup.run + let c ← CombinatorGroup.run + let e ← ElementGroup.run + let a ← AppGroup.run + let tree := group "LeanTea.Tui widget kit" [b, c, e, a] + let code ← lspecIO tree + if code != 0 then IO.Process.exit code.toUInt8 diff --git a/lakefile.lean b/lakefile.lean index 80d31c1..27c258b 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -30,7 +30,7 @@ package «lean-tea» where precompileModules := false lean_lib LeanTea where - roots := #[`LeanTea] + roots := #[`LeanTea, `LeanTea.Tui] /-- A small Fay-style Lean-subset → JavaScript compiler. Used by the LSpec tests under `examples/Tests/` that exec the emitted JS via @@ -50,6 +50,7 @@ lean_lib Examples where `Tools.LeanJsCompile, `Tools.LeanJsInterp, `Tools.LeanJsRun, `Tests.PureSpec, `Tests.AuthSpec, + `Tests.TuiSpec, `AuthIdp.Serve, `StateMachine.Order ] @@ -390,6 +391,13 @@ lean_exe leanjs_spec where srcDir := "examples" root := `Tests.LeanJsSpec +/-- Pure unit tests for the `LeanTea.Tui` widget kit — layout + primitives, combinators, elements, and the `Session` test + harness that mirrors `App.run` without a real TTY. -/ +lean_exe tui_spec where + srcDir := "examples" + root := `Tests.TuiSpec + /-- Aggregated LSpec runner for the construction-time security primitives (SafeHtml + SafePath + SafeCmd + SafeHeader + SafeRedirect). One binary, one CI step, ~60 LSpec assertions. -/ @@ -476,6 +484,14 @@ lean_exe openai_smoke where srcDir := "examples" root := `Smoke.Openai +/-- Gemini API wire-up smoke. Skips quietly when `GEMINI_API_KEY` is + unset (CI default). Otherwise runs one `ask` against + `gemini-2.5-flash-lite` (cheapest) and one `reviewMany` over two + repo files. Override the model via `GEMINI_SMOKE_MODEL`. -/ +lean_exe gemini_smoke where + srcDir := "examples" + root := `Smoke.Gemini + /-- End-to-end demo: drive Chromium via Playwright, screenshot a page, feed it to a vision model. Requires the Node bridge under `tools/browser-bridge/` (`npm install` + `npx playwright install @@ -528,6 +544,60 @@ lean_exe tmux_mcp_serve where srcDir := "examples" root := `TmuxMcp.Serve +/-- MCP server giving code-editing agents the standard file-system + + shell toolkit, every operation workspace-bound via + `LeanTea.Net.SafePath`. Seven tools: `coder_read_file`, + `coder_list_dir`, `coder_glob`, `coder_grep` (read-only — safe + to allow globally) and `coder_write_file`, `coder_edit_file`, + `coder_run` (mutating — best left to the policy `ask` gate). + Pair with `llm_chat_web` + `LeanTea.Llm.Policy` for a + Claude-Code-style approve-before-write flow. -/ +lean_exe coder_mcp_serve where + srcDir := "examples" + root := `CoderMcp.Serve + +/-- Visual control / telemetry for a `LeanTea.Agent.Conductor`. Boots + the MCP orchestrator + a conductor loop in `IO.asTask`, exposes + `live` / `playbooks` / `rewards` tabs with bandit stats + pause / + resume / abort controls. Pair with `browser_mcp_serve` + a tiny + JSON-based playbook collection to play a browser game. -/ +lean_exe agent_dashboard_serve where + srcDir := "examples" + root := `AgentDashboard.Serve + +/-- MCP server fronting the Google Gemini API. Five tools: + `gemini_ask`, `gemini_chat`, `gemini_review_files` (long-context + multi-file holistic review — exploits Pro's 2M-token window), + `gemini_review_diff` (git-diff focused review), `gemini_list_models`. + Default model `gemini-2.5-pro`, override per-call. The API key + is read from `GEMINI_API_KEY` (see ai.google.dev for issuance). + `--workspace DIR` scopes the file-reading tools through + `LeanTea.Net.SafePath` so a buggy client can't read outside it. -/ +lean_exe gemini_mcp_serve where + srcDir := "examples" + root := `GeminiMcp.Serve + +/-! ## LLM chat demos — three UI shells over `LeanTea.Llm.McpOrchestrator` + +All three share the same `--config FILE.json` shape and the same +orchestrator core. Use whichever fits your context: + + * `llm_chat_cli` — stdin/stdout REPL, ANSI colours, scripts cleanly. + * `llm_chat_tui` — full-screen styled chat with ANSI repaint. + * `llm_chat_web` — single-page browser UI; talk from any device. -/ + +lean_exe llm_chat_cli where + srcDir := "examples" + root := `LlmChatCli.Main + +lean_exe llm_chat_tui where + srcDir := "examples" + root := `LlmChatTui.Main + +lean_exe llm_chat_web where + srcDir := "examples" + root := `LlmChatWeb.Serve + /-- MCP server backed by OS-level mouse / screenshot (Quartz on macOS today). Same JSON-RPC shape as `browser_mcp_serve` but the tools move the real mouse and capture the whole display — @@ -601,6 +671,19 @@ lean_exe reversi_serve where srcDir := "examples" root := `Reversi.Serve +/- Downstream projects previously bundled here now live in their own + repos so lean-tea stays a library core + a compact examples set: + + * 楚漢恋歌 (Chu-Han Love Song) — a six-route narrative game + ⟶ https://github.com/Verilean/lean-tea-chuhan + + * meta_orchestrator — Gemini/LMStudio PM watching Claude-Code + zellij panes and nudging them on stall + ⟶ https://github.com/Verilean/lean-tea-meta + + Both pull this repo in as a `require` from git, so the library + changes here still ship immediately when they `lake update`. -/ + /-! ## Construction-time security primitives The five primitives below ship a single aggregated LSpec runner