From c9743ed2c2ed61db3fb255715219cfdb98e47e12 Mon Sep 17 00:00:00 2001 From: HiLleywyn Date: Tue, 19 May 2026 08:34:21 +0000 Subject: [PATCH 1/2] Add next-turn parameters and tool-call approval to the agent loop A tool may now return a next_turn block in its result to steer the following model turn (model, temperature, token budget, instructions). The sidecar feeds it to the Agent SDK's nextTurnParams; the in-process loop applies it directly. Both paths honour the same four parameters. Tool calls can be gated on human approval: list tool names in AGENT_APPROVAL_TOOLS or risk tiers in AGENT_APPROVAL_RISKS, or set requires_approval on a tool. A gated call posts an Approve / Reject prompt in the channel; a rejected call never runs and the model is told it was declined. Approval is resolved on the bot side, so the sidecar stays stateless per turn and the build guard still holds. Bumps the bridge protocol to v2 for the optional next_turn frame field. --- .env.example | 12 +++ README.md | 12 +++ agent-sidecar/src/server.ts | 102 ++++++++++++++++++++-- ai/agent_control.py | 163 ++++++++++++++++++++++++++++++++++++ ai/agent_sidecar.py | 50 +++++++++-- ai/tools.py | 52 ++++++++++-- cogs/chat.py | 83 +++++++++++++++++- cogs/chat_views.py | 57 +++++++++++++ config.py | 11 +++ tests/test_agent_control.py | 135 +++++++++++++++++++++++++++++ tests/test_smoke.py | 4 +- 11 files changed, 656 insertions(+), 25 deletions(-) create mode 100644 ai/agent_control.py create mode 100644 tests/test_agent_control.py diff --git a/.env.example b/.env.example index ac2c100..5164854 100644 --- a/.env.example +++ b/.env.example @@ -91,6 +91,18 @@ AGENT_FALLBACK_MODELS= AGENT_PROVIDER_ORDER= AGENT_PROVIDER_ALLOW_FALLBACKS=true AGENT_SERVER_TOOLS= +# Tool-call approval. A gated tool call posts an Approve / Reject prompt in +# the channel and waits for the person who asked; a rejected call never runs +# and the model is told it was declined. AGENT_APPROVAL_TOOLS is a +# comma-separated list of tool names to gate (e.g. +# "files.write,files.delete"); AGENT_APPROVAL_RISKS a comma-separated list of +# risk tiers to gate (read, safe, mutate). Both default empty, which leaves +# approval off and changes nothing. AGENT_APPROVAL_TIMEOUT_S bounds the wait +# for a decision; keep it well below AI_REPLY_TIMEOUT_S, since the wait counts +# against the turn. +AGENT_APPROVAL_TOOLS= +AGENT_APPROVAL_RISKS= +AGENT_APPROVAL_TIMEOUT_S=45 # ── Memory ──────────────────────────────────────────────────────────────────── # Hours before a stored user memory is considered stale and refreshed. diff --git a/README.md b/README.md index e26a564..0ab439b 100644 --- a/README.md +++ b/README.md @@ -178,6 +178,18 @@ feature is safe to leave on. Stop conditions are tunable: `AGENT_MAX_STEPS` caps model steps per turn and `AGENT_MAX_COST` sets an optional per-turn dollar ceiling. +Two within-turn controls run on top of that loop. A tool may return a +`next_turn` block in its result to steer the following model turn -- a +different model, a lower temperature, a tighter token budget, or extra +instructions -- which both the sidecar (through the Agent SDK's +`nextTurnParams`) and the in-process loop apply before the model is asked +again. And a tool call can be gated on human approval: list tool names in +`AGENT_APPROVAL_TOOLS` or risk tiers in `AGENT_APPROVAL_RISKS`, and a gated +call posts an Approve / Reject prompt in the channel before it runs, with a +rejected call handed back to the model as a declined result. Approval is +resolved entirely on the bot side within the turn, so the sidecar stays +stateless per turn and conversation state stays in the bot. + ## Tool execution pipeline A tool result never goes straight from a handler to the model. It travels a diff --git a/agent-sidecar/src/server.ts b/agent-sidecar/src/server.ts index 4205b75..3b2d460 100644 --- a/agent-sidecar/src/server.ts +++ b/agent-sidecar/src/server.ts @@ -10,7 +10,7 @@ * models?, provider?, tools, server_tools?, ... } * sidecar -> bot : { type: "delta", text } streamed model text * sidecar -> bot : { type: "tool_call", call_id, name, arguments } - * bot -> sidecar : { type: "tool_result", call_id, result } + * bot -> sidecar : { type: "tool_result", call_id, result, next_turn? } * sidecar -> bot : { type: "done", text, finish_reason, model, usage, * tool_names } * sidecar -> bot : { type: "error", error } @@ -27,6 +27,9 @@ * over the same socket: the SDK calls execute(), the sidecar emits a * tool_call, and the bot answers with a tool_result. That keeps the tool * registry, the execution pipeline and the Lua plugins exactly where they are. + * A tool_result may also carry a `next_turn` directive -- the bot's way of + * steering the following model turn (model, temperature, token budget, + * instructions) -- which the sidecar feeds to the SDK's nextTurnParams. */ import { createServer } from 'node:http'; import { readFileSync } from 'node:fs'; @@ -53,7 +56,7 @@ const API_KEY = process.env.OPENROUTER_API_KEY || ''; * frame; either side treats a mismatch as a reason to fall back rather than * risk talking past a half-deployed peer. */ -const PROTOCOL_VERSION = 1; +const PROTOCOL_VERSION = 2; /** Best-effort version of the bundled Agent SDK, surfaced for logs only. */ function readSdkVersion(): string { @@ -96,6 +99,7 @@ interface StartMessage { } interface PendingToolCall { + name: string; resolve: (value: unknown) => void; reject: (reason: unknown) => void; } @@ -107,23 +111,66 @@ function errorMessage(err: unknown): string { return String(err); } +/** + * Translate a bot-supplied `next_turn` directive into the SDK's + * nextTurnParams shape. The bridge speaks snake_case; the SDK speaks + * camelCase. Only the four parameters both the sidecar and the bot's + * in-process loop can honour identically are carried; an unknown or + * mistyped key is dropped, so a malformed directive can never break a turn. + */ +function translateNextTurn(raw: unknown): Record { + const out: Record = {}; + if (!raw || typeof raw !== 'object') { + return out; + } + const directive = raw as Record; + if (typeof directive.model === 'string' && directive.model) { + out.model = directive.model; + } + if (typeof directive.instructions === 'string' && directive.instructions) { + out.instructions = directive.instructions; + } + if (typeof directive.temperature === 'number') { + out.temperature = directive.temperature; + } + if (typeof directive.max_output_tokens === 'number') { + out.maxOutputTokens = directive.max_output_tokens; + } + return out; +} + /** * One chat turn: drives callModel and bridges tool calls back to the bot. * * The sidecar is deliberately stateless per turn -- it opens one WebSocket, * runs one turn and closes. Conversation history, memory and traits all live * in the Python bot. The Agent SDK also offers a stateful mode (a persistent - * turn-state accessor and approval-gated tool pausing); the bridge does NOT - * use it, because that would split one turn's state across two runtimes. - * Anything stateful belongs on the Python side. A build guard - * (tests/test_sidecar_guards.py) fails CI if that surface appears here -- if a - * future change genuinely needs it, that guard must be revisited deliberately. + * turn-state accessor and approval-gated tool pausing across separate + * request/response cycles); the bridge does NOT use it, because that would + * split one turn's state across two runtimes. Anything stateful belongs on + * the Python side. A build guard (tests/test_sidecar_guards.py) fails CI if + * that surface appears here -- if a future change genuinely needs it, that + * guard must be revisited deliberately. + * + * Two within-turn controls do ride the bridge, because neither needs + * persistent state. A tool may return a next_turn directive on its + * tool_result frame to steer the following model turn (model, temperature, + * token budget, instructions); the sidecar feeds it to the SDK's + * nextTurnParams. Tool-call approval is resolved entirely on the Python side + * within the turn -- a gated tool simply makes the bot withhold its + * tool_result until a human decides -- so the gate never reaches this + * stateless surface. */ class Session { private readonly ws: WebSocket; private readonly client: OpenRouter; private readonly pending = new Map(); private readonly toolNames: string[] = []; + // The latest next_turn directive each tool returned, SDK-keyed and keyed by + // tool name. A tool's nextTurnParams functions read it after the tool runs; + // the next call of that tool overwrites it, so a stale directive is never + // applied twice. + private readonly nextTurnByName = new Map>(); private callCounter = 0; private started = false; private turnId = '-'; @@ -203,6 +250,9 @@ class Session { const entry = this.pending.get(String(msg.call_id)); if (entry) { this.pending.delete(String(msg.call_id)); + // Record (or clear) this tool's next-turn directive before resolving, + // so the SDK sees it when it runs nextTurnParams after execute(). + this.nextTurnByName.set(entry.name, translateNextTurn(msg.next_turn)); entry.resolve(msg.result); } } @@ -213,7 +263,7 @@ class Session { const callId = `t${++this.callCounter}`; this.toolNames.push(name); const promise = new Promise((resolve, reject) => { - this.pending.set(callId, { resolve, reject }); + this.pending.set(callId, { name, resolve, reject }); }); this.send({ type: 'tool_call', @@ -228,11 +278,47 @@ class Session { return schemas.map((entry) => { const fn = (entry.function ?? entry) as Record; const name = String(fn.name || ''); + // Each nextTurnParams function returns this tool's last directive value + // for that parameter, or the request's current value when it set none. + // The SDK threads the current value through every called tool, so a + // tool that sets no directive returns the value unchanged and never + // clobbers a directive an earlier tool in the round did set. return tool({ name, description: String(fn.description || ''), inputSchema: toZodObject(fn.parameters), execute: async (args: unknown) => this.runTool(name, args), + nextTurnParams: { + model: (_params, context): string => { + const d = this.nextTurnByName.get(name); + if (d && typeof d.model === 'string' && d.model) { + return d.model; + } + // No directive: keep the current model. When the request uses a + // models fallback array context.model is empty -- returning null + // there leaves the model parameter unset rather than pinning it + // to an empty string the API would reject. + return (context.model || null) as string; + }, + instructions: (_params, context) => { + const d = this.nextTurnByName.get(name); + return d && typeof d.instructions === 'string' + ? d.instructions + : context.instructions; + }, + temperature: (_params, context) => { + const d = this.nextTurnByName.get(name); + return d && typeof d.temperature === 'number' + ? d.temperature + : context.temperature; + }, + maxOutputTokens: (_params, context) => { + const d = this.nextTurnByName.get(name); + return d && typeof d.maxOutputTokens === 'number' + ? d.maxOutputTokens + : context.maxOutputTokens; + }, + }, }); }); } diff --git a/ai/agent_control.py b/ai/agent_control.py new file mode 100644 index 0000000..fcd91f8 --- /dev/null +++ b/ai/agent_control.py @@ -0,0 +1,163 @@ +"""ai/agent_control.py -- per-turn agent controls: next-turn parameters and +tool-call approval. + +Two cross-cutting agent features live here, kept out of :mod:`ai.tools` and +:mod:`ai.agent_sidecar` so the in-process loop and the sidecar bridge share +one implementation: + + * **Next-turn parameters.** A tool may steer the next model turn by + returning a ``next_turn`` block in its result -- a different model, a new + temperature, a tighter token budget, or extra instructions. This is the + Python-native shape of the OpenRouter Agent SDK's ``nextTurnParams``: the + tool decides at run time, the orchestrator applies the change before the + model is asked again. :func:`split_next_turn` peels the directive off a + raw tool return; :func:`apply_next_turn` folds it into the in-process + loop's working parameters. The sidecar forwards the same directive over + the bridge for the SDK's ``nextTurnParams`` to consume. + + * **Tool-call approval.** A tool call may be gated on a human yes/no before + it runs. :func:`needs_approval` decides whether a given tool is gated -- + by an explicit ``requires_approval`` flag on the tool, by tool name, or + by risk tier, the latter two driven by config. :func:`request_tool_approval` + asks the turn's approver (a button prompt in the channel, supplied by the + chat cog through :class:`ai.tools.ToolContext`) and returns the decision. + When no approver is reachable a gated call is denied, never silently run. + +Neither feature reaches for the Agent SDK's persistent state surface: a +next-turn directive rides the existing tool-result frame within one turn, and +approval is resolved entirely on the Python side before the turn ends. The +sidecar stays stateless per turn. +""" +from __future__ import annotations + +import logging + +from config import Config + +log = logging.getLogger(__name__) + +# Parameters a tool may change for the next model turn. Kept deliberately +# small: every key here is one both the sidecar and the in-process loop can +# honour identically, so a directive behaves the same on either path. +NEXT_TURN_KEYS = ("model", "instructions", "temperature", "max_output_tokens") + +# Hard ceilings on a next-turn directive. Bot tool handlers are trusted, but a +# Lua plugin tool is less so -- these bound a directive to sane values. +_MAX_INSTRUCTION_CHARS = 8000 +_MAX_OUTPUT_TOKENS = 8000 + + +def sanitize_next_turn(raw) -> dict | None: + """Validate a raw ``next_turn`` directive into a clean, bounded dict. + + Returns the subset of :data:`NEXT_TURN_KEYS` that carried a well-typed, + in-range value, or ``None`` when nothing usable remains. An unknown key, a + wrong type or an out-of-range number is dropped silently -- a malformed + directive must never break the turn. + """ + if not isinstance(raw, dict): + return None + out: dict = {} + for key in NEXT_TURN_KEYS: + if key not in raw: + continue + value = raw[key] + if key in ("model", "instructions"): + if isinstance(value, str) and value.strip(): + text = value.strip() + if key == "instructions": + text = text[:_MAX_INSTRUCTION_CHARS] + out[key] = text + elif key == "temperature": + if isinstance(value, (int, float)) and not isinstance(value, bool): + out[key] = max(0.0, min(2.0, float(value))) + elif key == "max_output_tokens": + if (isinstance(value, int) and not isinstance(value, bool) + and value > 0): + out[key] = min(int(value), _MAX_OUTPUT_TOKENS) + return out or None + + +def split_next_turn(result): + """Peel a ``next_turn`` directive off a raw tool return. + + Returns ``(result_without_next_turn, directive_or_None)``. The directive + is removed from the result so it never reaches the execution pipeline or + the model -- it is turn-control data, not tool output. + """ + if not isinstance(result, dict) or "next_turn" not in result: + return result, None + cleaned = {key: value for key, value in result.items() + if key != "next_turn"} + return cleaned, sanitize_next_turn(result.get("next_turn")) + + +def apply_next_turn( + directive: dict | None, + *, + convo: list[dict], + model: str | None, + temperature: float, + max_tokens: int, +) -> tuple[str | None, float, int]: + """Fold a next-turn directive into the in-process loop's parameters. + + ``convo`` is mutated in place when the directive carries ``instructions`` + (appended as a system message, mirroring how the sidecar forwards + ``instructions`` to the SDK). The model, temperature and token budget are + returned updated so the caller can rebind its working values. + """ + if not directive: + return model, temperature, max_tokens + if directive.get("model"): + model = directive["model"] + if directive.get("temperature") is not None: + temperature = directive["temperature"] + if directive.get("max_output_tokens"): + max_tokens = directive["max_output_tokens"] + instructions = directive.get("instructions") + if instructions: + convo.append({"role": "system", "content": instructions}) + return model, temperature, max_tokens + + +def needs_approval(spec) -> bool: + """Is a tool call gated on a human yes/no before it may run? + + A tool is gated when its :class:`~ai.tools.ToolSpec` sets + ``requires_approval``, when its name is listed in ``AGENT_APPROVAL_TOOLS``, + or when its risk tier is listed in ``AGENT_APPROVAL_RISKS``. With both + config lists empty -- the default -- approval is off and no call is gated. + """ + if spec is None: + return False + if getattr(spec, "requires_approval", False): + return True + name = getattr(spec, "name", "") + if name and name in Config.AGENT_APPROVAL_TOOLS: + return True + risk = getattr(spec, "risk", "") + if risk and risk in Config.AGENT_APPROVAL_RISKS: + return True + return False + + +async def request_tool_approval(name: str, args: dict, ctx) -> bool: + """Ask the turn's approver to clear one gated tool call. + + The approver -- set on :class:`~ai.tools.ToolContext` by the chat cog -- + surfaces a human prompt and resolves to the decision. When no approver is + reachable the call is denied: a gated tool is never run unreviewed. + """ + approver = getattr(ctx, "approver", None) + if approver is None: + log.info("tool approval: %s denied (no approver available)", name) + return False + try: + decided = bool(await approver(name, args)) + except Exception as exc: # noqa: BLE001 + log.warning("tool approval for %s failed: %s", name, exc) + return False + log.info("tool approval: %s %s", name, + "approved" if decided else "rejected") + return decided diff --git a/ai/agent_sidecar.py b/ai/agent_sidecar.py index 124dcf0..99f30af 100644 --- a/ai/agent_sidecar.py +++ b/ai/agent_sidecar.py @@ -17,7 +17,9 @@ the sidecar greets the connection with a ``hello`` frame, the bot sends a ``start`` frame, the sidecar streams ``delta`` text and ``tool_call`` requests, the bot runs each tool through the execution pipeline and answers -with ``tool_result``, and the turn ends with ``done`` or ``error``. +with ``tool_result`` -- optionally carrying a ``next_turn`` directive that +steers the following model turn -- and the turn ends with ``done`` or +``error``. ``run_stream`` yields the same event vocabulary as the in-process agent loop in :mod:`ai.tools`, so the chat cog consumes either transparently. When the @@ -37,6 +39,9 @@ import aiohttp from config import Config +from ai.agent_control import ( + needs_approval, request_tool_approval, split_next_turn, +) from framework.pipeline import run_pipeline log = logging.getLogger(__name__) @@ -47,7 +52,7 @@ # Wire-protocol version. Must match PROTOCOL_VERSION in agent-sidecar/src. # Bumped whenever a message shape changes; the handshake fails fast when the # two halves disagree, which catches a half-finished deploy. -_PROTOCOL_VERSION = 1 +_PROTOCOL_VERSION = 2 # Supervisor restart policy. After a crash the process is respawned with an # escalating backoff; after _MAX_RESTART_ATTEMPTS consecutive failures the @@ -579,23 +584,46 @@ async def run_stream( await session.close() async def _run_tool(self, ws, ctx, registry, event: dict, step: int): - """Execute one bridged tool call and stream its agent events.""" + """Execute one bridged tool call and stream its agent events. + + A tool flagged for approval is gated on a human yes/no first; a + rejected call never runs and an error result is sent back in its + place. A ``next_turn`` directive returned by the tool rides the + tool_result frame to the sidecar, which feeds it to the SDK's + nextTurnParams for the following model turn. + """ name = str(event.get("name") or "") call_id = event.get("call_id") raw_args = event.get("arguments") args = raw_args if isinstance(raw_args, dict) else {} + spec = registry.get(name) if registry is not None else None yield {"type": "reset"} - yield {"type": "tool_start", "tool": name} + + # A gated tool call waits for a human yes/no before it runs. + approved = True + if needs_approval(spec): + yield {"type": "approval_pending", "tool": name} + approved = await request_tool_approval(name, args, ctx) + yield {"type": "approval_resolved", "tool": name, + "approved": approved} started = time.monotonic() - if registry is not None: + if not approved: + result = {"error": f"the user declined the '{name}' tool call, " + f"so it was not run"} + elif registry is not None: + yield {"type": "tool_start", "tool": name} result = await registry.run(name, args, ctx) else: + yield {"type": "tool_start", "tool": name} result = {"error": "no tool registry available"} elapsed_ms = int((time.monotonic() - started) * 1000) - spec = registry.get(name) if registry is not None else None + # A next-turn directive is control data: peel it off before the + # result reaches the pipeline or the model. + result, directive = split_next_turn(result) + piped = run_pipeline( name, result, meta={"round": step, "elapsed_ms": elapsed_ms}, @@ -606,14 +634,18 @@ async def _run_tool(self, ws, ctx, registry, event: dict, step: int): if (name == "data.web_search" and isinstance(data, dict) and data.get("results")): yield {"type": "sources", "results": data["results"]} - yield {"type": "tool_done", "tool": name} + if approved: + yield {"type": "tool_done", "tool": name} try: payload = json.loads(piped.injected) except (json.JSONDecodeError, TypeError): payload = piped.injected - await ws.send_json({ + frame: dict = { "type": "tool_result", "call_id": call_id, "result": payload, - }) + } + if directive: + frame["next_turn"] = directive + await ws.send_json(frame) diff --git a/ai/tools.py b/ai/tools.py index b58d224..9595131 100644 --- a/ai/tools.py +++ b/ai/tools.py @@ -53,6 +53,9 @@ from config import Config from ai import workspace +from ai.agent_control import ( + apply_next_turn, needs_approval, request_tool_approval, split_next_turn, +) from ai.agent_sidecar import AgentSidecarUnavailable, log_agent_turn from ai.client import ( complete, download_media, generate_image, poll_video, stream_completion, @@ -88,6 +91,10 @@ class ToolContext: registry: "ToolRegistry | None" = None # Per-turn usage ledger. A tool that calls a model records into it. meter: TurnMeter | None = None + # Resolves a gated tool call to a human yes/no. The chat cog sets it for a + # turn that has a channel to prompt in; left None elsewhere, which denies + # any gated call rather than running it unreviewed. + approver: "Callable[[str, dict], Awaitable[bool]] | None" = None @dataclass @@ -113,6 +120,10 @@ class ToolSpec: risk: str = RISK_READ result_fields: tuple[str, ...] | None = None verbatim: bool = False + # When True, every call of this tool is gated on a human yes/no before it + # runs, regardless of the AGENT_APPROVAL_* config. Operators can gate any + # tool by name or risk tier through that config instead. + requires_approval: bool = False def as_openai_tool(self) -> dict: return { @@ -988,6 +999,7 @@ async def _run_agent_inprocess( "content": done_event.get("text") or None, "tool_calls": calls, }) + round_directive: dict | None = None for tc in calls: name = tc.get("function", {}).get("name", "") raw_args = tc.get("function", {}).get("arguments") or "{}" @@ -995,17 +1007,39 @@ async def _run_agent_inprocess( args = json.loads(raw_args) except json.JSONDecodeError: args = {} - yield {"type": "tool_start", "tool": name} + tool_names.append(name) + spec = registry.get(name) + + # A gated tool call waits for a human yes/no. A rejected call + # never runs -- the model is handed an error in its place and + # the turn continues. + approved = True + if needs_approval(spec): + yield {"type": "approval_pending", "tool": name} + approved = await request_tool_approval(name, args, ctx) + yield {"type": "approval_resolved", "tool": name, + "approved": approved} + started = time.monotonic() - result = await registry.run(name, args, ctx) + if approved: + yield {"type": "tool_start", "tool": name} + result = await registry.run(name, args, ctx) + else: + result = {"error": f"the user declined the '{name}' tool " + f"call, so it was not run"} elapsed_ms = int((time.monotonic() - started) * 1000) - tool_names.append(name) + + # A tool may steer the next model turn; that directive is + # control data, peeled off before the result reaches the + # pipeline or the model. + result, directive = split_next_turn(result) + if directive: + round_directive = {**(round_directive or {}), **directive} # The result does not go to the model raw. The pipeline wraps # it in the contract envelope, runs it through the validation # gate, compresses it deterministically and reduces it to # minimal JSON. - spec = registry.get(name) piped = run_pipeline( name, result, meta={"round": _round + 1, "elapsed_ms": elapsed_ms}, @@ -1016,13 +1050,21 @@ async def _run_agent_inprocess( if (name == "data.web_search" and isinstance(data, dict) and data.get("results")): yield {"type": "sources", "results": data["results"]} - yield {"type": "tool_done", "tool": name} + if approved: + yield {"type": "tool_done", "tool": name} convo.append({ "role": "tool", "tool_call_id": tc.get("id", ""), "content": piped.injected, }) + # A next-turn directive returned by any tool this round is + # applied before the model is asked again. + model, temperature, max_tokens = apply_next_turn( + round_directive, convo=convo, model=model, + temperature=temperature, max_tokens=max_tokens, + ) + # Tool-round budget exhausted -- ask for a final plain answer. final = await complete( convo, model=model, max_tokens=max_tokens, temperature=temperature, diff --git a/cogs/chat.py b/cogs/chat.py index 83a98a3..ea2691a 100644 --- a/cogs/chat.py +++ b/cogs/chat.py @@ -8,6 +8,7 @@ from __future__ import annotations import asyncio +import json import logging import random import time @@ -17,7 +18,9 @@ from config import Config from framework.context import ArchimedesContext +from framework.embed import card from framework.middleware import guild_only, no_bots +from framework.ui import C_ERROR, C_SUCCESS, C_WARNING, clip from ai.client import complete_default from ai.context import ChatMode, build_system_prompt, gather_chat_context from ai.memory import run_post_message_tasks @@ -30,7 +33,7 @@ ) from ai.tools import ToolContext, run_agent_stream from ai.usage import TurnMeter -from cogs.chat_views import AskReplyView, AskState, StreamRenderer +from cogs.chat_views import AskReplyView, ApprovalView, AskState, StreamRenderer log = logging.getLogger(__name__) @@ -101,6 +104,28 @@ def _stamp_footer(text: str, meter: TurnMeter | None) -> str: return text[:max(0, budget)] + "\n" + footer +def _format_tool_args(args: dict) -> str: + """Render tool-call arguments as compact, clipped lines for an approval + prompt, so the person deciding can see what the call would do. + + Long values are truncated; a non-dict or empty argument set reads as + "(no arguments)". + """ + if not isinstance(args, dict) or not args: + return "_(no arguments)_" + lines = [] + for key, value in list(args.items())[:8]: + if isinstance(value, str): + text = value + else: + try: + text = json.dumps(value, default=str) + except (TypeError, ValueError): + text = str(value) + lines.append(f"- **{key}**: {clip(text, 280)}") + return "\n".join(lines) + + class ChatBrain(commands.Cog): """Handles mentions, replies, .ask, and ambient chat.""" @@ -388,6 +413,12 @@ async def _stream_turn( memory=self.bot.memory, registry=self.bot.tools, meter=meter, ) + + async def _approver(name: str, args: dict) -> bool: + return await self._collect_tool_approval( + channel_id, user_id, name, args) + + tool_ctx.approver = _approver renderer = StreamRenderer(placeholder) animator = asyncio.create_task(renderer.run()) final_text = "" @@ -422,6 +453,56 @@ async def _stream_turn( pass return final_text or None + async def _collect_tool_approval( + self, channel_id: int, user_id: int, name: str, args: dict, + ) -> bool: + """Post an Approve / Reject prompt for one gated tool call. + + Returns the human decision. A send failure or an unanswered prompt + counts as a refusal -- a gated tool is never run without an explicit + yes. The prompt message is edited in place with the outcome, so the + channel keeps a record of who cleared what. + """ + channel = self.bot.get_channel(channel_id) + if channel is None: + return False + timeout = float(max(5, Config.AGENT_APPROVAL_TIMEOUT_S)) + decision: asyncio.Future[bool] = ( + asyncio.get_running_loop().create_future() + ) + view = ApprovalView(user_id, decision, timeout=timeout) + builder = card( + "Tool approval needed", + description=f"Archimedes wants to run `{name}`.\n" + f"{_format_tool_args(args)}", + color=C_WARNING, + ) + builder.footer(f"Decide within {int(timeout)}s -- no answer is a " + f"refusal.") + try: + prompt = await channel.send( + content=f"<@{user_id}>", embed=builder.build(), view=view, + ) + except discord.HTTPException: + return False + try: + approved = await asyncio.wait_for(decision, timeout=timeout + 5) + except asyncio.TimeoutError: + approved = False + view.stop() + verdict = ("approved and is running" if approved + else "not approved and will not run") + outcome = card( + "Tool approval needed", + description=f"`{name}` was {verdict}.", + color=C_SUCCESS if approved else C_ERROR, + ) + try: + await prompt.edit(content=None, embed=outcome.build(), view=None) + except discord.HTTPException: + pass + return approved + def _build_view( self, placeholder: discord.Message, user_id: int, channel_id: int, messages: list[dict], tool_schemas, *, accumulated: str, diff --git a/cogs/chat_views.py b/cogs/chat_views.py index a14f4c4..d8e6300 100644 --- a/cogs/chat_views.py +++ b/cogs/chat_views.py @@ -66,9 +66,19 @@ async def feed(self, event: dict) -> None: if name: self.tool_names.append(name) self.tool = "" + elif kind == "approval_pending": + self.tool = event.get("tool", "") + self.phase = "approval" + await self._render(force=True) + elif kind == "approval_resolved": + self.tool = "" + self.phase = "thinking" + await self._render(force=True) def _status_line(self) -> str: spin = _SPINNER[self.frame % len(_SPINNER)] + if self.phase == "approval" and self.tool: + return f"_{spin} waiting for approval to run `{self.tool}`..._" if self.phase == "tool" and self.tool: return f"_{spin} running `{self.tool}`..._" if self.phase == "writing": @@ -154,3 +164,50 @@ async def sources_btn(self, interaction: discord.Interaction, _b: discord.ui.But await interaction.response.send_message( "\n".join(lines) or "(no sources)", ephemeral=True, ) + + +class ApprovalView(discord.ui.View): + """Approve / Reject buttons gating one tool call on a human decision. + + The view resolves ``decision`` -- a future the agent loop awaits -- to + True on approval and False on rejection or timeout. Only the member who + started the turn may decide; an unanswered prompt is treated as a refusal, + so a gated tool is never run without an explicit yes. + """ + + def __init__(self, user_id: int, decision: "asyncio.Future[bool]", *, + timeout: float) -> None: + super().__init__(timeout=timeout) + self.user_id = user_id + self._decision = decision + + async def interaction_check(self, interaction: discord.Interaction) -> bool: + if interaction.user.id != self.user_id: + await interaction.response.send_message( + "Only the person who asked can decide this.", ephemeral=True, + ) + return False + return True + + def _resolve(self, approved: bool) -> None: + if not self._decision.done(): + self._decision.set_result(approved) + + @discord.ui.button(label="Approve", style=discord.ButtonStyle.success, + emoji="✅") + async def approve_btn(self, interaction: discord.Interaction, + _b: discord.ui.Button): + await interaction.response.defer() + self._resolve(True) + self.stop() + + @discord.ui.button(label="Reject", style=discord.ButtonStyle.danger, + emoji="🛑") + async def reject_btn(self, interaction: discord.Interaction, + _b: discord.ui.Button): + await interaction.response.defer() + self._resolve(False) + self.stop() + + async def on_timeout(self) -> None: + self._resolve(False) diff --git a/config.py b/config.py index 5a67f20..6a7f6fe 100644 --- a/config.py +++ b/config.py @@ -118,6 +118,17 @@ class Config: AGENT_PROVIDER_ALLOW_FALLBACKS: bool = _env_bool( "AGENT_PROVIDER_ALLOW_FALLBACKS", True) AGENT_SERVER_TOOLS: list[str] = _env_list("AGENT_SERVER_TOOLS") + # Tool-call approval. A gated tool call pauses for a human yes/no -- a + # button prompt in the channel -- before it runs; a rejected call never + # executes and the model is told it was declined. AGENT_APPROVAL_TOOLS is + # a comma-separated list of tool names to gate; AGENT_APPROVAL_RISKS a + # comma-separated list of risk tiers (read, safe, mutate) to gate. Both + # default empty, which leaves approval off and changes nothing. + # AGENT_APPROVAL_TIMEOUT_S bounds the wait for a decision; keep it well + # below AI_REPLY_TIMEOUT_S, since the wait counts against the turn. + AGENT_APPROVAL_TOOLS: list[str] = _env_list("AGENT_APPROVAL_TOOLS") + AGENT_APPROVAL_RISKS: list[str] = _env_list("AGENT_APPROVAL_RISKS") + AGENT_APPROVAL_TIMEOUT_S: int = _env_int("AGENT_APPROVAL_TIMEOUT_S", 45) # ── Memory ──────────────────────────────────────────────────────────────── MEMORY_REFRESH_HOURS: int = _env_int("MEMORY_REFRESH_HOURS", 4) diff --git a/tests/test_agent_control.py b/tests/test_agent_control.py new file mode 100644 index 0000000..9bd2a58 --- /dev/null +++ b/tests/test_agent_control.py @@ -0,0 +1,135 @@ +"""tests/test_agent_control.py -- next-turn parameters and tool approval. + +These cover :mod:`ai.agent_control` offline: the next-turn directive +validator and applier, and the tool-call approval gate. No Discord token, +database, model key or network is needed. +""" +from __future__ import annotations + +import types + +from ai.agent_control import ( + apply_next_turn, + needs_approval, + request_tool_approval, + sanitize_next_turn, + split_next_turn, +) + + +def test_sanitize_next_turn_whitelists_known_keys() -> None: + cleaned = sanitize_next_turn({ + "model": " openai/gpt-4o ", + "temperature": 0.2, + "max_output_tokens": 256, + "instructions": "be terse", + "unknown": "dropped", + }) + assert cleaned == { + "model": "openai/gpt-4o", + "temperature": 0.2, + "max_output_tokens": 256, + "instructions": "be terse", + } + + +def test_sanitize_next_turn_rejects_bad_types_and_bounds() -> None: + # A wrong type for a key drops that key silently. + assert sanitize_next_turn({"model": 5}) is None + assert sanitize_next_turn({"temperature": "hot"}) is None + assert sanitize_next_turn({"max_output_tokens": 0}) is None + # A bool is not accepted as a token count. + assert sanitize_next_turn({"max_output_tokens": True}) is None + # Temperature is clamped into range, never rejected for being extreme. + assert sanitize_next_turn({"temperature": 9.0}) == {"temperature": 2.0} + assert sanitize_next_turn({"temperature": -1.0}) == {"temperature": 0.0} + # A non-dict directive, or one with nothing usable, is nothing. + assert sanitize_next_turn("nope") is None + assert sanitize_next_turn({}) is None + + +def test_split_next_turn_peels_directive() -> None: + result, directive = split_next_turn({ + "answer": 42, "next_turn": {"temperature": 0.1}, + }) + assert result == {"answer": 42} + assert directive == {"temperature": 0.1} + # A result with no directive is returned untouched. + plain, none = split_next_turn({"answer": 42}) + assert plain == {"answer": 42} and none is None + # A non-dict result is passed straight through. + assert split_next_turn("text") == ("text", None) + + +def test_apply_next_turn_updates_params_and_convo() -> None: + convo: list[dict] = [{"role": "user", "content": "hi"}] + model, temperature, max_tokens = apply_next_turn( + {"model": "m2", "temperature": 0.3, "max_output_tokens": 128, + "instructions": "stay terse"}, + convo=convo, model="m1", temperature=0.85, max_tokens=600, + ) + assert (model, temperature, max_tokens) == ("m2", 0.3, 128) + # Instructions ride in as an appended system message. + assert convo[-1] == {"role": "system", "content": "stay terse"} + # No directive leaves every parameter as it was. + unchanged = apply_next_turn( + None, convo=convo, model="m1", temperature=0.85, max_tokens=600, + ) + assert unchanged == ("m1", 0.85, 600) + + +def test_needs_approval_honours_spec_flag(monkeypatch) -> None: + from config import Config + + monkeypatch.setattr(Config, "AGENT_APPROVAL_TOOLS", []) + monkeypatch.setattr(Config, "AGENT_APPROVAL_RISKS", []) + + gated = types.SimpleNamespace( + name="x", risk="read", requires_approval=True) + ungated = types.SimpleNamespace( + name="y", risk="read", requires_approval=False) + assert needs_approval(gated) is True + assert needs_approval(ungated) is False + assert needs_approval(None) is False + + +def test_needs_approval_honours_config(monkeypatch) -> None: + from config import Config + + monkeypatch.setattr(Config, "AGENT_APPROVAL_TOOLS", ["files.write"]) + monkeypatch.setattr(Config, "AGENT_APPROVAL_RISKS", ["mutate"]) + + by_name = types.SimpleNamespace( + name="files.write", risk="read", requires_approval=False) + by_risk = types.SimpleNamespace( + name="files.delete", risk="mutate", requires_approval=False) + ungated = types.SimpleNamespace( + name="data.web_search", risk="read", requires_approval=False) + assert needs_approval(by_name) is True + assert needs_approval(by_risk) is True + assert needs_approval(ungated) is False + + +async def test_request_tool_approval_denies_without_approver() -> None: + """No approver means no human to ask, so a gated call is denied.""" + ctx = types.SimpleNamespace(approver=None) + assert await request_tool_approval("files.write", {}, ctx) is False + + +async def test_request_tool_approval_uses_approver() -> None: + async def yes(_name, _args) -> bool: + return True + + async def no(_name, _args) -> bool: + return False + + async def boom(_name, _args) -> bool: + raise RuntimeError("approver crashed") + + assert await request_tool_approval( + "t", {}, types.SimpleNamespace(approver=yes)) is True + assert await request_tool_approval( + "t", {}, types.SimpleNamespace(approver=no)) is False + # An approver that raises fails closed: the call is denied, not run. + assert await request_tool_approval( + "t", {}, types.SimpleNamespace(approver=boom)) is False diff --git a/tests/test_smoke.py b/tests/test_smoke.py index a847294..cc9b929 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -22,8 +22,8 @@ "framework.pipeline.injection", "framework.pipeline.transforms", "ai.emoji_safety", "ai.safety", "ai.quota", "ai.client", "ai.models", "ai.prompts", "ai.redis_store", "ai.traits", "ai.memory", "ai.context", - "ai.tools", "ai.workspace", "ai.agent_sidecar", "ai.training", - "ai.emoji_index", + "ai.tools", "ai.workspace", "ai.agent_control", "ai.agent_sidecar", + "ai.training", "ai.emoji_index", "cogs.meta", "cogs.chat_views", "cogs.chat", "cogs.archimedes", "cogs.ai_admin", "cogs.sidecar", ] From 3ef8b5aca8aa639a102d4dd7cdcd1fbf52d41bb6 Mon Sep 17 00:00:00 2001 From: HiLleywyn Date: Tue, 19 May 2026 08:46:50 +0000 Subject: [PATCH 2/2] Pick ripgrep or grep by search target in the shell tool Replace the vague "always use rg, fall back to grep if it fails" guidance with a deterministic rule: rg searches a directory or the whole workspace, grep searches a single named file. Updates the shell.run description, its command example, the allowlist comment and the README to match. --- README.md | 8 ++++---- ai/tools.py | 9 +++++---- ai/workspace.py | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 0ab439b..69e0e18 100644 --- a/README.md +++ b/README.md @@ -36,10 +36,10 @@ runs the test suite. - **File workspace** -- the `files.*` and `shell.run` tools give the model a private scratch directory, one per server (per user in a DM). It is sandboxed: paths cannot escape it, files and total size are capped, and the - shell runs only an allowlist of read-only commands -- it searches with - ripgrep, falling back to grep only when ripgrep cannot serve. Configured - with the `WORKSPACE_*` variables; turn it off entirely with - `WORKSPACE_ENABLED`. + shell runs only an allowlist of read-only commands -- it searches a + directory or the whole workspace with ripgrep and a single named file with + grep. Configured with the `WORKSPACE_*` variables; turn it off entirely + with `WORKSPACE_ENABLED`. - **Lua plugins** -- a full plugin system. A plugin is one `.lua` file that can register prefix commands, agent tools, background loops and event handlers, and reach out through an HTTP client, a Discord read/write API, diff --git a/ai/tools.py b/ai/tools.py index 9595131..b86c7bb 100644 --- a/ai/tools.py +++ b/ai/tools.py @@ -689,9 +689,9 @@ def _register_workspace_tools(reg: ToolRegistry) -> None: "shell.run", "Run a single read-only shell command inside your sandboxed " "workspace, for example ls, cat, rg, find, wc, head, tail or " - "sort. To search file contents always use rg (ripgrep) -- it is " - "the preferred search tool; only fall back to grep, egrep or " - "fgrep if ripgrep fails or genuinely cannot do what you need. " + "sort. To search file contents, use rg (ripgrep) to search a " + "directory or the whole workspace, and grep to search a single " + "named file. " "Commands take a file path as a direct argument -- " "'sort -r data.txt', 'wc -l data.txt' -- so you do NOT need " "pipes or redirects, which are not supported. To save a " @@ -701,7 +701,8 @@ def _register_workspace_tools(reg: ToolRegistry) -> None: {"type": "object", "properties": { "command": {"type": "string", "description": "The command line to run, e.g. " - "'rg TODO src' or " + "'rg TODO src', " + "'grep TODO notes.txt' or " "'sort -r data.txt'."}, "save_to": {"type": "string", "description": "Optional workspace-relative file " diff --git a/ai/workspace.py b/ai/workspace.py index dda90b4..1938f19 100644 --- a/ai/workspace.py +++ b/ai/workspace.py @@ -371,8 +371,8 @@ def delete_file(ctx, path: str) -> dict: # ── allowlist shell tool ────────────────────────────────────────────────────── # Read-only commands only. The command is executed directly (no shell), so a # pipe or redirect is never interpreted -- it is just a literal argument. -# rg (ripgrep) is the preferred content search; grep, egrep and fgrep stay on -# the allowlist only as a fallback for the rare case ripgrep cannot serve. +# rg (ripgrep) searches a directory or the whole workspace; grep searches a +# single named file. egrep and fgrep stay on the allowlist as grep variants. _SHELL_ALLOWLIST = frozenset({ "ls", "cat", "head", "tail", "wc", "rg", "grep", "egrep", "fgrep", "find", "pwd", "echo", "date", "stat", "sort", "uniq", "cut", "tr",