From 6276ad578f812fc91a0dc5a3e20cb5e2a0c8fcbb Mon Sep 17 00:00:00 2001 From: Aayush Shah Date: Thu, 7 May 2026 13:39:12 +0530 Subject: [PATCH 1/3] feat: rebuild compaction run branch --- docs/examples.md | 4 + .../README.md | 66 + .../index.ts | 781 ++++++++++ .../package.json | 22 + .../tsconfig.json | 17 + .../message-agents-real-llm-demo/.env.example | 7 + .../message-agents-real-llm-demo/README.md | 84 ++ .../message-agents-real-llm-demo/index.ts | 1262 +++++++++++++++++ .../message-agents-real-llm-demo/package.json | 22 + .../tsconfig.json | 17 + pnpm-lock.yaml | 44 + src/core/compaction.ts | 1104 ++++++++++++++ src/core/engine.ts | 171 ++- src/core/tracing.ts | 6 + src/core/types.ts | 48 +- 15 files changed, 3607 insertions(+), 48 deletions(-) create mode 100644 examples/compaction-real-llm-multi-turn-demo/README.md create mode 100644 examples/compaction-real-llm-multi-turn-demo/index.ts create mode 100644 examples/compaction-real-llm-multi-turn-demo/package.json create mode 100644 examples/compaction-real-llm-multi-turn-demo/tsconfig.json create mode 100644 examples/message-agents-real-llm-demo/.env.example create mode 100644 examples/message-agents-real-llm-demo/README.md create mode 100644 examples/message-agents-real-llm-demo/index.ts create mode 100644 examples/message-agents-real-llm-demo/package.json create mode 100644 examples/message-agents-real-llm-demo/tsconfig.json create mode 100644 src/core/compaction.ts diff --git a/docs/examples.md b/docs/examples.md index 2968ffa..00f5488 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -20,6 +20,10 @@ The JAF framework includes multiple example projects that demonstrate different 2. **Streaming Demo** (`examples/streaming-demo/`) - Event-level streaming via SSE and direct engine streaming 3. **RAG Demo** (`examples/rag-demo/`) - Vertex AI RAG integration with streaming responses 4. **Visualization Demo** (`examples/visualization-demo/`) - Graphviz-based agent and tool visualization +5. 
**Compaction Demo** (`examples/compaction-demo/`) - Deterministic compaction flow with no external LLM dependency +6. **Real LiteLLM Compaction Demo** (`examples/compaction-real-llm-demo/`) - Real LLM compaction in a single seeded run +7. **Real LiteLLM Multi-Turn Compaction Demo** (`examples/compaction-real-llm-multi-turn-demo/`) - Real LLM compaction after several conversational turns, followed by continued turns and a final answer +8. **Real LiteLLM Message-Agents Demo** (`examples/message-agents-real-llm-demo/`) - Parent-orchestrated planning, delegated specialist analysis, explicit final synthesis, and compaction in one realistic flow All examples showcase the framework's core principles: - **Immutability**: All state is deeply readonly diff --git a/examples/compaction-real-llm-multi-turn-demo/README.md b/examples/compaction-real-llm-multi-turn-demo/README.md new file mode 100644 index 0000000..3ff3c4e --- /dev/null +++ b/examples/compaction-real-llm-multi-turn-demo/README.md @@ -0,0 +1,66 @@ +# JAF Real LiteLLM Multi-Turn Compaction Demo + +This example uses real LiteLLM-backed chat completion calls for both: + +- the normal JAF turn LLM calls +- the compaction LLM call + +Unlike the single-run real compaction demo, this one keeps the conversation going across multiple `run(...)` calls so you can see compaction happen after several normal turns, then continue the chat, and only answer at the end. 
+ +## What it demonstrates + +- A custom `ModelProvider` that makes real chat completion requests +- A slightly larger model context window than the basic real compaction demo +- Four normal turns before compaction is expected to kick in +- Two more post-compaction turns that keep using the rebuilt transcript +- A final answer on the last turn instead of answering immediately +- Full request and response logging for both the main turn model and the compaction model + +## Required environment + +Copy `.env.example` to `.env` and fill in: + +```bash +LITELLM_URL=https://grid.ai.juspay.net/v1 +LITELLM_API_KEY=your-key +LITELLM_MODEL=glm-flash-experimental +LITELLM_COMPACTION_MODEL=glm-flash-experimental +LITELLM_MAX_INPUT_TOKENS=2400 +LITELLM_MAX_OUTPUT_TOKENS=260 +COMPACTION_TRIGGER_PERCENTAGE=0.52 +``` + +Important: + +- `LITELLM_MAX_INPUT_TOKENS` is intentionally higher than the single-run demo, but still low enough that the scripted conversation should compact around turn 5. Note that `index.ts` enforces demo minimums: values below 4200 for `LITELLM_MAX_INPUT_TOKENS` and below 1200 for `LITELLM_MAX_OUTPUT_TOKENS` are raised to those minimums. +- `COMPACTION_TRIGGER_PERCENTAGE=0.52` is tuned so the default transcript usually grows through four turns before compaction. +- `LITELLM_PROVIDER` is optional. Use it only when your LiteLLM setup expects provider-prefixed model names such as `openai/gpt-4o-mini`. +- `LITELLM_URL` should point at the LiteLLM base path that serves `/chat/completions`. If you pass `https://host/v1`, the example uses it as-is. If you pass `https://host`, the example normalizes it to `https://host/v1`. + +## Run + +From the repo root: + +```bash +pnpm exec tsx examples/compaction-real-llm-multi-turn-demo/index.ts +``` + +Or from the example directory: + +```bash +pnpm dev +``` + +## Expected flow + +1. Turns 1 to 4 add realistic account-planning notes to the transcript. +2. Before turn 5, JAF should compact the older prefix. +3. Turns 5 and 6 continue on top of the compacted transcript. +4. Turn 7 asks for the final executive-ready brief. 
+ +## What to look for + +- `Compaction started before scripted turn 5` in the console output +- The compaction request payload appearing between normal turn requests +- The final transcript containing a `[JAF COMPACTION SUMMARY]` assistant message +- The last answer still preserving key names, metrics, timing, and commercial commitments diff --git a/examples/compaction-real-llm-multi-turn-demo/index.ts b/examples/compaction-real-llm-multi-turn-demo/index.ts new file mode 100644 index 0000000..6c77433 --- /dev/null +++ b/examples/compaction-real-llm-multi-turn-demo/index.ts @@ -0,0 +1,781 @@ +#!/usr/bin/env tsx + +import { randomUUID } from 'crypto'; +import { config as loadDotenv } from 'dotenv'; +import OpenAI from 'openai'; +import { dirname, resolve } from 'path'; +import { fileURLToPath } from 'url'; +import { z } from 'zod'; +import { + createRunId, + createTraceId, + getTextContent, + type Agent, + type Message, + type ModelProvider, + type RunConfig, + type RunResult, + type RunState, + type Tool, + type TraceEvent, +} from '../../src/core/types'; +import { run } from '../../src/core/engine'; +import { configureSanitization, resetSanitizationConfig } from '../../src/core/tracing'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +loadDotenv({ path: resolve(__dirname, '../../.env'), quiet: true }); +loadDotenv({ path: resolve(__dirname, '.env'), quiet: true }); + +type DemoContext = { + accountName: string; + renewalQuarter: string; +}; + +type DemoEnv = { + baseURL: string; + apiKey: string; + mainProvider: string; + mainModel: string; + compactionProvider: string; + compactionModel: string; + maxInputTokens: number; + maxOutputTokens: number; + triggerPercentage: number; +}; + +type ScriptedTurn = { + label: string; + user: string; +}; + +const colors = { + bold: (text: string) => `\x1b[1m${text}\x1b[0m`, + blue: (text: string) => `\x1b[34m${text}\x1b[0m`, + cyan: (text: string) => `\x1b[36m${text}\x1b[0m`, + 
dim: (text: string) => `\x1b[2m${text}\x1b[0m`, + green: (text: string) => `\x1b[32m${text}\x1b[0m`, + magenta: (text: string) => `\x1b[35m${text}\x1b[0m`, + red: (text: string) => `\x1b[31m${text}\x1b[0m`, + yellow: (text: string) => `\x1b[33m${text}\x1b[0m`, +}; + +const LITELLM_REQUEST_HEADERS = { + 'x-litellm-disable-logging': 'true', +}; + +const MIN_DEMO_INPUT_TOKENS = 4200; +const MIN_DEMO_OUTPUT_TOKENS = 1200; + +const SCRIPTED_TURNS: readonly ScriptedTurn[] = [ + { + label: 'Turn 1', + user: + 'We are prepping a renewal-risk brief for Northstar Retail, a 980-store commerce customer on a June Q3 renewal worth about $2.4M ARR. The VP of Digital still wants to expand site-search usage to mobile and store kiosks, but the last steering review landed badly because our search relevance improvements have not yet shown up in category conversion. Their CFO, Priyanka Rao, asked for one crisp narrative that explains whether this is execution noise or a structural risk. Keep a running working brief as we go, because I am going to send more notes over several turns.', + }, + { + label: 'Turn 2', + user: + 'New field notes from the support and product threads: during the last two weekend campaigns, search latency on the product-listing path moved from roughly 320 ms p95 to about 1.1 s p95, and merchandising disabled one synonym package because it seemed to amplify irrelevant apparel results for seasonal queries. There were 14 tickets tied to search quality and a few screenshots where zero-result pages showed up for high-volume terms. The customer says store managers are now manually curating landing pages before each promotion because they do not trust the current ranking behavior. Before you answer, call `lookupOperationalPulse` with focus `search_reliability` so the working brief carries the freshest operational metrics, owners, and milestone detail.', + }, + { + label: 'Turn 3', + user: + 'Stakeholder map changed this week. 
Their original sponsor, Megan Cole, left for another retailer. The interim sponsor is Arun Bedi, who is more skeptical and already asked whether our roadmap commitments from last quarter were too optimistic. Procurement joined the thread early, which usually means they are preparing leverage. Northstar also told us their board wants software spend held roughly flat this year unless a vendor can show a direct margin or conversion lift. So the renewal story now needs to work for both an operator and a finance audience.', + }, + { + label: 'Turn 4', + user: + 'Commercial pressure is getting sharper. A competitor is offering an 18 percent discount, migration support, and a claim that they can be production-ready before holiday planning starts. Northstar is asking us for price protection, a stronger uptime commitment, and a written explanation of how we will avoid another relevance regression. Legal also asked for a cleaner data-locality clause because the customer is expanding into two regions with tighter internal review. Before you answer, call `lookupDealDeskGuidance` with focus `pricing_and_legal` so the brief reflects actual concession guardrails instead of guesses. Keep the brief grounded in what actually matters instead of turning it into a generic account summary.', + }, + { + label: 'Turn 5', + user: + 'Board-pack timing update: Northstar needs a clear path-to-green narrative by Wednesday morning. They specifically want to hear who owns relevance quality, what will improve before the holiday build window, and what they should expect in the next 30 days versus the next quarter. Internally, we do not want to promise new headcount, but we can commit a named search engineer plus a solutions architect for the next six weeks. Finance is okay with targeted credits if we tie them to milestones rather than offering an open-ended concession. 
Before you answer, call `lookupDeliveryPlan` with focus `holiday_readiness` so the working brief picks up the named owners, dates, and near-term milestones.', + }, + { + label: 'Turn 6', + user: + 'Latest negotiation posture from our side: we can probably hold the discount line to 6 percent if we bundle a two-year term and a phased rollout plan. Product is comfortable promising weekly relevance reviews, a rollback guardrail for synonym changes, and a formal scorecard shared with Arun. We should avoid language that sounds defensive, because the account team thinks confidence matters almost as much as the technical fix. Before you answer, call `lookupDealDeskGuidance` with focus `concession_package` and call `lookupDeliveryPlan` with focus `thirty_day_plan`. Fold that into the working brief, then be ready to give me the final executive-ready version on the next turn.', + }, + { + label: 'Turn 7', + user: + 'Now produce the final executive-ready renewal brief using the accumulated tool-backed notes and conversation context. Output exactly three markdown bullets titled Current state, Top risks, and Recommended next step.', + }, +]; + +function loadEnv(): DemoEnv { + const baseURL = process.env.LITELLM_URL; + const apiKey = process.env.LITELLM_API_KEY; /* read from the environment; never commit credentials */ + const mainProvider = process.env.LITELLM_PROVIDER; + const mainModel = process.env.LITELLM_MODEL; + + if (!baseURL || !apiKey || !mainModel) { + console.log(colors.red('Missing LiteLLM configuration for the multi-turn compaction demo.')); + console.log(colors.yellow('Set LITELLM_URL, LITELLM_API_KEY, and LITELLM_MODEL.')); + console.log( + colors.dim( + 'Copy examples/compaction-real-llm-multi-turn-demo/.env.example to .env and fill it in.' 
+ ) + ); + process.exit(1); + } + + const resolvedMainProvider = mainProvider || 'direct'; + const compactionProvider = process.env.LITELLM_COMPACTION_PROVIDER || mainProvider; + const compactionModel = process.env.LITELLM_COMPACTION_MODEL || mainModel; + + return { + baseURL: normalizeLiteLLMBaseURL(baseURL), + apiKey, + mainProvider: resolvedMainProvider, + mainModel: resolveLiteLLMModel(mainProvider, mainModel), + compactionProvider: compactionProvider || 'direct', + compactionModel: resolveLiteLLMModel(compactionProvider, compactionModel), + maxInputTokens: parsePositiveInt(process.env.LITELLM_MAX_INPUT_TOKENS, MIN_DEMO_INPUT_TOKENS, MIN_DEMO_INPUT_TOKENS), + maxOutputTokens: parsePositiveInt(process.env.LITELLM_MAX_OUTPUT_TOKENS, MIN_DEMO_OUTPUT_TOKENS, MIN_DEMO_OUTPUT_TOKENS), + triggerPercentage: parseTriggerPercentage(process.env.COMPACTION_TRIGGER_PERCENTAGE, 0.38), + }; +} + +function normalizeLiteLLMBaseURL(baseURL: string): string { + const trimmed = baseURL.trim().replace(/\/+$/, ''); + if (trimmed.endsWith('/v1')) { + return trimmed; + } + return `${trimmed}/v1`; +} + +function resolveLiteLLMModel(provider: string | undefined, model: string): string { + if (!provider || provider.trim().length === 0 || model.includes('/')) { + return model; + } + return `${provider}/${model}`; +} + +function parsePositiveInt(value: string | undefined, fallback: number, minimum = 1): number { + const parsed = Number.parseInt(value || '', 10); + if (!Number.isFinite(parsed) || parsed <= 0) { + return fallback; + } + return Math.max(parsed, minimum); +} + +function parseTriggerPercentage(value: string | undefined, fallback: number): number { + const parsed = Number.parseFloat(value || ''); + if (!Number.isFinite(parsed) || parsed <= 0) { + return fallback; + } + if (parsed > 1 && parsed <= 100) { + return parsed / 100; + } + if (parsed > 1) { + return fallback; + } + return parsed; +} + +function createRenewalTools(): readonly Tool[] { + const lookupOperationalPulse: Tool 
= { + schema: { + name: 'lookupOperationalPulse', + description: + 'Retrieve the latest operational search signals, incident trends, and named owners for the renewal.', + parameters: z.object({ + focus: z.enum(['search_reliability', 'category_conversion', 'support_burden']), + }), + }, + async execute({ focus }, context) { + return JSON.stringify( + { + accountName: context.accountName, + focus, + asOf: '2026-03-09', + reliability: { + searchLatencyP95: '1.08s', + baselineLatencyP95: '320ms', + zeroResultRate: '3.6%', + degradedCampaignWeekends: 2, + supportTicketsLast14Days: 14, + }, + conversion: { + categoryConversionDelta: '-2.9%', + mobileSearchAdoptionStatus: 'paused until latency recovers', + kioskPilotStatus: 'design approved, launch blocked on relevance confidence', + }, + ownership: { + searchEngineer: 'Rina Patel', + solutionsArchitect: 'Gabe Kim', + executiveSponsor: 'Lena Ortiz', + }, + committedActions: [ + 'Weekly relevance review with Arun Bedi every Thursday.', + 'Rollback guardrail for synonym changes before the next weekend campaign.', + 'Shared scorecard covering latency, zero-result rate, and category conversion.', + ], + watchouts: [ + 'Store managers are manually curating promo landing pages because ranking confidence dropped.', + 'The disabled synonym package remains the clearest example of an avoidable regression.', + ], + }, + null, + 2 + ); + }, + }; + + const lookupDealDeskGuidance: Tool = { + schema: { + name: 'lookupDealDeskGuidance', + description: + 'Retrieve current commercial guardrails, concession limits, and legal posture for the renewal.', + parameters: z.object({ + focus: z.enum(['pricing_and_legal', 'concession_package', 'board_narrative']), + }), + }, + async execute({ focus }, context) { + return JSON.stringify( + { + accountName: context.accountName, + focus, + asOf: '2026-03-09', + pricing: { + competitorDiscount: '18%', + approvedStandaloneDiscountCeiling: '6%', + twoYearTermPosition: + '6% discount is acceptable with a 
phased rollout plan and executive scorecard.', + creditPolicy: 'Targeted service credits only when tied to named milestones.', + }, + legal: { + uptimeCommitment: + 'Stronger SLA language can be offered if the rollback guardrail is documented.', + dataLocalityClause: + 'Updated regional processing addendum is available for the two expansion regions.', + procurementRisk: + 'Procurement entered early, so margin protection requires a board-safe narrative.', + }, + messaging: { + financeAngle: + 'Hold spend roughly flat while tying every concession to measurable conversion or risk reduction.', + operatorAngle: + 'Show named owners, weekly reviews, and pre-holiday rollback safety.', + }, + nonNegotiables: [ + 'Do not offer open-ended credits.', + 'Do not imply new headcount beyond the named search engineer and solutions architect.', + 'Do not frame the competitor offer as technically equivalent without evidence.', + ], + }, + null, + 2 + ); + }, + }; + + const lookupDeliveryPlan: Tool = { + schema: { + name: 'lookupDeliveryPlan', + description: + 'Retrieve the current delivery plan, owners, milestones, and 30-day path-to-green commitments.', + parameters: z.object({ + focus: z.enum(['holiday_readiness', 'thirty_day_plan', 'ownership_map']), + }), + }, + async execute({ focus }, context) { + return JSON.stringify( + { + accountName: context.accountName, + focus, + asOf: '2026-03-09', + owners: { + relevanceLead: 'Rina Patel', + solutionsArchitect: 'Gabe Kim', + accountExecutive: 'Maya Thompson', + }, + next30Days: [ + 'Week 1: ship synonym rollback guardrail and validate high-volume seasonal queries.', + 'Week 2: publish a shared scorecard with latency, zero-result rate, and conversion trend.', + 'Week 3: run a controlled relevance review with Arun and merchandising leads.', + 'Week 4: present path-to-green update before holiday build planning starts.', + ], + quarterPlan: [ + 'Stabilize search latency below 450ms p95.', + 'Recover category conversion through controlled 
ranking experiments.', + 'Resume mobile and kiosk rollout only after the reliability scorecard stays green for two consecutive weeks.', + ], + dependencies: [ + 'Named search engineer and solutions architect remain allocated for six weeks.', + 'Northstar must provide merchandising signoff on the revised synonym package.', + ], + }, + null, + 2 + ); + }, + }; + + return [lookupOperationalPulse, lookupDealDeskGuidance, lookupDeliveryPlan]; +} + +function createAgent(env: DemoEnv): Agent { + return { + name: 'RealCompactionMultiTurnDemoAgent', + tools: createRenewalTools(), + instructions: () => + [ + 'You are preparing an executive renewal-risk brief for an enterprise account team.', + 'Maintain continuity across turns and update the working brief as new facts arrive.', + 'When the user asks for fresh operational, commercial, or delivery detail, call the relevant tool before answering.', + 'If the user explicitly names a tool, use that tool instead of guessing.', + 'Before the final turn, respond in exactly 3 terse bullets titled Signal, Risk, and Missing.', + 'Only when the user explicitly asks for the final executive-ready renewal brief, output exactly 3 markdown bullet items: "- **Current state** ...", "- **Top risks** ...", and "- **Recommended next step** ...".', + 'Keep the writing concrete and commercially grounded. 
Preserve names, metrics, timing, discounts, commitments, milestone dates, and ownership details.', + 'Keep each non-final response under 120 words.', + 'Use tool outputs as source-of-truth details and fold them naturally into the brief.', + 'Do not mention compaction, summarization, token limits, or transcript management.', + ].join('\n'), + modelConfig: { + name: env.mainModel, + temperature: 0, + maxTokens: env.maxOutputTokens, + }, + compaction: { + enabled: true, + triggerPercentage: env.triggerPercentage, + preserveLastAssistantMessage: true, + minCandidateMessages: 2, + rules: + 'Preserve the account name, renewal timing, ARR, stakeholder names, tool-derived operational metrics, legal and pricing guardrails, committed owners, milestone-based credits, and the final requested output shape. Return at most 180 words. Drop repeated phrasing and duplicate recap text.', + }, + }; +} + +function createProvider(label: string, env: DemoEnv): ModelProvider { + const client = new OpenAI({ + baseURL: env.baseURL, + apiKey: env.apiKey, + defaultHeaders: LITELLM_REQUEST_HEADERS, + dangerouslyAllowBrowser: true, + }); + + let callCount = 0; + + return { + getTokenLimits() { + return { + maxInputTokens: env.maxInputTokens, + maxOutputTokens: env.maxOutputTokens, + }; + }, + async getCompletion(state, agent, config) { + callCount += 1; + const model = agent.modelConfig?.name ?? config.modelOverride; + if (!model) { + throw new Error(`No model configured for ${label}`); + } + + const params: OpenAI.Chat.Completions.ChatCompletionCreateParams = { + model, + temperature: agent.modelConfig?.temperature, + max_tokens: agent.modelConfig?.maxTokens ?? 
env.maxOutputTokens, + messages: [ + { + role: 'system', + content: agent.instructions(state), + }, + ...state.messages.map(convertMessageToChatParam), + ], + tools: buildOpenAITools(agent.tools), + }; + + logProviderRequest(`${label} request #${callCount}`, state, agent, params); + + const response = await client.chat.completions.create( + params as OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming + ); + let result = buildProviderResult(response); + + logProviderResponse(`${label} response #${callCount}`, result); + + if (shouldRetryForVisibleOutput(result)) { + const retryParams: OpenAI.Chat.Completions.ChatCompletionCreateParams = { + ...params, + messages: [ + ...params.messages, + { + role: 'user', + content: + 'Return only the visible assistant response for the current task. Do not include analysis or hidden reasoning. If this is the final executive-ready renewal brief, output exactly three markdown bullet items titled Current state, Top risks, and Recommended next step.', + }, + ], + tool_choice: params.tools ? 
'auto' : undefined, + }; + + logProviderRequest(`${label} retry request #${callCount}`, state, agent, retryParams); + const retryResponse = await client.chat.completions.create( + retryParams as OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming + ); + result = buildProviderResult(retryResponse); + logProviderResponse(`${label} retry response #${callCount}`, result); + } + + return result as any; + }, + }; +} + +function buildProviderResult(response: OpenAI.Chat.Completions.ChatCompletion) { + const choice = response.choices[0]; + return { + ...choice, + usage: response.usage, + model: response.model, + id: response.id, + }; +} + +function hasVisibleAssistantOutput(result: any): boolean { + const message = result?.message; + if (!message) { + return false; + } + if (typeof message.content === 'string' && message.content.trim().length > 0) { + return true; + } + return Array.isArray(message.tool_calls) && message.tool_calls.length > 0; +} + +function shouldRetryForVisibleOutput(result: any): boolean { + if (!hasVisibleAssistantOutput(result)) { + return true; + } + return result?.finish_reason === 'length'; +} + +function buildOpenAITools( + tools: readonly Tool[] | undefined +): OpenAI.Chat.Completions.ChatCompletionCreateParams['tools'] | undefined { + if (!tools || tools.length === 0) { + return undefined; + } + + return tools.map((tool) => ({ + type: 'function', + function: { + name: tool.schema.name, + description: tool.schema.description, + parameters: zodSchemaToJsonSchema(tool.schema.parameters), + }, + })); +} + +function zodSchemaToJsonSchema(zodSchema: any): any { + if (zodSchema._def?.typeName === 'ZodObject') { + const properties: Record = {}; + const required: string[] = []; + + for (const [key, value] of Object.entries(zodSchema._def.shape())) { + properties[key] = zodSchemaToJsonSchema(value); + if (!(value as any).isOptional?.()) { + required.push(key); + } + } + + return { + type: 'object', + properties, + required: required.length > 0 
? required : undefined, + additionalProperties: false, + }; + } + + if (zodSchema._def?.typeName === 'ZodString') { + return { type: 'string' }; + } + + if (zodSchema._def?.typeName === 'ZodEnum') { + return { + type: 'string', + enum: zodSchema._def.values, + }; + } + + if (zodSchema._def?.typeName === 'ZodOptional') { + return zodSchemaToJsonSchema(zodSchema._def.innerType); + } + + return { type: 'string', description: 'Unsupported schema type' }; +} + +function convertMessageToChatParam( + message: Message +): OpenAI.Chat.Completions.ChatCompletionMessageParam { + switch (message.role) { + case 'user': + return { + role: 'user', + content: getTextContent(message.content), + }; + case 'assistant': + return { + role: 'assistant', + content: getTextContent(message.content), + tool_calls: message.tool_calls as any, + }; + case 'tool': + return { + role: 'tool', + content: getTextContent(message.content), + tool_call_id: message.tool_call_id!, + }; + default: + throw new Error(`Unsupported role: ${(message as any).role}`); + } +} + +function buildInitialState(agent: Agent): RunState { + return { + runId: createRunId(randomUUID()), + traceId: createTraceId(randomUUID()), + messages: [ + { + role: 'user', + content: + 'Kickoff note: keep a running renewal-risk brief for Northstar Retail, preserve names and metrics, and keep separate what is operational noise versus structural risk.', + }, + { + role: 'assistant', + content: + 'Understood. 
I will carry a concise working brief across turns, preserve concrete account details, and tighten the narrative as new facts arrive.', + }, + ], + currentAgentName: agent.name, + context: { + accountName: 'Northstar Retail', + renewalQuarter: 'Q3', + }, + turnCount: 0, + }; +} + +function appendUserMessage( + state: RunState, + content: string +): RunState { + return { + ...state, + messages: [ + ...state.messages, + { + role: 'user', + content, + }, + ], + }; +} + +function readAssistantOutput(result: RunResult): string { + if (result.outcome.status === 'completed') { + return String(result.outcome.output); + } + if (result.outcome.status === 'error') { + throw new Error(JSON.stringify(result.outcome.error)); + } + throw new Error(`Unexpected interrupted outcome: ${JSON.stringify(result.outcome)}`); +} + +function logProviderRequest( + title: string, + state: Readonly>, + agent: Readonly>, + params: OpenAI.Chat.Completions.ChatCompletionCreateParams +) { + console.log(''); + console.log(colors.bold(colors.yellow(title))); + console.log(colors.bold(colors.blue('Agent instructions'))); + console.log(agent.instructions(state)); + console.log(colors.bold(colors.blue('JAF message array'))); + console.dir(state.messages.map(toPrintableMessage), { depth: 8, maxArrayLength: null }); + console.log(colors.bold(colors.blue('LiteLLM headers'))); + console.dir(LITELLM_REQUEST_HEADERS, { depth: 4 }); + console.log(colors.bold(colors.blue('LiteLLM request payload'))); + console.dir(params, { depth: 8, maxArrayLength: null }); +} + +function logProviderResponse(title: string, response: unknown) { + console.log(colors.bold(colors.green(title))); + console.dir(response, { depth: 8, maxArrayLength: null }); +} + +function toPrintableMessage(message: Message) { + return { + role: message.role, + content: message.content, + tool_calls: message.tool_calls, + tool_call_id: message.tool_call_id, + }; +} + +function renderMessage(message: Message, index: number): string { + if 
(message.tool_calls && message.tool_calls.length > 0) { + return `${index + 1}. assistant tool call -> ${message.tool_calls + .map((toolCall) => toolCall.function.name) + .join(', ')}`; + } + if (message.role === 'tool') { + return `${index + 1}. tool -> ${truncate(getTextContent(message.content), 240)}`; + } + return `${index + 1}. ${message.role} -> ${truncate(getTextContent(message.content), 240)}`; +} + +function truncate(text: string, maxLength: number): string { + if (text.length <= maxLength) { + return text; + } + return `${text.slice(0, maxLength - 3)}...`; +} + +async function main() { + configureSanitization({ + customSanitizer: (key, value) => { + if (!key.toLowerCase().includes('token')) { + return undefined; + } + if (typeof value === 'number' || typeof value === 'boolean') { + return value; + } + return undefined; + }, + }); + + try { + const env = loadEnv(); + const agent = createAgent(env); + + console.log(colors.bold(colors.blue('JAF Real LiteLLM Multi-Turn Compaction Demo'))); + console.log(colors.dim(`LiteLLM URL: ${env.baseURL}`)); + console.log(colors.dim(`Main provider/model: ${env.mainModel}`)); + console.log(colors.dim(`Compaction provider/model: ${env.compactionModel}`)); + console.log(colors.dim(`Configured max input tokens: ${env.maxInputTokens}`)); + console.log(colors.dim(`Configured max output tokens: ${env.maxOutputTokens}`)); + console.log(colors.dim(`Compaction trigger percentage: ${env.triggerPercentage}`)); + console.log( + colors.dim( + 'Target flow: 7 turns with explicit operational, commercial, and delivery tool calls; compaction should trigger at least once before the run finishes.\n' + ) + ); + + let activeScriptedTurn = 0; + let compactionCount = 0; + + function onEvent(event: TraceEvent) { + switch (event.type) { + case 'llm_call_start': + console.log( + colors.blue( + `JAF turn call starting for ${event.data.agentName} with ${event.data.messages?.length ?? 
0} transcript messages` + ) + ); + break; + case 'tool_requests': + console.log( + colors.cyan( + `Tool requested: ${event.data.toolCalls + .map((toolCall) => `${toolCall.name}(${JSON.stringify(toolCall.args)})`) + .join(', ')}` + ) + ); + break; + case 'tool_call_end': + console.log(colors.cyan(`Tool completed: ${event.data.toolName}`)); + break; + case 'compaction_start': + compactionCount += 1; + console.log( + colors.magenta( + `Compaction started before scripted turn ${activeScriptedTurn + 1}: input=${event.data.currentInputTokens}, threshold=${event.data.thresholdTokens}, compactable=${event.data.compactableMessageCount}, preserved=${event.data.preservedMessageCount}` + ) + ); + break; + case 'compaction_end': + console.log( + colors.magenta( + `Compaction ${event.data.status}: before=${event.data.beforeInputTokens}, after=${event.data.afterInputTokens ?? '-'}, model=${event.data.model}` + ) + ); + break; + case 'token_usage': + console.log( + colors.dim( + `Token usage: prompt=${event.data.prompt ?? '-'} completion=${event.data.completion ?? '-'} total=${event.data.total ?? 
'-'}` + ) + ); + break; + case 'final_output': + console.log(colors.green(`Final output emitted: ${String(event.data.output)}`)); + break; + } + } + + const mainProvider = createProvider('Main turn model', env); + const compactionProvider = createProvider('Compaction model', env); + + const config: RunConfig = { + agentRegistry: new Map([[agent.name, agent]]), + modelProvider: mainProvider, + compaction: { + modelProvider: compactionProvider, + modelOverride: env.compactionModel, + }, + maxTurns: SCRIPTED_TURNS.length * 4, + onEvent, + }; + + let state = buildInitialState(agent); + + for (const [index, turn] of SCRIPTED_TURNS.entries()) { + activeScriptedTurn = index; + + console.log(''); + console.log(colors.bold(colors.cyan(`${turn.label} user input`))); + console.log(turn.user); + + const result = await run(appendUserMessage(state, turn.user), config); + const assistantText = readAssistantOutput(result); + + console.log(''); + console.log(colors.bold(colors.yellow(`${turn.label} assistant output`))); + console.log(assistantText); + + state = result.finalState; + } + + console.log(''); + console.log(colors.bold(colors.yellow('Final transcript after the scripted conversation'))); + state.messages.forEach((message, index) => { + console.log(renderMessage(message, index)); + }); + + console.log(''); + if (compactionCount === 0) { + console.log( + colors.red( + 'Compaction did not trigger in this run. This demo is tuned to compact once with the default settings, so lower COMPACTION_TRIGGER_PERCENTAGE or LITELLM_MAX_INPUT_TOKENS only if your model still used materially fewer prompt tokens than expected.' 
+ ) + ); + } else { + console.log(colors.bold(colors.green(`Demo completed with ${compactionCount} compaction event(s).`))); + } + } finally { + resetSanitizationConfig(); + } +} + +if (require.main === module) { + main().catch((error) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/compaction-real-llm-multi-turn-demo/package.json b/examples/compaction-real-llm-multi-turn-demo/package.json new file mode 100644 index 0000000..0eb8de3 --- /dev/null +++ b/examples/compaction-real-llm-multi-turn-demo/package.json @@ -0,0 +1,22 @@ +{ + "name": "jaf-compaction-real-llm-multi-turn-demo", + "version": "1.0.0", + "description": "Real multi-turn LLM compaction demo for JAF", + "main": "index.ts", + "scripts": { + "start": "tsx index.ts", + "dev": "tsx index.ts", + "build": "tsc" + }, + "dependencies": { + "@xynehq/jaf": "workspace:*", + "dotenv": "^17.2.1", + "openai": "^4.0.0", + "zod": "^3.22.0" + }, + "devDependencies": { + "@types/node": "^20.10.5", + "tsx": "^4.7.0", + "typescript": "^5.3.3" + } +} diff --git a/examples/compaction-real-llm-multi-turn-demo/tsconfig.json b/examples/compaction-real-llm-multi-turn-demo/tsconfig.json new file mode 100644 index 0000000..df05ea4 --- /dev/null +++ b/examples/compaction-real-llm-multi-turn-demo/tsconfig.json @@ -0,0 +1,17 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "../.." 
+ }, + "include": [ + "./**/*.ts", + "../../src/**/*.ts" + ], + "exclude": [ + "node_modules", + "dist", + "../../src/**/*.test.ts", + "../../src/**/__tests__/**/*.ts" + ] +} diff --git a/examples/message-agents-real-llm-demo/.env.example b/examples/message-agents-real-llm-demo/.env.example new file mode 100644 index 0000000..509f6ad --- /dev/null +++ b/examples/message-agents-real-llm-demo/.env.example @@ -0,0 +1,7 @@ +LITELLM_URL=https://grid.ai.juspay.net/v1 +LITELLM_API_KEY=your-key +LITELLM_MODEL=glm-flash-experimental +LITELLM_COMPACTION_MODEL=glm-flash-experimental +LITELLM_MAX_INPUT_TOKENS=3200 +LITELLM_MAX_OUTPUT_TOKENS=500 +COMPACTION_TRIGGER_PERCENTAGE=0.48 diff --git a/examples/message-agents-real-llm-demo/README.md b/examples/message-agents-real-llm-demo/README.md new file mode 100644 index 0000000..6ad49fa --- /dev/null +++ b/examples/message-agents-real-llm-demo/README.md @@ -0,0 +1,84 @@ +# JAF Real LiteLLM Message-Agents Demo + +This example is the closest JAF analogue in the repo to Xyne's `MessageAgents` flow. + +It uses real LiteLLM-backed chat completion calls for: + +- the parent orchestrator turns +- delegated specialist sub-runs via `agentAsTool` +- the compaction LLM call +- the explicit final synthesis step + +Unlike `examples/compaction-real-llm-multi-turn-demo/`, this example is not just a tool-backed multi-turn conversation. It demonstrates the more opinionated control flow of a message-agents run: + +1. create or revise a sequential plan +2. delegate focused work to specialists +3. update the working brief as evidence arrives +4. compact the transcript when it grows +5. 
finish through a dedicated synthesis step + +## What it demonstrates + +- A parent orchestrator that stays in control across the full conversation +- Specialist delegation via `agentAsTool` +- Example-local planning with an `update_plan` tool +- An explicit `synthesize_final_brief` step for the final answer +- Real compaction under a more realistic orchestration flow +- Full request and response logging for the parent model and the compaction model + +## Comparison to the existing compaction demo + +| Example | Primary focus | +|---|---| +| `compaction-real-llm-multi-turn-demo` | Multi-turn tool-backed compaction with one main agent | +| `message-agents-real-llm-demo` | Parent orchestrator + delegated specialists + explicit final synthesis under compaction | + +## Required environment + +Copy `.env.example` to `.env` and fill in: + +```bash +LITELLM_URL=https://grid.ai.juspay.net/v1 +LITELLM_API_KEY=your-key +LITELLM_MODEL=glm-flash-experimental +LITELLM_COMPACTION_MODEL=glm-flash-experimental +LITELLM_MAX_INPUT_TOKENS=3200 +LITELLM_MAX_OUTPUT_TOKENS=500 +COMPACTION_TRIGGER_PERCENTAGE=0.48 +``` + +Important: + +- `COMPACTION_TRIGGER_PERCENTAGE=0.48` is tuned so the example usually gets through planning plus at least part of delegation before compaction starts. The example enforces floors of 9000 input tokens and 900 output tokens, so smaller `LITELLM_MAX_INPUT_TOKENS` / `LITELLM_MAX_OUTPUT_TOKENS` values (including the 3200 / 500 shown above) are raised to those floors before the trigger percentage is applied. +- `LITELLM_PROVIDER` is optional. Use it only when your LiteLLM setup expects provider-prefixed model names such as `openai/gpt-4o-mini`. +- `LITELLM_URL` should point at the LiteLLM base path that serves `/chat/completions`. If you pass `https://host/v1`, the example uses it as-is. If you pass `https://host`, the example normalizes it to `https://host/v1`. + +## Run + +From the repo root: + +```bash +pnpm exec tsx examples/message-agents-real-llm-demo/index.ts +``` + +Or from the example directory: + +```bash +pnpm dev +``` + +## Expected flow + +1. Turn 1 creates the initial plan. +2. Turns 2 to 4 delegate operational, commercial, and delivery analysis. +3. 
The parent keeps revising the working brief and plan as specialist outputs arrive. +4. Compaction should trigger once the transcript grows enough. +5. The final turn calls `synthesize_final_brief` and returns the board-ready brief. + +## What to look for + +- `update_plan(...)` appearing early in the run +- `delegate_operational_analysis`, `delegate_commercial_analysis`, and `delegate_delivery_strategy` appearing as tool calls +- `Compaction started before scripted turn ...` in the console output +- The final transcript containing both delegated tool messages and a `[JAF COMPACTION SUMMARY]` assistant message +- The final answer preserving names, metrics, owners, discounts, and readiness gates after compaction diff --git a/examples/message-agents-real-llm-demo/index.ts b/examples/message-agents-real-llm-demo/index.ts new file mode 100644 index 0000000..d84da1f --- /dev/null +++ b/examples/message-agents-real-llm-demo/index.ts @@ -0,0 +1,1262 @@ +#!/usr/bin/env tsx + +import { randomUUID } from 'crypto'; +import { config as loadDotenv } from 'dotenv'; +import OpenAI from 'openai'; +import { dirname, resolve } from 'path'; +import { fileURLToPath } from 'url'; +import { z } from 'zod'; +import { agentAsTool } from '../../src/core/agent-as-tool'; +import { run } from '../../src/core/engine'; +import { configureSanitization, resetSanitizationConfig } from '../../src/core/tracing'; +import { getToolRuntime } from '../../src/core/tool-runtime'; +import { ToolResponse, type ToolResult } from '../../src/core/tool-results'; +import { + createRunId, + createTraceId, + getTextContent, + type Agent, + type Message, + type ModelProvider, + type RunConfig, + type RunResult, + type RunState, + type Tool, + type TraceEvent, +} from '../../src/core/types'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +loadDotenv({ path: resolve(__dirname, '.env'), quiet: true }); +loadDotenv({ path: resolve(__dirname, '../../.env'), quiet: true }); 
/** Bucket that a recorded finding is filed under. */
type FindingCategory = 'operational' | 'commercial' | 'delivery' | 'synthesis';

/** Lifecycle state of one plan task. */
type TaskStatus = 'pending' | 'in_progress' | 'completed';

/** A single task inside the plan kept on the demo context. */
type PlanTask = {
  id: string;
  description: string;
  owner: 'parent' | 'operational' | 'commercial' | 'delivery';
  status: TaskStatus;
  result?: string;
};

/** One finding, tagged with its owner, category and the turn it was recorded on. */
type FindingRecord = {
  owner: string;
  category: FindingCategory;
  summary: string;
  turn: number;
};

/** One delegation entry: which specialist, with what query, on which turn. */
type DelegationRecord = {
  specialist: string;
  query: string;
  turn: number;
};

/** Shared run context: account facts plus the evolving plan, brief and findings. */
type DemoContext = {
  userId: string;
  permissions: string[];
  accountName: string;
  renewalQuarter: string;
  planGoal: string;
  plan: PlanTask[];
  currentTaskId?: string;
  workingBrief: string;
  findings: FindingRecord[];
  delegationHistory: DelegationRecord[];
  finalSynthesisRequested: boolean;
  becameAgentic: boolean;
};

/** Resolved configuration produced by `loadEnv`. */
type DemoEnv = {
  baseURL: string;
  apiKey: string;
  mainProvider: string;
  mainModel: string;
  compactionProvider: string;
  compactionModel: string;
  maxInputTokens: number;
  maxOutputTokens: number;
  triggerPercentage: number;
  minCandidateMessages: number;
};

/** A labelled user message that the demo replays in order. */
type ScriptedTurn = {
  label: string;
  user: string;
};

/** Builds a formatter that wraps text in the given ANSI SGR code and a reset. */
const ansiWrap =
  (code: number) =>
  (text: string): string =>
    `\x1b[${code}m${text}\x1b[0m`;

/** Console colour helpers used by the demo's log output. */
const colors = {
  bold: ansiWrap(1),
  blue: ansiWrap(34),
  cyan: ansiWrap(36),
  dim: ansiWrap(2),
  green: ansiWrap(32),
  magenta: ansiWrap(35),
  yellow: ansiWrap(33),
  red: ansiWrap(31),
};

/** Extra request headers for LiteLLM calls. */
const LITELLM_REQUEST_HEADERS = {
  'x-litellm-disable-logging': 'true',
};

// The token constants double as the fallback AND the floor in loadEnv, so
// configured values below them are raised to these numbers.
const MIN_DEMO_INPUT_TOKENS = 9000;
const MIN_DEMO_OUTPUT_TOKENS = 900;
const DEFAULT_COMPACTION_TRIGGER_PERCENTAGE = 0.72;
const DEFAULT_COMPACTION_MIN_CANDIDATE_MESSAGES = 8;

/** The scripted user turns, labelled "Turn 1" … "Turn 7" by position. */
const SCRIPTED_TURNS: readonly ScriptedTurn[] = [
  'We need a board-safe renewal brief for Northstar Retail, a search customer on a June Q3 renewal worth roughly $2.4M ARR. Do not answer fully yet. Create a plan first, keep a running working brief, and treat this like a message-agents orchestration run rather than a one-shot answer.',
  'New signal from support and product: search latency spiked during the last two campaign weekends, merchandising disabled a synonym package, and store managers are manually curating landing pages because they do not trust ranking behavior. Investigate the operational story through the operational specialist, then update the plan.',
  'Stakeholder posture changed. Their original sponsor left, the interim sponsor Arun Bedi is skeptical, and procurement joined early because Northstar wants software spend roughly flat unless we can prove margin or conversion lift. Pull the commercial story through the commercial specialist instead of guessing.',
  'One more complication: a competitor is offering an 18 percent discount and migration support before holiday planning. We need a realistic 30-day path-to-green with named owners and no fake headcount promises. Use the delivery specialist and then rewrite the plan based on everything you know so far.',
  'Board-pack timing moved up to Wednesday morning. Tighten the working brief so it is explicit about current state, top risks, and what changes in the next 30 days versus the next quarter. Keep this as a progress update, not the final answer.',
  'Our internal negotiation line is that we can probably hold the discount to 6 percent if we get a two-year term, milestone-linked credits, and weekly relevance reviews. Refresh the commercial and delivery implications in the plan, then tell me if you are ready for final synthesis.',
  'Produce the final board-ready renewal brief now. Use the synthesis step and output exactly three markdown bullets titled Current state, Top risks, and Recommended next step.',
].map((user, position) => ({ label: `Turn ${position + 1}`, user }));

/**
 * Reads the LiteLLM settings from `process.env` and resolves them into a `DemoEnv`.
 *
 * Prints a hint and exits the process with code 1 when `LITELLM_URL`,
 * `LITELLM_API_KEY` or `LITELLM_MODEL` is missing. Compaction provider/model
 * default to the main provider/model, and an absent provider is reported as
 * `'direct'`.
 */
function loadEnv(): DemoEnv {
  const env = process.env;
  const rawBaseURL = env.LITELLM_URL;
  const apiKey = env.LITELLM_API_KEY;
  const providerName = env.LITELLM_PROVIDER;
  const modelName = env.LITELLM_MODEL;

  if (!(rawBaseURL && apiKey && modelName)) {
    console.log(colors.red('Missing LiteLLM configuration for the message-agents demo.'));
    console.log(colors.yellow('Set LITELLM_URL, LITELLM_API_KEY, and LITELLM_MODEL.'));
    console.log(
      colors.dim('Copy examples/message-agents-real-llm-demo/.env.example to .env and fill it in.')
    );
    process.exit(1);
  }

  // Compaction settings inherit from the main settings unless overridden.
  const compactionProviderName = env.LITELLM_COMPACTION_PROVIDER || providerName;
  const compactionModelName = env.LITELLM_COMPACTION_MODEL || modelName;

  const baseURL = normalizeLiteLLMBaseURL(rawBaseURL);
  const mainModel = resolveLiteLLMModel(providerName, modelName);
  const compactionModel = resolveLiteLLMModel(compactionProviderName, compactionModelName);
  const maxInputTokens = parsePositiveInt(
    env.LITELLM_MAX_INPUT_TOKENS,
    MIN_DEMO_INPUT_TOKENS,
    MIN_DEMO_INPUT_TOKENS
  );
  const maxOutputTokens = parsePositiveInt(
    env.LITELLM_MAX_OUTPUT_TOKENS,
    MIN_DEMO_OUTPUT_TOKENS,
    MIN_DEMO_OUTPUT_TOKENS
  );
  const triggerPercentage = parseTriggerPercentage(
    env.COMPACTION_TRIGGER_PERCENTAGE,
    DEFAULT_COMPACTION_TRIGGER_PERCENTAGE
  );
  const minCandidateMessages = parsePositiveInt(
    env.COMPACTION_MIN_CANDIDATE_MESSAGES,
    DEFAULT_COMPACTION_MIN_CANDIDATE_MESSAGES,
    2
  );

  return {
    baseURL,
    apiKey,
    mainProvider: providerName || 'direct',
    mainModel,
    compactionProvider: compactionProviderName || 'direct',
    compactionModel,
    maxInputTokens,
    maxOutputTokens,
    triggerPercentage,
    minCandidateMessages,
  };
}

/**
 * Normalizes a LiteLLM base URL so it always ends in `/v1`.
 *
 * Surrounding whitespace and trailing slashes are removed first; `/v1` is
 * appended only when the result does not already end with it.
 */
function normalizeLiteLLMBaseURL(baseURL: string): string {
  const trimmed = baseURL.trim().replace(/\/+$/, '');
  return trimmed.endsWith('/v1') ? trimmed : `${trimmed}/v1`;
}

/**
 * Prefixes `model` with `provider/` unless no provider is given or the model
 * already contains a `/`.
 *
 * The provider is trimmed before use so whitespace around the configured value
 * cannot leak into the model name.
 */
function resolveLiteLLMModel(provider: string | undefined, model: string): string {
  const prefix = provider?.trim() ?? '';
  if (prefix.length === 0 || model.includes('/')) {
    return model;
  }
  return `${prefix}/${model}`;
}

/**
 * Parses a base-10 positive integer.
 *
 * @param value raw text, typically an environment variable
 * @param fallback returned when `value` is missing, non-numeric, or <= 0
 * @param minimum floor applied to a successfully parsed value (default 1)
 * @returns the parsed value raised to `minimum`, or `fallback` (not floored)
 */
function parsePositiveInt(value: string | undefined, fallback: number, minimum = 1): number {
  const parsed = Number.parseInt(value || '', 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return fallback;
  }
  return Math.max(parsed, minimum);
}

/**
 * Parses a compaction trigger given either as a fraction in (0, 1] or as a
 * percentage in (1, 100].
 *
 * @returns the trigger as a fraction, or `fallback` for missing, non-numeric,
 *   non-positive, or > 100 input
 */
function parseTriggerPercentage(value: string | undefined, fallback: number): number {
  const parsed = Number.parseFloat(value || '');
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return fallback;
  }
  if (parsed > 1 && parsed <= 100) {
    // Values such as "48" are read as percentages.
    return parsed / 100;
  }
  if (parsed > 1) {
    return fallback;
  }
  return parsed;
}

/** Casts the readonly context handed to tools back to a mutable `DemoContext`. */
function mutableContext(context: Readonly<DemoContext>): DemoContext {
  return context as DemoContext;
}

/** Collapses every whitespace run to a single space and trims both ends. */
function normalizeWhitespace(text: string): string {
  return text.replace(/\s+/g, ' ').trim();
}

// Lower-case markers of prompt-management chatter or model self-talk.
// The thinking-tag entry appeared as an empty string, which matches every
// input via `includes('')` and made every text count as noisy.
// NOTE(review): restored as '<think>' — confirm the exact tag spelling.
const NOISY_FINDING_FRAGMENTS: readonly string[] = [
  'the user wants me to',
  'return only visible assistant output',
  'tool calls:',
  'ongoing thinking',
  '<think>',
  'wait, let me',
  'let me analyze',
  'i need to synthesize',
  'actually, looking',
  'this implies i should',
  '[jaf compaction summary]',
  'sequence: [ongoing thinking]',
];

/**
 * Reports whether `text` contains any known noise marker.
 *
 * Matching is case-insensitive and runs on whitespace-normalized text.
 */
function isNoisyFindingText(text: string): boolean {
  const normalized = normalizeWhitespace(text).toLowerCase();
  return NOISY_FINDING_FRAGMENTS.some((fragment) => normalized.includes(fragment));
}

/**
 * Returns the whitespace-normalized finding text, or `null` when the text is
 * empty after normalization or is classified as noisy.
 */
function sanitizeFindingSummary(text: string): string | null {
  const compact = normalizeWhitespace(text);
  if (!compact) {
    return null;
  }
  if (isNoisyFindingText(compact)) {
    return null;
  }
  return compact;
}

function buildWorkingBrief(context: DemoContext): string {
+ const recentFindings = context.findings.slice(-6); + const planSummary = + context.plan.length === 0 + ? 'Plan not created yet.' + : context.plan + .map((task) => `${task.id}:${task.status}:${task.description}${task.result ? ` => ${task.result}` : ''}`) + .join(' | '); + const findingsSummary = + recentFindings.length === 0 + ? 'No validated findings yet.' + : recentFindings + .map((finding, index) => `${index + 1}. [${finding.category}] ${finding.summary}`) + .join('\n'); + + return [`Account: ${context.accountName} (${context.renewalQuarter} renewal).`, `Plan: ${planSummary}`, `Recent findings:\n${findingsSummary}`].join('\n'); +} + +function recordFinding(context: DemoContext, finding: FindingRecord) { + context.findings.push(finding); + context.workingBrief = buildWorkingBrief(context); +} + +function upsertPlan(context: DemoContext, goal: string, subTasks: PlanTask[]) { + context.planGoal = goal; + context.plan = subTasks; + context.currentTaskId = + subTasks.find((task) => task.status === 'in_progress')?.id || + subTasks.find((task) => task.status === 'pending')?.id; + context.becameAgentic = true; + context.workingBrief = buildWorkingBrief(context); +} + +function lookupOperationalSnapshot(focus: 'latency_regression' | 'support_burden' | 'usage_confidence') { + if (focus === 'latency_regression') { + return { + asOf: '2026-03-09', + latencyP95: '1.08s', + previousBaseline: '320ms', + zeroResultRate: '3.6%', + degradedCampaignWeekends: 2, + categoryConversionDelta: '-2.9%', + owner: 'Rina Patel', + watchout: 'Holiday category campaigns are now exposed to ranking regressions.', + }; + } + + if (focus === 'support_burden') { + return { + asOf: '2026-03-09', + supportTicketsLast14Days: 14, + storeManagerBehavior: 'manually curating promo landing pages', + merchandisingBehavior: 'synonym package disabled after irrelevant apparel matches', + owner: 'Gabe Kim', + watchout: 'Support burden is converting a search issue into an operator trust issue.', + }; + } + + 
return { + asOf: '2026-03-09', + mobileSearchStatus: 'paused pending reliability recovery', + kioskPilotStatus: 'design approved but launch blocked on relevance confidence', + executiveConcern: 'Northstar cannot tell whether this is temporary execution noise or structural product risk', + owner: 'Lena Ortiz', + }; +} + +function lookupCommercialPosition(focus: 'procurement_pressure' | 'concession_guardrails' | 'stakeholder_posture') { + if (focus === 'procurement_pressure') { + return { + asOf: '2026-03-09', + competitorOffer: '18% discount plus migration support', + procurementPosture: 'entered early and wants spend held roughly flat', + boardConstraint: 'vendors must show direct margin or conversion lift', + watchout: 'Margin protection depends on a confidence-building recovery narrative.', + }; + } + + if (focus === 'concession_guardrails') { + return { + asOf: '2026-03-09', + approvedDiscountCeiling: '6%', + acceptablePackaging: 'two-year term, phased rollout, milestone-linked credits', + disallowedMoves: ['open-ended credits', 'unbounded custom headcount promises'], + legalClause: 'updated data-locality addendum available for two new regions', + }; + } + + return { + asOf: '2026-03-09', + outgoingSponsor: 'Megan Cole', + interimSponsor: 'Arun Bedi', + financeContact: 'Priyanka Rao', + commercialRead: 'Operator story must work for a skeptical sponsor and a finance audience at the same time.', + }; +} + +function lookupDeliveryCommitments(focus: 'owner_map' | 'thirty_day_plan' | 'holiday_readiness') { + if (focus === 'owner_map') { + return { + asOf: '2026-03-09', + relevanceLead: 'Rina Patel', + solutionsArchitect: 'Gabe Kim', + accountExecutive: 'Maya Thompson', + staffingConstraint: 'named engineer plus named solutions architect for six weeks; no new headcount', + }; + } + + if (focus === 'thirty_day_plan') { + return { + asOf: '2026-03-09', + milestones: [ + 'Week 1: ship synonym rollback guardrail and validate high-volume seasonal queries.', + 'Week 2: 
publish shared scorecard for latency, zero-result rate, and conversion trend.', + 'Week 3: hold weekly relevance review with Arun and merchandising leads.', + 'Week 4: deliver path-to-green update before holiday build planning.', + ], + dependency: 'Northstar must sign off on revised synonym package before full rollout.', + }; + } + + return { + asOf: '2026-03-09', + preHolidayConditions: [ + 'Latency below 450ms p95.', + 'Two consecutive green scorecard reviews.', + 'Rollback guardrail active for synonym changes.', + ], + nextQuarterTarget: 'resume mobile and kiosk rollout only after reliability stays green.', + }; +} + +function createLookupTools(): { + operationalSnapshot: Tool<{ focus: 'latency_regression' | 'support_burden' | 'usage_confidence' }, DemoContext>; + commercialPosition: Tool<{ focus: 'procurement_pressure' | 'concession_guardrails' | 'stakeholder_posture' }, DemoContext>; + deliveryCommitments: Tool<{ focus: 'owner_map' | 'thirty_day_plan' | 'holiday_readiness' }, DemoContext>; +} { + return { + operationalSnapshot: { + schema: { + name: 'lookup_support_signal', + description: 'Retrieve current operational reliability, support, and usage-confidence signals.', + parameters: z.object({ + focus: z.enum(['latency_regression', 'support_burden', 'usage_confidence']), + }), + }, + async execute(args) { + return ToolResponse.success(lookupOperationalSnapshot(args.focus)); + }, + }, + commercialPosition: { + schema: { + name: 'lookup_renewal_constraints', + description: 'Retrieve current stakeholder, procurement, discount, and legal renewal constraints.', + parameters: z.object({ + focus: z.enum(['procurement_pressure', 'concession_guardrails', 'stakeholder_posture']), + }), + }, + async execute(args) { + return ToolResponse.success(lookupCommercialPosition(args.focus)); + }, + }, + deliveryCommitments: { + schema: { + name: 'lookup_account_timeline', + description: 'Retrieve current delivery owners, 30-day milestones, and holiday readiness commitments.', 
+ parameters: z.object({ + focus: z.enum(['owner_map', 'thirty_day_plan', 'holiday_readiness']), + }), + }, + async execute(args) { + return ToolResponse.success(lookupDeliveryCommitments(args.focus)); + }, + }, + }; +} + +function createFinalSynthesisTool(env: DemoEnv): Tool<{ emphasis?: string }, DemoContext> { + return { + schema: { + name: 'synthesize_final_brief', + description: + 'Generate the final board-ready brief from the running plan, delegated findings, and accumulated conversation context.', + parameters: z.object({ + emphasis: z.string().optional(), + }), + }, + async execute(args, context): Promise { + const mutable = mutableContext(context); + if (!mutable.becameAgentic) { + return ToolResponse.validationError('Final synthesis is only valid after the run becomes agentic.'); + } + + const runtime = getToolRuntime(context); + if (!runtime) { + return ToolResponse.error('EXECUTION_FAILED', 'Runtime unavailable for final synthesis.'); + } + + mutable.finalSynthesisRequested = true; + + const finalAgent: Agent = { + name: 'FinalBriefSynthesizer', + instructions: () => + [ + 'You are the final synthesis stage for a message-agents style orchestration flow.', + 'Use the supplied working brief, plan state, delegated findings, and recent transcript to write the final answer.', + 'Output exactly three markdown bullets:', + '- **Current state** ...', + '- **Top risks** ...', + '- **Recommended next step** ...', + 'Keep it executive-ready, concrete, and commercially grounded.', + 'Do not mention compaction, orchestration, delegation, or hidden process.', + ].join('\n'), + modelConfig: { + name: env.mainModel, + temperature: 0, + maxTokens: env.maxOutputTokens, + }, + }; + + const transcriptTail = runtime.state.messages + .filter((message) => { + const content = getTextContent(message.content); + if (message.role !== 'user' && isNoisyFindingText(content)) { + return false; + } + return content.trim().length > 0; + }) + .slice(-8) + .map((message) => 
`${message.role.toUpperCase()}: ${truncate(getTextContent(message.content), 220)}`) + .join('\n'); + const findingsBlock = mutable.findings + .map((finding, index) => `${index + 1}. [${finding.category}] ${finding.summary}`) + .join('\n'); + const planBlock = mutable.plan + .map((task) => `${task.id} | ${task.status} | ${task.description}${task.result ? ` | ${task.result}` : ''}`) + .join('\n'); + + const synthesisState: RunState = { + runId: createRunId(randomUUID()), + traceId: runtime.state.traceId, + currentAgentName: finalAgent.name, + context: runtime.state.context, + turnCount: runtime.state.turnCount, + messages: [ + { + role: 'user', + content: [ + `Account: ${mutable.accountName}`, + `Renewal quarter: ${mutable.renewalQuarter}`, + `Working brief:\n${mutable.workingBrief}`, + `Plan state:\n${planBlock}`, + `Delegated findings:\n${findingsBlock}`, + `Recent transcript:\n${transcriptTail}`, + args.emphasis ? `Emphasis:\n${args.emphasis}` : '', + ] + .filter(Boolean) + .join('\n\n'), + }, + ], + }; + + const response = await runtime.config.modelProvider.getCompletion( + synthesisState, + finalAgent, + runtime.config + ); + const text = response.message?.content?.trim(); + if (!text) { + return ToolResponse.error('EXECUTION_FAILED', 'Final synthesis provider returned no visible text.'); + } + + return ToolResponse.success(text, { phase: 'final_synthesis' }); + }, + }; +} + +function createPlanTool(): Tool<{ goal: string; subTasks: PlanTask[] }, DemoContext> { + return { + schema: { + name: 'update_plan', + description: 'Create or revise the sequential execution plan for the orchestrator.', + parameters: z.object({ + goal: z.string(), + subTasks: z.array( + z.object({ + id: z.string(), + description: z.string(), + owner: z.enum(['parent', 'operational', 'commercial', 'delivery']), + status: z.enum(['pending', 'in_progress', 'completed']), + result: z.string().optional(), + }) + ), + }), + }, + async execute(args, context) { + const mutable = 
mutableContext(context); + upsertPlan(mutable, args.goal, args.subTasks); + return ToolResponse.success({ + goal: args.goal, + currentTaskId: mutable.currentTaskId, + plan: mutable.plan, + }); + }, + }; +} + +function createSpecialistAgents( + env: DemoEnv, + lookupTools: ReturnType +): { + operationalAnalyst: Agent; + commercialAnalyst: Agent; + deliveryStrategist: Agent; +} { + return { + operationalAnalyst: { + name: 'OperationalAnalyst', + instructions: () => + [ + 'You are the operational specialist in a message-agents flow.', + 'Always call lookup_support_signal before answering.', + 'Return exactly three short markdown bullets:', + '- **Signal** ...', + '- **Risk** ...', + '- **What to tell leadership** ...', + 'Preserve concrete metrics, owners, and operational consequences.', + ].join('\n'), + tools: [lookupTools.operationalSnapshot], + modelConfig: { + name: env.mainModel, + temperature: 0, + maxTokens: 220, + }, + }, + commercialAnalyst: { + name: 'CommercialAnalyst', + instructions: () => + [ + 'You are the commercial specialist in a message-agents flow.', + 'Always call lookup_renewal_constraints before answering.', + 'Return exactly three short markdown bullets:', + '- **Stakeholder posture** ...', + '- **Commercial risk** ...', + '- **Negotiation implication** ...', + 'Preserve discounts, constraints, and stakeholder names.', + ].join('\n'), + tools: [lookupTools.commercialPosition], + modelConfig: { + name: env.mainModel, + temperature: 0, + maxTokens: 220, + }, + }, + deliveryStrategist: { + name: 'DeliveryStrategist', + instructions: () => + [ + 'You are the delivery specialist in a message-agents flow.', + 'Always call lookup_account_timeline before answering.', + 'Return exactly three short markdown bullets:', + '- **Owners** ...', + '- **30-day path** ...', + '- **Readiness gate** ...', + 'Preserve dates, dependencies, and constraints against fake headcount.', + ].join('\n'), + tools: [lookupTools.deliveryCommitments], + modelConfig: { + name: 
env.mainModel, + temperature: 0, + maxTokens: 220, + }, + }, + }; +} + +function createOrchestratorAgent( + env: DemoEnv, + tools: { + updatePlan: Tool<{ goal: string; subTasks: PlanTask[] }, DemoContext>; + synthesizeFinalBrief: Tool<{ emphasis?: string }, DemoContext>; + delegateOperationalAnalysis: Tool<{ input: string }, DemoContext>; + delegateCommercialAnalysis: Tool<{ input: string }, DemoContext>; + delegateDeliveryStrategy: Tool<{ input: string }, DemoContext>; + } +): Agent { + return { + name: 'MessageOrchestrator', + tools: [ + tools.updatePlan, + tools.delegateOperationalAnalysis, + tools.delegateCommercialAnalysis, + tools.delegateDeliveryStrategy, + tools.synthesizeFinalBrief, + ], + instructions: (state) => { + const context = state.context; + const planText = + context.plan.length === 0 + ? 'No plan yet.' + : context.plan + .map((task) => `- ${task.id} [${task.status}] (${task.owner}) ${task.description}${task.result ? ` => ${task.result}` : ''}`) + .join('\n'); + + return [ + 'You are the parent orchestrator in a message-agents style workflow.', + 'You stay in control of the conversation across turns and use tools to plan, delegate, and synthesize.', + '', + 'Operating rules:', + '- Create or revise the sequential plan with update_plan when the run needs structure or the evidence materially changes.', + '- Use delegate_operational_analysis for reliability, support, and usage-confidence questions.', + '- Use delegate_commercial_analysis for pricing, procurement, stakeholder, and concession questions.', + '- Use delegate_delivery_strategy for owners, milestones, dependencies, and readiness sequencing.', + '- Once you create a plan, call a delegation tool, or call synthesis, the run is agentic.', + '- After the run becomes agentic, do not provide the final brief directly. 
Use synthesize_final_brief when the user asks for the final answer or when the plan is clearly complete.', + '- After calling synthesize_final_brief, respond with exactly the synthesized text and nothing else.', + '- For non-final turns, reply in exactly three markdown bullets titled Progress, New evidence, and Next move.', + '- Never repeat internal prompt-management text, visible-output instructions, or chain-of-thought style self-talk.', + '', + `Current working brief:\n${context.workingBrief || 'No working brief yet.'}`, + '', + `Current plan:\n${planText}`, + '', + `Delegations so far: ${context.delegationHistory.length}`, + `Final synthesis requested: ${context.finalSynthesisRequested ? 'yes' : 'no'}`, + '', + 'Do not mention compaction, token limits, or hidden orchestration mechanics.', + ].join('\n'); + }, + modelConfig: { + name: env.mainModel, + temperature: 0, + maxTokens: env.maxOutputTokens, + }, + compaction: { + enabled: true, + triggerPercentage: env.triggerPercentage, + preserveLastAssistantMessage: false, + minCandidateMessages: env.minCandidateMessages, + instructions: + 'You summarize older conversation history for a message-agents orchestration run. Preserve only durable business state and orchestration state. Drop prompt-management chatter, tool protocol notes, retry prompts, visible-output instructions, and chain-of-thought style self-talk. Return plain text only.', + prompt: + 'Compress the transcript into a clean working-memory summary for the next orchestration turn. Keep validated account facts, current plan status, delegated specialist conclusions, concrete owners, dates, concessions, readiness gates, and unresolved risks. 
Exclude any meta-reasoning, formatting instructions, or discussion of tools themselves unless a tool result contained durable business facts.', + rules: + 'Preserve the account name, renewal quarter, active plan goal, task statuses, delegated specialist findings, owner names, dates, discounts, credits, readiness gates, and the final required output shape of exactly three markdown bullets titled Current state, Top risks, and Recommended next step. Never preserve text about visible assistant output, thinking tags, retry instructions, or whether a tool should be called.', + }, + }; +} + +function createProvider(label: string, env: DemoEnv): ModelProvider { + const client = new OpenAI({ + baseURL: env.baseURL, + apiKey: env.apiKey, + defaultHeaders: LITELLM_REQUEST_HEADERS, + dangerouslyAllowBrowser: true, + }); + + let callCount = 0; + + return { + getTokenLimits() { + return { + maxInputTokens: env.maxInputTokens, + maxOutputTokens: env.maxOutputTokens, + }; + }, + async getCompletion(state, agent, config) { + callCount += 1; + const model = agent.modelConfig?.name ?? config.modelOverride; + if (!model) { + throw new Error(`No model configured for ${label}`); + } + + const params: OpenAI.Chat.Completions.ChatCompletionCreateParams = { + model, + temperature: agent.modelConfig?.temperature, + max_tokens: agent.modelConfig?.maxTokens ?? 
env.maxOutputTokens, + messages: [ + { + role: 'system', + content: agent.instructions(state), + }, + ...state.messages.map(convertMessageToChatParam), + ], + tools: buildOpenAITools(agent.tools), + }; + + logProviderRequest(`${label} request #${callCount}`, state, agent, params); + + const response = await client.chat.completions.create( + params as OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming + ); + let result = buildProviderResult(response); + logProviderResponse(`${label} response #${callCount}`, result); + + if (shouldRetryForVisibleOutput(result)) { + const retryParams: OpenAI.Chat.Completions.ChatCompletionCreateParams = { + ...params, + messages: [ + ...params.messages, + { + role: 'user', + content: + 'Return only visible assistant output for the current task. If you already called the final synthesis tool, respond with exactly the synthesized final brief and nothing else.', + }, + ], + tool_choice: params.tools ? 'auto' : undefined, + }; + + logProviderRequest(`${label} retry request #${callCount}`, state, agent, retryParams); + const retryResponse = await client.chat.completions.create( + retryParams as OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming + ); + result = buildProviderResult(retryResponse); + logProviderResponse(`${label} retry response #${callCount}`, result); + } + + return result as any; + }, + }; +} + +function buildProviderResult(response: OpenAI.Chat.Completions.ChatCompletion) { + const choice = response.choices[0]; + return { + ...choice, + usage: response.usage, + model: response.model, + id: response.id, + }; +} + +function hasVisibleAssistantOutput(result: any): boolean { + const message = result?.message; + if (!message) { + return false; + } + if (typeof message.content === 'string' && message.content.trim().length > 0) { + return true; + } + return Array.isArray(message.tool_calls) && message.tool_calls.length > 0; +} + +function shouldRetryForVisibleOutput(result: any): boolean { + if 
(!hasVisibleAssistantOutput(result)) { + return true; + } + return result?.finish_reason === 'length'; +} + +function buildOpenAITools( + tools: readonly Tool[] | undefined +): OpenAI.Chat.Completions.ChatCompletionCreateParams['tools'] | undefined { + if (!tools || tools.length === 0) { + return undefined; + } + + return tools.map((tool) => ({ + type: 'function', + function: { + name: tool.schema.name, + description: tool.schema.description, + parameters: zodSchemaToJsonSchema(tool.schema.parameters), + }, + })); +} + +function zodSchemaToJsonSchema(zodSchema: any): any { + if (zodSchema._def?.typeName === 'ZodObject') { + const properties: Record = {}; + const required: string[] = []; + + for (const [key, value] of Object.entries(zodSchema._def.shape())) { + properties[key] = zodSchemaToJsonSchema(value); + if (!(value as any).isOptional?.()) { + required.push(key); + } + } + + return { + type: 'object', + properties, + required: required.length > 0 ? required : undefined, + additionalProperties: false, + }; + } + + if (zodSchema._def?.typeName === 'ZodArray') { + return { + type: 'array', + items: zodSchemaToJsonSchema(zodSchema._def.type), + }; + } + + if (zodSchema._def?.typeName === 'ZodString') { + return { type: 'string' }; + } + + if (zodSchema._def?.typeName === 'ZodEnum') { + return { + type: 'string', + enum: zodSchema._def.values, + }; + } + + if (zodSchema._def?.typeName === 'ZodOptional') { + return zodSchemaToJsonSchema(zodSchema._def.innerType); + } + + return { type: 'string', description: 'Unsupported schema type' }; +} + +function convertMessageToChatParam( + message: Message +): OpenAI.Chat.Completions.ChatCompletionMessageParam { + switch (message.role) { + case 'user': + return { role: 'user', content: getTextContent(message.content) }; + case 'assistant': + return { + role: 'assistant', + content: getTextContent(message.content), + tool_calls: message.tool_calls as any, + }; + case 'tool': + return { + role: 'tool', + content: 
getTextContent(message.content), + tool_call_id: message.tool_call_id!, + }; + default: + throw new Error(`Unsupported role: ${(message as any).role}`); + } +} + +function buildInitialState(agent: Agent): RunState { + return { + runId: createRunId(randomUUID()), + traceId: createTraceId(randomUUID()), + messages: [ + { + role: 'user', + content: + 'Kickoff note: this conversation should feel like a message-agents orchestration run with planning, specialist delegation, and a final synthesis step.', + }, + { + role: 'assistant', + content: + 'Understood. I will maintain a sequential plan, gather specialist-backed evidence, keep a working brief, and hold the final board-ready answer for the synthesis step.', + }, + ], + currentAgentName: agent.name, + context: { + userId: 'demo-user', + permissions: ['user'], + accountName: 'Northstar Retail', + renewalQuarter: 'Q3', + planGoal: '', + plan: [], + workingBrief: 'No validated evidence yet.', + findings: [], + delegationHistory: [], + finalSynthesisRequested: false, + becameAgentic: false, + }, + turnCount: 0, + }; +} + +function appendUserMessage(state: RunState, content: string): RunState { + return { + ...state, + messages: [...state.messages, { role: 'user', content }], + }; +} + +function readAssistantOutput(result: RunResult): string { + if (result.outcome.status === 'completed') { + return String(result.outcome.output); + } + if (result.outcome.status === 'error') { + throw new Error(JSON.stringify(result.outcome.error)); + } + throw new Error(`Unexpected interrupted outcome: ${JSON.stringify(result.outcome)}`); +} + +function logProviderRequest( + title: string, + state: Readonly>, + agent: Readonly>, + params: OpenAI.Chat.Completions.ChatCompletionCreateParams +) { + console.log(''); + console.log(colors.bold(colors.yellow(title))); + console.log(colors.bold(colors.blue('Agent instructions'))); + console.log(agent.instructions(state)); + console.log(colors.bold(colors.blue('JAF message array'))); + 
console.dir(state.messages.map(toPrintableMessage), { depth: 8, maxArrayLength: null }); + console.log(colors.bold(colors.blue('LiteLLM request payload'))); + console.dir(params, { depth: 8, maxArrayLength: null }); +} + +function logProviderResponse(title: string, response: unknown) { + console.log(colors.bold(colors.green(title))); + console.dir(response, { depth: 8, maxArrayLength: null }); +} + +function toPrintableMessage(message: Message) { + return { + role: message.role, + content: message.content, + tool_calls: message.tool_calls, + tool_call_id: message.tool_call_id, + }; +} + +function truncate(text: string, maxLength: number): string { + if (text.length <= maxLength) { + return text; + } + return `${text.slice(0, maxLength - 3)}...`; +} + +function renderMessage(message: Message, index: number): string { + if (message.tool_calls && message.tool_calls.length > 0) { + return `${index + 1}. assistant tool call -> ${message.tool_calls + .map((toolCall) => toolCall.function.name) + .join(', ')}`; + } + if (message.role === 'tool') { + return `${index + 1}. tool -> ${truncate(getTextContent(message.content), 220)}`; + } + return `${index + 1}. ${message.role} -> ${truncate(getTextContent(message.content), 220)}`; +} + +function summarizeToolResult(result: string | ToolResult): string { + if (typeof result === 'string') { + return truncate(result, 220); + } + if (result.status === 'success') { + const data = typeof result.data === 'string' ? 
result.data : JSON.stringify(result.data); + return truncate(data, 220); + } + return truncate(result.error?.message || 'Tool failed', 220); +} + +async function main() { + configureSanitization({ + customSanitizer: (key, value) => { + if (!key.toLowerCase().includes('token')) { + return undefined; + } + if (typeof value === 'number' || typeof value === 'boolean') { + return value; + } + return undefined; + }, + }); + + try { + const env = loadEnv(); + const lookupTools = createLookupTools(); + const specialistAgents = createSpecialistAgents(env, lookupTools); + const updatePlan = createPlanTool(); + const synthesizeFinalBrief = createFinalSynthesisTool(env); + + const delegateOperationalAnalysis = agentAsTool( + specialistAgents.operationalAnalyst, + { + toolName: 'delegate_operational_analysis', + toolDescription: 'Delegate a focused operational investigation to the operational specialist.', + maxTurns: 5, + propagateEvents: 'all', + } + ); + const delegateCommercialAnalysis = agentAsTool( + specialistAgents.commercialAnalyst, + { + toolName: 'delegate_commercial_analysis', + toolDescription: 'Delegate a focused commercial investigation to the commercial specialist.', + maxTurns: 5, + propagateEvents: 'all', + } + ); + const delegateDeliveryStrategy = agentAsTool( + specialistAgents.deliveryStrategist, + { + toolName: 'delegate_delivery_strategy', + toolDescription: 'Delegate a focused delivery and readiness investigation to the delivery specialist.', + maxTurns: 5, + propagateEvents: 'all', + } + ); + + const orchestrator = createOrchestratorAgent(env, { + updatePlan, + synthesizeFinalBrief, + delegateOperationalAnalysis, + delegateCommercialAnalysis, + delegateDeliveryStrategy, + }); + + console.log(colors.bold(colors.blue('JAF Real LiteLLM Message-Agents Demo'))); + console.log(colors.dim(`LiteLLM URL: ${env.baseURL}`)); + console.log(colors.dim(`Main provider/model: ${env.mainModel}`)); + console.log(colors.dim(`Compaction provider/model: 
${env.compactionModel}`)); + console.log(colors.dim(`Configured max input tokens: ${env.maxInputTokens}`)); + console.log(colors.dim(`Configured max output tokens: ${env.maxOutputTokens}`)); + console.log(colors.dim(`Compaction trigger percentage: ${env.triggerPercentage}`)); + console.log(colors.dim(`Compaction min candidate messages: ${env.minCandidateMessages}`)); + console.log( + colors.dim( + 'Target flow: plan -> operational delegation -> commercial delegation -> delivery delegation -> final synthesis, with compaction expected once the transcript grows.\n' + ) + ); + + let activeScriptedTurn = 0; + let compactionCount = 0; + + const mainProvider = createProvider('Main turn model', env); + const compactionProvider = createProvider('Compaction model', env); + + const config: RunConfig = { + agentRegistry: new Map([[orchestrator.name, orchestrator]]), + modelProvider: mainProvider, + compaction: { + modelProvider: compactionProvider, + modelOverride: env.compactionModel, + }, + maxTurns: SCRIPTED_TURNS.length * 5, + onEvent(event: TraceEvent) { + switch (event.type) { + case 'tool_requests': + console.log( + colors.cyan( + `Tool requested: ${event.data.toolCalls + .map((toolCall) => `${toolCall.name}(${JSON.stringify(toolCall.args)})`) + .join(', ')}` + ) + ); + break; + case 'tool_call_end': + console.log(colors.cyan(`Tool completed: ${event.data.toolName}`)); + break; + case 'compaction_start': + compactionCount += 1; + console.log( + colors.magenta( + `Compaction started before scripted turn ${activeScriptedTurn + 1}: input=${event.data.currentInputTokens}, threshold=${event.data.thresholdTokens}, compactable=${event.data.compactableMessageCount}, preserved=${event.data.preservedMessageCount}` + ) + ); + break; + case 'compaction_end': + console.log( + colors.magenta( + `Compaction ${event.data.status}: before=${event.data.beforeInputTokens}, after=${event.data.afterInputTokens ?? 
'-'}, model=${event.data.model}` + ) + ); + break; + case 'final_output': + console.log(colors.green(`Final output emitted: ${String(event.data.output)}`)); + break; + } + }, + async onAfterToolExecution(toolName, result, toolContext) { + const context = mutableContext(toolContext.state.context); + const currentTurn = toolContext.state.turnCount + 1; + + if (toolName === 'update_plan') { + recordFinding(context, { + owner: 'MessageOrchestrator', + category: 'delivery', + summary: `Plan updated: ${context.currentTaskId || 'no active task'} is now active.`, + turn: currentTurn, + }); + return result; + } + + if (toolName === 'delegate_operational_analysis') { + context.becameAgentic = true; + context.delegationHistory.push({ + specialist: 'OperationalAnalyst', + query: String(toolContext.args?.input || ''), + turn: currentTurn, + }); + const summary = sanitizeFindingSummary(summarizeToolResult(result)); + if (summary) { + recordFinding(context, { + owner: 'OperationalAnalyst', + category: 'operational', + summary, + turn: currentTurn, + }); + } + return result; + } + + if (toolName === 'delegate_commercial_analysis') { + context.becameAgentic = true; + context.delegationHistory.push({ + specialist: 'CommercialAnalyst', + query: String(toolContext.args?.input || ''), + turn: currentTurn, + }); + const summary = sanitizeFindingSummary(summarizeToolResult(result)); + if (summary) { + recordFinding(context, { + owner: 'CommercialAnalyst', + category: 'commercial', + summary, + turn: currentTurn, + }); + } + return result; + } + + if (toolName === 'delegate_delivery_strategy') { + context.becameAgentic = true; + context.delegationHistory.push({ + specialist: 'DeliveryStrategist', + query: String(toolContext.args?.input || ''), + turn: currentTurn, + }); + const summary = sanitizeFindingSummary(summarizeToolResult(result)); + if (summary) { + recordFinding(context, { + owner: 'DeliveryStrategist', + category: 'delivery', + summary, + turn: currentTurn, + }); + } + return 
result; + } + + if (toolName === 'synthesize_final_brief') { + context.finalSynthesisRequested = true; + return result; + } + + return result; + }, + }; + + let state = buildInitialState(orchestrator); + + for (const [index, turn] of SCRIPTED_TURNS.entries()) { + activeScriptedTurn = index; + + console.log(''); + console.log(colors.bold(colors.cyan(`${turn.label} user input`))); + console.log(turn.user); + + const result = await run(appendUserMessage(state, turn.user), config); + const assistantText = readAssistantOutput(result); + + console.log(''); + console.log(colors.bold(colors.yellow(`${turn.label} assistant output`))); + console.log(assistantText); + console.log(colors.dim(`Working brief after ${turn.label}:`)); + console.log(result.finalState.context.workingBrief); + + state = result.finalState; + } + + console.log(''); + console.log(colors.bold(colors.yellow('Final transcript after the scripted conversation'))); + state.messages.forEach((message, index) => { + console.log(renderMessage(message, index)); + }); + + console.log(''); + if (compactionCount === 0) { + console.log( + colors.red( + 'Compaction did not trigger in this run. Lower COMPACTION_TRIGGER_PERCENTAGE or LITELLM_MAX_INPUT_TOKENS if your model still used materially fewer prompt tokens than expected.' 
+ ) + ); + } else { + console.log(colors.bold(colors.green(`Demo completed with ${compactionCount} compaction event(s).`))); + } + } finally { + resetSanitizationConfig(); + } +} + +if (require.main === module) { + main().catch((error) => { + console.error(error); + process.exit(1); + }); +} diff --git a/examples/message-agents-real-llm-demo/package.json b/examples/message-agents-real-llm-demo/package.json new file mode 100644 index 0000000..319e475 --- /dev/null +++ b/examples/message-agents-real-llm-demo/package.json @@ -0,0 +1,22 @@ +{ + "name": "jaf-message-agents-real-llm-demo", + "version": "1.0.0", + "description": "Real LiteLLM message-agents style orchestration demo for JAF", + "main": "index.ts", + "scripts": { + "start": "tsx index.ts", + "dev": "tsx index.ts", + "build": "tsc" + }, + "dependencies": { + "@xynehq/jaf": "workspace:*", + "dotenv": "^17.2.1", + "openai": "^4.0.0", + "zod": "^3.22.0" + }, + "devDependencies": { + "@types/node": "^20.10.5", + "tsx": "^4.7.0", + "typescript": "^5.3.3" + } +} diff --git a/examples/message-agents-real-llm-demo/tsconfig.json b/examples/message-agents-real-llm-demo/tsconfig.json new file mode 100644 index 0000000..df05ea4 --- /dev/null +++ b/examples/message-agents-real-llm-demo/tsconfig.json @@ -0,0 +1,17 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "outDir": "./dist", + "rootDir": "../.." + }, + "include": [ + "./**/*.ts", + "../../src/**/*.ts" + ], + "exclude": [ + "node_modules", + "dist", + "../../src/**/*.test.ts", + "../../src/**/__tests__/**/*.ts" + ] +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3c5f7aa..0f11ee8 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -190,6 +190,50 @@ importers: specifier: ^5.3.3 version: 5.9.3 + examples/compaction-demo: + dependencies: + '@xynehq/jaf': + specifier: workspace:* + version: link:../.. 
+ zod: + specifier: ^3.22.0 + version: 3.25.76 + devDependencies: + '@types/node': + specifier: ^20.10.5 + version: 20.19.11 + tsx: + specifier: ^4.7.0 + version: 4.20.4 + typescript: + specifier: ^5.3.3 + version: 5.9.2 + + examples/compaction-real-llm-demo: + dependencies: + '@xynehq/jaf': + specifier: workspace:* + version: link:../.. + dotenv: + specifier: ^17.2.1 + version: 17.2.1 + openai: + specifier: ^4.0.0 + version: 4.104.0(ws@8.18.3)(zod@3.25.76) + zod: + specifier: ^3.22.0 + version: 3.25.76 + devDependencies: + '@types/node': + specifier: ^20.10.5 + version: 20.19.11 + tsx: + specifier: ^4.7.0 + version: 4.20.4 + typescript: + specifier: ^5.3.3 + version: 5.9.2 + examples/flight-booking: dependencies: '@juspay-jaf/jaf': diff --git a/src/core/compaction.ts b/src/core/compaction.ts new file mode 100644 index 0000000..38b72be --- /dev/null +++ b/src/core/compaction.ts @@ -0,0 +1,1104 @@ +import { + Agent, + CompactionConfig, + Message, + ModelProvider, + RunConfig, + RunState, + TokenLedger, + getTextContent, +} from './types.js'; +import { safeConsole } from '../utils/logger.js'; + +const DEFAULT_TRIGGER_PERCENTAGE = 0.8; +const DEFAULT_MIN_CANDIDATE_MESSAGES = 4; +const DEFAULT_COMPACTION_PREFIX = '[JAF COMPACTION SUMMARY]\n'; +const FIXED_IMAGE_TOKENS = 1844; +const BASE64_DECODE_OVERHEAD_BYTES = 650; + +type ResolvedCompactionConfig = { + readonly enabled: true; + readonly triggerPercentage: number; + readonly doNotCompactSystemPrompt: boolean; + readonly preserveLastAssistantMessage: boolean; + readonly instructions?: string; + readonly prompt?: string; + readonly rules?: string; + readonly minCandidateMessages: number; +}; + +type CompactionSegments = { + readonly boundaryIndex: number; + readonly compactableMessages: readonly Message[]; + readonly preservedMessages: readonly Message[]; +}; + +type CompactStateSuccess = { + readonly success: true; + readonly state: RunState; +}; + +type CompactStateFailure = { + readonly success: false; + readonly 
state: RunState; + readonly error: string; +}; + +export type CompactStateResult = CompactStateSuccess | CompactStateFailure; + +function logCompaction(message: string, metadata?: Record) { + safeConsole.log(`[JAF:COMPACTION] ${message}`, metadata ?? {}); +} + +function warnCompaction(message: string, metadata?: Record) { + safeConsole.warn(`[JAF:COMPACTION] ${message}`, metadata ?? {}); +} + +function countMessagesByRole(messages: readonly Message[]) { + return messages.reduce>((counts, message) => { + counts[message.role] = (counts[message.role] || 0) + 1; + return counts; + }, {}); +} + +// Normalizes the agent compaction setting into a fully-populated runtime config. +export function normalizeCompactionConfig( + config?: boolean | CompactionConfig +): ResolvedCompactionConfig | null { + if (config === undefined || config === false) { + return null; + } + + if (config === true) { + return { + enabled: true, + triggerPercentage: DEFAULT_TRIGGER_PERCENTAGE, + doNotCompactSystemPrompt: true, + preserveLastAssistantMessage: true, + minCandidateMessages: DEFAULT_MIN_CANDIDATE_MESSAGES, + }; + } + + if (config.enabled === false) { + return null; + } + + return { + enabled: true, + triggerPercentage: normalizeTriggerPercentage(config.triggerPercentage), + doNotCompactSystemPrompt: config.doNotCompactSystemPrompt ?? true, + preserveLastAssistantMessage: config.preserveLastAssistantMessage ?? true, + instructions: config.instructions?.trim() || undefined, + prompt: config.prompt?.trim() || undefined, + rules: config.rules?.trim() || undefined, + minCandidateMessages: normalizeMinCandidateMessages(config.minCandidateMessages), + }; +} + +// Decides whether this run should maintain token estimates for compaction-aware state updates. 
+export function shouldTrackTokens(state: Readonly>, agent: Readonly>): boolean { + return Boolean(state.tokenLedger) || Boolean(normalizeCompactionConfig(agent.compaction)); +} + +// Approximates text token usage with a lightweight characters-to-tokens heuristic. +export function estimateTextTokens(text: string): number { + if (!text) { + return 0; + } + return Math.ceil(text.length / 4); +} + +// Ensures the state has a token ledger aligned with the current message list. +export function ensureTokenLedger(state: Readonly>): RunState { + if (state.tokenLedger && state.tokenLedger.messageTokenEstimates.length === state.messages.length) { + logCompaction('Reusing existing token ledger.', { + messageCount: state.messages.length, + totalMessageTokens: state.tokenLedger.totalMessageTokens, + }); + return state as RunState; + } + + logCompaction('Rebuilding token ledger to match messages.', { + messageCount: state.messages.length, + existingLedgerMessageCount: state.tokenLedger?.messageTokenEstimates.length ?? 0, + }); + + return { + ...state, + tokenLedger: createTokenLedger(state.messages, undefined, state.tokenLedger), + }; +} + +// Appends messages and updates the token ledger when token tracking is active. 
+export function appendMessagesWithLedger( + state: Readonly>, + messagesToAppend: readonly Message[], + options?: { + readonly trackTokens?: boolean; + readonly overrides?: readonly (number | undefined)[]; + } +): RunState { + const nextMessages = [...state.messages, ...messagesToAppend]; + if (!(options?.trackTokens || state.tokenLedger)) { + logCompaction('Appending messages without token tracking.', { + previousMessageCount: state.messages.length, + appendedMessageCount: messagesToAppend.length, + nextMessageCount: nextMessages.length, + }); + return { + ...state, + messages: nextMessages, + }; + } + + const stateWithLedger = ensureTokenLedger(state); + const baseLedger = stateWithLedger.tokenLedger!; + const appendedEstimates = messagesToAppend.map((message, index) => + options?.overrides?.[index] ?? estimateMessageTokens(message) + ); + logCompaction('Appending messages with token tracking.', { + previousMessageCount: state.messages.length, + appendedMessageCount: messagesToAppend.length, + nextMessageCount: nextMessages.length, + appendedTokenEstimates: appendedEstimates, + appendedTokenTotal: sum(appendedEstimates), + previousTotalMessageTokens: baseLedger.totalMessageTokens, + }); + + return { + ...stateWithLedger, + messages: nextMessages, + tokenLedger: { + ...baseLedger, + messageTokenEstimates: [...baseLedger.messageTokenEstimates, ...appendedEstimates], + totalMessageTokens: baseLedger.totalMessageTokens + sum(appendedEstimates), + }, + }; +} + +// Replaces the message list and rebuilds the ledger so token estimates stay consistent. 
+export function rebuildStateWithLedger( + state: Readonly>, + messages: readonly Message[], + options?: { + readonly trackTokens?: boolean; + readonly overrides?: readonly (number | undefined)[]; + } +): RunState { + if (!(options?.trackTokens || state.tokenLedger)) { + logCompaction('Rebuilding state without token tracking.', { + previousMessageCount: state.messages.length, + nextMessageCount: messages.length, + }); + return { + ...state, + messages, + }; + } + + logCompaction('Rebuilding state with token tracking.', { + previousMessageCount: state.messages.length, + nextMessageCount: messages.length, + overrideCount: options?.overrides?.length ?? 0, + }); + + return { + ...state, + messages, + tokenLedger: createTokenLedger(messages, options?.overrides, state.tokenLedger), + }; +} + +// Refreshes cached system prompt token estimates and returns the current total input size. +export function syncSystemPromptLedger( + state: Readonly>, + agent: Readonly> +): { + readonly state: RunState; + readonly systemPromptText: string; + readonly inputTokens: number; +} { + const stateWithLedger = ensureTokenLedger(state); + const currentLedger = stateWithLedger.tokenLedger!; + const systemPromptText = agent.instructions(stateWithLedger); + + if (currentLedger.lastSystemPromptText === systemPromptText) { + logCompaction('System prompt ledger is already in sync.', { + messageCount: stateWithLedger.messages.length, + totalMessageTokens: currentLedger.totalMessageTokens, + systemPromptTokens: currentLedger.lastSystemPromptTokens, + inputTokens: currentLedger.totalMessageTokens + currentLedger.lastSystemPromptTokens, + }); + return { + state: stateWithLedger, + systemPromptText, + inputTokens: currentLedger.totalMessageTokens + currentLedger.lastSystemPromptTokens, + }; + } + + const systemPromptTokens = estimateTextTokens(systemPromptText); + logCompaction('System prompt changed. 
Refreshing system prompt token estimate.', { + messageCount: stateWithLedger.messages.length, + previousSystemPromptTokens: currentLedger.lastSystemPromptTokens, + nextSystemPromptTokens: systemPromptTokens, + totalMessageTokens: currentLedger.totalMessageTokens, + }); + const nextState: RunState = { + ...stateWithLedger, + tokenLedger: { + ...currentLedger, + lastSystemPromptText: systemPromptText, + lastSystemPromptTokens: systemPromptTokens, + }, + }; + + return { + state: nextState, + systemPromptText, + inputTokens: nextState.tokenLedger!.totalMessageTokens + systemPromptTokens, + }; +} + +// Compacts older transcript history before a turn when the estimated input exceeds the configured threshold. +export async function maybeCompactStateBeforeTurn( + state: Readonly>, + agent: Readonly>, + config: Readonly>, + turnNumber: number +): Promise> { + const compactionConfig = normalizeCompactionConfig(agent.compaction); + logCompaction('Checking whether compaction should run at turn start.', { + turnNumber, + agentName: agent.name, + messageCount: state.messages.length, + messageRoles: countMessagesByRole(state.messages), + hasTokenLedger: Boolean(state.tokenLedger), + compactionEnabled: Boolean(compactionConfig), + }); + if (!compactionConfig) { + logCompaction('Compaction is disabled for this agent. Skipping check.', { + turnNumber, + agentName: agent.name, + }); + return { + success: true, + state: state as RunState, + }; + } + + const synced = syncSystemPromptLedger(state, agent); + const stateWithLedger = synced.state; + const currentInputTokens = synced.inputTokens; + logCompaction('Token counts computed for compaction check.', { + turnNumber, + agentName: agent.name, + messageCount: stateWithLedger.messages.length, + totalMessageTokens: stateWithLedger.tokenLedger?.totalMessageTokens ?? 0, + systemPromptTokens: stateWithLedger.tokenLedger?.lastSystemPromptTokens ?? 
0, + currentInputTokens, + }); + + const limits = await config.modelProvider.getTokenLimits?.(stateWithLedger, agent, config); + const maxInputTokens = limits?.maxInputTokens; + if (!Number.isFinite(maxInputTokens) || (maxInputTokens ?? 0) <= 0) { + warnCompaction('Main model provider did not return a valid maxInputTokens limit.', { + turnNumber, + agentName: agent.name, + reportedLimits: limits, + }); + return { + success: false, + state: stateWithLedger, + error: `Compaction is enabled for agent ${agent.name}, but the main model provider did not return a valid maxInputTokens limit.`, + }; + } + + const thresholdTokens = Math.floor((maxInputTokens as number) * compactionConfig.triggerPercentage); + logCompaction('Computed compaction threshold.', { + turnNumber, + agentName: agent.name, + maxInputTokens, + triggerPercentage: compactionConfig.triggerPercentage, + thresholdTokens, + currentInputTokens, + }); + if (currentInputTokens <= thresholdTokens) { + logCompaction('Current input is below compaction threshold. Skipping compaction.', { + turnNumber, + agentName: agent.name, + currentInputTokens, + thresholdTokens, + remainingHeadroom: thresholdTokens - currentInputTokens, + }); + return { + success: true, + state: stateWithLedger, + }; + } + + const segments = splitMessagesForCompaction(stateWithLedger.messages, compactionConfig); + const compactedMessageCount = segments.compactableMessages.length; + const preservedMessageCount = segments.preservedMessages.length; + const { provider, model, usingOverrideProvider, error: compactionProviderError } = resolveCompactionRuntime(agent, config); + logCompaction('Compaction threshold exceeded. 
Prepared transcript segments.', { + turnNumber, + agentName: agent.name, + currentInputTokens, + thresholdTokens, + compactedMessageCount, + preservedMessageCount, + compactedRoles: countMessagesByRole(segments.compactableMessages), + preservedRoles: countMessagesByRole(segments.preservedMessages), + boundaryIndex: segments.boundaryIndex, + }); + logCompaction('Resolved compaction runtime.', { + turnNumber, + agentName: agent.name, + model, + usingOverrideProvider, + providerResolved: Boolean(provider), + providerError: compactionProviderError, + }); + + config.onEvent?.({ + type: 'compaction_start', + data: { + turn: turnNumber, + agentName: agent.name, + thresholdTokens, + currentInputTokens, + compactableMessageCount: compactedMessageCount, + preservedMessageCount, + usingOverrideProvider, + model, + }, + }); + + if (compactedMessageCount < compactionConfig.minCandidateMessages) { + warnCompaction('Compaction threshold was exceeded, but too few messages were eligible.', { + turnNumber, + agentName: agent.name, + compactedMessageCount, + minCandidateMessages: compactionConfig.minCandidateMessages, + preservedMessageCount, + currentInputTokens, + thresholdTokens, + }); + config.onEvent?.({ + type: 'compaction_end', + data: { + turn: turnNumber, + agentName: agent.name, + status: 'skipped', + thresholdTokens, + beforeInputTokens: currentInputTokens, + compactedMessageCount, + preservedMessageCount, + reason: `Not enough messages eligible for compaction (minimum ${compactionConfig.minCandidateMessages}).`, + model, + }, + }); + + return { + success: false, + state: stateWithLedger, + error: `Context exceeded the compaction threshold for agent ${agent.name}, but only ${compactedMessageCount} messages were eligible for compaction.`, + }; + } + + if (compactionProviderError || !provider) { + warnCompaction('Compaction provider resolution failed.', { + turnNumber, + agentName: agent.name, + model, + usingOverrideProvider, + error: compactionProviderError || 'Compaction 
provider resolution failed.', + }); + config.onEvent?.({ + type: 'compaction_end', + data: { + turn: turnNumber, + agentName: agent.name, + status: 'failed', + thresholdTokens, + beforeInputTokens: currentInputTokens, + compactedMessageCount, + preservedMessageCount, + error: compactionProviderError || 'Compaction provider resolution failed.', + model, + }, + }); + + return { + success: false, + state: stateWithLedger, + error: compactionProviderError || 'Compaction provider resolution failed.', + }; + } + + try { + logCompaction('Invoking compaction provider.', { + turnNumber, + agentName: agent.name, + model, + compactedMessageCount, + preservedMessageCount, + }); + const compactionResponse = await provider.getCompletion( + createCompactionState(stateWithLedger, segments, compactionConfig, synced.systemPromptText), + createCompactionAgent(agent, model, compactionConfig), + createCompactionRunConfig(config, provider, model) + ); + + const summaryText = getTextContent(compactionResponse.message?.content || '').trim(); + logCompaction('Compaction provider returned a response.', { + turnNumber, + agentName: agent.name, + model, + summaryLength: summaryText.length, + usage: (compactionResponse as any)?.usage, + }); + if (!summaryText) { + warnCompaction('Compaction provider returned an empty summary.', { + turnNumber, + agentName: agent.name, + model, + }); + config.onEvent?.({ + type: 'compaction_end', + data: { + turn: turnNumber, + agentName: agent.name, + status: 'failed', + thresholdTokens, + beforeInputTokens: currentInputTokens, + compactedMessageCount, + preservedMessageCount, + error: 'Compaction provider returned an empty summary.', + model, + }, + }); + + return { + success: false, + state: stateWithLedger, + error: 'Compaction provider returned an empty summary.', + }; + } + + const summaryMessage: Message = { + role: 'assistant', + content: `${DEFAULT_COMPACTION_PREFIX}${summaryText}`, + }; + const summaryMessageTokens = 
normalizeUsageTokens((compactionResponse as any)?.usage?.completion_tokens ?? (compactionResponse as any)?.usage?.completionTokens) + ?? estimateMessageTokens(summaryMessage); + const preservedOverrides = stateWithLedger.tokenLedger!.messageTokenEstimates.slice(segments.boundaryIndex); + logCompaction('Rebuilding transcript with compaction summary.', { + turnNumber, + agentName: agent.name, + summaryMessageTokens, + preservedOverrideCount: preservedOverrides.length, + previousMessageCount: stateWithLedger.messages.length, + nextMessageCount: 1 + segments.preservedMessages.length, + }); + const rebuiltState = rebuildStateWithLedger( + stateWithLedger, + [summaryMessage, ...segments.preservedMessages], + { + trackTokens: true, + overrides: [summaryMessageTokens, ...preservedOverrides], + } + ); + const syncedRebuiltState = syncSystemPromptLedger(rebuiltState, agent); + logCompaction('Recomputed token counts after rebuilding compacted transcript.', { + turnNumber, + agentName: agent.name, + rebuiltMessageCount: syncedRebuiltState.state.messages.length, + rebuiltTotalMessageTokens: syncedRebuiltState.state.tokenLedger?.totalMessageTokens ?? 
0, + rebuiltInputTokens: syncedRebuiltState.inputTokens, + thresholdTokens, + }); + + if (syncedRebuiltState.inputTokens > thresholdTokens) { + warnCompaction('Compaction completed, but rebuilt transcript still exceeds threshold.', { + turnNumber, + agentName: agent.name, + beforeInputTokens: currentInputTokens, + afterInputTokens: syncedRebuiltState.inputTokens, + thresholdTokens, + summaryMessageTokens, + }); + config.onEvent?.({ + type: 'compaction_end', + data: { + turn: turnNumber, + agentName: agent.name, + status: 'failed', + thresholdTokens, + beforeInputTokens: currentInputTokens, + afterInputTokens: syncedRebuiltState.inputTokens, + compactedMessageCount, + preservedMessageCount, + summaryMessageTokens, + error: 'Compaction completed but the rebuilt transcript still exceeds the configured threshold.', + model, + }, + }); + + return { + success: false, + state: syncedRebuiltState.state, + error: 'Compaction completed but the rebuilt transcript still exceeds the configured threshold.', + }; + } + + config.onEvent?.({ + type: 'compaction_end', + data: { + turn: turnNumber, + agentName: agent.name, + status: 'success', + thresholdTokens, + beforeInputTokens: currentInputTokens, + afterInputTokens: syncedRebuiltState.inputTokens, + compactedMessageCount, + preservedMessageCount, + summaryMessageTokens, + model, + }, + }); + + logCompaction('Compaction succeeded.', { + turnNumber, + agentName: agent.name, + compactedMessageCount, + preservedMessageCount, + beforeInputTokens: currentInputTokens, + afterInputTokens: syncedRebuiltState.inputTokens, + summaryMessageTokens, + model, + }); + + return { + success: true, + state: syncedRebuiltState.state, + }; + } catch (error) { + const detail = error instanceof Error ? 
error.message : String(error); + warnCompaction('Compaction provider threw an error.', { + turnNumber, + agentName: agent.name, + model, + error: detail, + compactedMessageCount, + preservedMessageCount, + }); + config.onEvent?.({ + type: 'compaction_end', + data: { + turn: turnNumber, + agentName: agent.name, + status: 'failed', + thresholdTokens, + beforeInputTokens: currentInputTokens, + compactedMessageCount, + preservedMessageCount, + error: detail, + model, + }, + }); + + return { + success: false, + state: stateWithLedger, + error: detail, + }; + } +} + +// Converts trigger percentages like 80 or 0.8 into a validated fraction with defaults. +function normalizeTriggerPercentage(value?: number): number { + if (value === undefined || !Number.isFinite(value)) { + return DEFAULT_TRIGGER_PERCENTAGE; + } + + if (value > 1 && value <= 100) { + return value / 100; + } + + if (value <= 0 || value > 1) { + return DEFAULT_TRIGGER_PERCENTAGE; + } + + return value; +} + +// Normalizes the minimum message count required before compaction can run. +function normalizeMinCandidateMessages(value?: number): number { + if (value === undefined || !Number.isFinite(value)) { + return DEFAULT_MIN_CANDIDATE_MESSAGES; + } + + return Math.max(1, Math.floor(value)); +} + +// Sums numeric token estimates into a single total. +function sum(values: readonly number[]): number { + return values.reduce((total, value) => total + value, 0); +} + +// Builds a fresh token ledger for a message list, optionally reusing known estimates. +function createTokenLedger( + messages: readonly Message[], + overrides?: readonly (number | undefined)[], + seed?: Readonly +): TokenLedger { + const messageTokenEstimates = messages.map((message, index) => overrides?.[index] ?? 
estimateMessageTokens(message)); + const totalMessageTokens = sum(messageTokenEstimates); + logCompaction('Created token ledger snapshot.', { + messageCount: messages.length, + totalMessageTokens, + overrideCount: overrides?.filter(value => value !== undefined).length ?? 0, + messageRoles: countMessagesByRole(messages), + }); + + return { + messageTokenEstimates, + totalMessageTokens, + lastSystemPromptText: seed?.lastSystemPromptText, + lastSystemPromptTokens: seed?.lastSystemPromptTokens ?? 0, + }; +} + +// Estimates total tokens for a message, including content, tool metadata, and attachments. +function estimateMessageTokens(message: Readonly): number { + let total = estimateContentTokens(message.content); + + if (message.tool_calls && message.tool_calls.length > 0) { + total += estimateTextTokens(JSON.stringify(message.tool_calls)); + } + + if (message.attachments && message.attachments.length > 0) { + total += message.attachments.reduce((sumTokens, attachment) => sumTokens + estimateAttachmentTokens(attachment), 0); + } + + if (message.tool_call_id) { + total += estimateTextTokens(message.tool_call_id); + } + + return total; +} + +// Estimates token usage for message content across plain text and structured content parts. +function estimateContentTokens(content: Message['content']): number { + if (typeof content === 'string') { + return estimateTextTokens(content); + } + + if (Array.isArray(content)) { + return content.reduce((total, part) => { + if (part.type === 'text') { + return total + estimateTextTokens(part.text); + } + + if (part.type === 'image_url') { + return total + estimateImageUrlTokens(part.image_url.url); + } + + if (part.type === 'file') { + return total + estimateTextTokens(JSON.stringify(part.file)); + } + + return total; + }, 0); + } + + return estimateTextTokens(getTextContent(content)); +} + +// Estimates attachment token cost using fixed image cost, encoded payload size, or a placeholder fallback. 
+// NOTE: non-image attachments are sized from the length of their base64 payload, but
+// src/providers/model.ts does not send most documents to the model as base64: it extracts
+// text or falls back to a short placeholder. Compaction thresholds can therefore be
+// materially wrong for document-heavy chats.
+//
+// Resolution order: images cost FIXED_IMAGE_TOKENS; inline `data` is sized by its encoded
+// length; a `data:` URL is sized by the payload after the first comma; anything else is
+// costed as the short textual placeholder built below.
+//
+// NOTE(review): the parameter's generic argument was missing in this copy of the patch
+// (`NonNullable[number]`); it is reconstructed from the call site in estimateMessageTokens,
+// which passes elements of `message.attachments` - confirm against the original commit.
+function estimateAttachmentTokens(attachment: NonNullable<Message['attachments']>[number]): number {
+  if (attachment.kind === 'image') {
+    return FIXED_IMAGE_TOKENS;
+  }
+
+  if (attachment.data) {
+    return estimateEncodedTokens(attachment.data.length);
+  }
+
+  if (attachment.url?.startsWith('data:')) {
+    // A data URL is "<header>,<payload>"; only the payload contributes to the estimate.
+    const encodedPayload = attachment.url.split(',', 2)[1] || '';
+    return estimateEncodedTokens(encodedPayload.length);
+  }
+
+  const label = attachment.name || attachment.mimeType || attachment.format || 'attachment';
+  const urlSuffix = attachment.url ? `:${attachment.url}` : '';
+  return estimateTextTokens(`${attachment.kind}:${label}${urlSuffix}`);
+}
+
+// Assigns the current fixed token estimate for image URLs and inline image data.
+// Remote URLs and `data:` URLs cost the same today, so the URL is not inspected; the
+// parameter is kept so call sites need no change if the two forms are ever costed apart.
+function estimateImageUrlTokens(_url: string): number {
+  return FIXED_IMAGE_TOKENS;
+}
+
+// Converts a base64 payload length into an approximate token count.
+// Base64 carries 3 bytes per 4 characters, hence the 3/4 factor; BASE64_DECODE_OVERHEAD_BYTES
+// is then subtracted and the result is floored at zero. The remaining byte count is divided
+// by 4 and rounded up, i.e. roughly four bytes are treated as one token.
+function estimateEncodedTokens(encodedLength: number): number {
+  const estimatedBytes = Math.max(0, Math.floor((encodedLength * 3) / 4 - BASE64_DECODE_OVERHEAD_BYTES));
+  return Math.ceil(estimatedBytes / 4);
+}
+
+// Validates provider-reported usage values before reusing them as ledger entries.
+// Returns undefined for anything that is not a finite number greater than zero, so the
+// caller can fall back to its own estimate; valid values are rounded up to a whole token.
+function normalizeUsageTokens(value: unknown): number | undefined {
+  if (typeof value !== 'number' || !Number.isFinite(value) || value <= 0) {
+    return undefined;
+  }
+
+  return Math.ceil(value);
+}
+
+// Splits the transcript into the compactable prefix and the live suffix that must be preserved.
+function splitMessagesForCompaction(
+  messages: readonly Message[],
+  compactionConfig: ResolvedCompactionConfig
+): CompactionSegments {
+  // Messages before the boundary are handed to the summariser; messages from the boundary
+  // onwards are carried over unchanged.
+  const boundaryIndex = resolveCompactionBoundary(messages, compactionConfig);
+  const compactableMessages = messages.slice(0, boundaryIndex);
+  const preservedMessages = messages.slice(boundaryIndex);
+
+  logCompaction('Split transcript for compaction.', {
+    totalMessageCount: messages.length,
+    boundaryIndex,
+    compactableMessageCount: compactableMessages.length,
+    preservedMessageCount: preservedMessages.length,
+    preserveLastAssistantMessage: compactionConfig.preserveLastAssistantMessage,
+  });
+
+  return { boundaryIndex, compactableMessages, preservedMessages };
+}
+
+// Resolves the cut-off index for compaction. The cut lands on the earliest of: the last
+// user message, the last assistant message (only when preserveLastAssistantMessage is set)
+// and the live-suffix boundary. When none of those exist the result is messages.length,
+// meaning the whole transcript is compactable.
+function resolveCompactionBoundary(
+  messages: readonly Message[],
+  compactionConfig: ResolvedCompactionConfig
+): number {
+  const lastUserIndex = findLastIndex(messages, candidate => candidate.role === 'user');
+  const lastAssistantIndex = compactionConfig.preserveLastAssistantMessage
+    ? findLastIndex(messages, candidate => candidate.role === 'assistant')
+    : -1;
+  const liveBoundary = findLiveSuffixBoundary(messages);
+
+  // A negative index means "no such anchor", so it must not pull the boundary down.
+  const anchors = [lastUserIndex, lastAssistantIndex, liveBoundary].filter(anchor => anchor >= 0);
+  const boundaryIndex = Math.min(messages.length, ...anchors);
+
+  if (!Number.isFinite(boundaryIndex)) {
+    return 0;
+  }
+
+  const resolvedBoundary = Math.max(0, Math.min(boundaryIndex, messages.length));
+  logCompaction('Resolved compaction boundary.', {
+    totalMessageCount: messages.length,
+    lastUserIndex,
+    preserveLastAssistantMessage: compactionConfig.preserveLastAssistantMessage,
+    liveSuffixBoundary: liveBoundary,
+    resolvedBoundary,
+  });
+  return resolvedBoundary;
+}
+
+// Returns the index from which the tail must be kept because it holds live tool or
+// clarification state, or -1 when there is none. An assistant message with an unanswered
+// tool call takes precedence. Otherwise, if the final message is a tool result whose status
+// is 'halted' or 'awaiting_clarification', the boundary is the assistant message that issued
+// that call, falling back to the tool result itself when no such assistant message is found.
+function findLiveSuffixBoundary(messages: readonly Message[]): number {
+  const pendingBoundary = findPendingToolCallBoundary(messages);
+  if (pendingBoundary >= 0) {
+    return pendingBoundary;
+  }
+
+  const tailIndex = messages.length - 1;
+  const tail = messages[tailIndex];
+  if (!tail || tail.role !== 'tool') {
+    return -1;
+  }
+
+  const tailStatus = tryReadToolStatus(tail);
+  if (tailStatus !== 'halted' && tailStatus !== 'awaiting_clarification') {
+    return -1;
+  }
+
+  const haltedCallId = tail.tool_call_id;
+  if (!haltedCallId) {
+    return tailIndex;
+  }
+
+  const issuingAssistantIndex = findLastIndex(
+    messages,
+    (candidate, position) =>
+      position < tailIndex &&
+      candidate.role === 'assistant' &&
+      Boolean(candidate.tool_calls?.some(toolCall => toolCall.id === haltedCallId))
+  );
+  return issuingAssistantIndex >= 0 ? issuingAssistantIndex : tailIndex;
+}
+
+// Returns the index of the last assistant message that has at least one tool call with no
+// matching tool result later in the transcript, or -1 when every tool call is answered.
+function findPendingToolCallBoundary(messages: readonly Message[]): number {
+  return findLastIndex(messages, (candidate, position) => {
+    if (candidate.role !== 'assistant') {
+      return false;
+    }
+
+    const toolCalls = candidate.tool_calls ?? [];
+    return toolCalls.some(toolCall => !hasMatchingToolResultAfter(messages, position, toolCall.id));
+  });
+}
+
+// Checks whether a tool call already has a matching tool-result message later in the transcript.
+// Only messages strictly after `assistantIndex` are inspected; a match is a message with
+// role 'tool' whose tool_call_id equals `toolCallId`.
+function hasMatchingToolResultAfter(messages: readonly Message[], assistantIndex: number, toolCallId: string): boolean {
+  for (let index = assistantIndex + 1; index < messages.length; index++) {
+    const message = messages[index];
+    if (message.role === 'tool' && message.tool_call_id === toolCallId) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Reads a serialized tool status from a tool message when the content is JSON-shaped.
+// Returns undefined for non-tool messages, for content that does not parse as JSON, and
+// for parsed content whose `status` is not a string.
+// NOTE(review): `Readonly` has no type argument in this copy of the patch - the generic
+// argument appears to have been stripped; confirm against the original commit.
+function tryReadToolStatus(message: Readonly): string | undefined {
+  if (message.role !== 'tool') {
+    return undefined;
+  }
+
+  try {
+    const content = JSON.parse(getTextContent(message.content));
+    return typeof content?.status === 'string' ? content.status : undefined;
+  } catch {
+    // Non-JSON tool output is expected here; it simply carries no status.
+    return undefined;
+  }
+}
+
+// Returns the last index matching a predicate without relying on newer runtime helpers.
+// Scans from the end of `values` towards the start and returns -1 when nothing matches.
+// NOTE(review): `T` is used but not declared in this copy of the patch - the type
+// parameter list appears to have been stripped; confirm against the original commit.
+function findLastIndex(
+  values: readonly T[],
+  predicate: (value: T, index: number) => boolean
+): number {
+  for (let index = values.length - 1; index >= 0; index--) {
+    if (predicate(values[index], index)) {
+      return index;
+    }
+  }
+
+  return -1;
+}
+
+// Builds the one-message state sent to the compaction model.
+// The incoming state is copied, the agent name gains a ':compaction' suffix, the message
+// list is replaced by a single user message holding the rendered transcript, and the token
+// ledger is cleared.
+// NOTE(review): the generic arguments in this signature (`Readonly>`, bare `Readonly`,
+// bare `RunState`) appear to have been stripped from this copy of the patch; confirm
+// against the original commit.
+function createCompactionState(
+  state: Readonly>,
+  segments: Readonly,
+  compactionConfig: Readonly,
+  systemPromptText: string
+): RunState {
+  return {
+    ...state,
+    currentAgentName: `${state.currentAgentName}:compaction`,
+    messages: [
+      {
+        role: 'user',
+        content: buildCompactionTranscript(segments.compactableMessages, systemPromptText, compactionConfig),
+      },
+    ],
+    tokenLedger: undefined,
+  };
+}
+
+// Renders the compactable transcript into a plain-text prompt for the compaction model.
+// Sections, joined by blank lines, in order: the configured `prompt` (or the default
+// two-sentence preamble), the system prompt unless `doNotCompactSystemPrompt` is set, the
+// optional additional `rules`, and finally the formatted transcript.
+// NOTE(review): `compactionConfig: Readonly` has lost its type argument in this copy.
+function buildCompactionTranscript(
+  messages: readonly Message[],
+  systemPromptText: string,
+  compactionConfig: Readonly
+): string {
+  const sections: string[] = compactionConfig.prompt
+    ? [compactionConfig.prompt]
+    : [
+        'Compact the following conversation history into a concise summary that preserves goals, facts, decisions, constraints, unresolved questions, approvals, clarifications, and important tool outputs.',
+        'Return plain text only.',
+      ];
+
+  if (!compactionConfig.doNotCompactSystemPrompt) {
+    sections.push(`SYSTEM PROMPT:\n${systemPromptText}`);
+  }
+
+  if (compactionConfig.rules) {
+    sections.push(`ADDITIONAL COMPACTION RULES:\n${compactionConfig.rules}`);
+  }
+
+  sections.push(`TRANSCRIPT:\n${messages.map(formatMessageForCompaction).join('\n\n')}`);
+  return sections.join('\n\n');
+}
+
+// Serializes a single message into a compact, role-labelled text block.
+// Line order: the upper-cased role in brackets, the described body when non-empty, the
+// JSON-serialized tool calls when present, and the tool call id when present.
+// NOTE(review): `message: Readonly` has lost its type argument in this copy.
+function formatMessageForCompaction(message: Readonly): string {
+  const lines: string[] = [`[${message.role.toUpperCase()}]`];
+  const body = describeMessageBody(message);
+  if (body) {
+    lines.push(body);
+  }
+
+  if (message.tool_calls && message.tool_calls.length > 0) {
+    lines.push(`Tool Calls: ${JSON.stringify(message.tool_calls)}`);
+  }
+
+  if (message.tool_call_id) {
+    lines.push(`Tool Call ID: ${message.tool_call_id}`);
+  }
+
+  return lines.join('\n');
+}
+
+// Extracts human-readable content from a message, including placeholders for non-text parts.
+function describeMessageBody(message: Readonly): string { + const fragments: string[] = []; + + if (typeof message.content === 'string') { + if (message.content.trim().length > 0) { + fragments.push(message.content); + } + } else if (Array.isArray(message.content)) { + for (const part of message.content) { + if (part.type === 'text' && part.text.trim().length > 0) { + fragments.push(part.text); + } else if (part.type === 'image_url') { + fragments.push('[Image content]'); + } else if (part.type === 'file') { + fragments.push(`[File content: ${JSON.stringify(part.file)}]`); + } + } + } else { + const text = getTextContent(message.content); + if (text.trim().length > 0) { + fragments.push(text); + } + } + + if (message.attachments && message.attachments.length > 0) { + for (const attachment of message.attachments) { + fragments.push( + `[${attachment.kind === 'image' ? 'Image' : 'Attachment'} attachment: ${attachment.name || attachment.mimeType || attachment.format || 'unknown'}]` + ); + } + } + + return fragments.join('\n'); +} + +// Creates the minimal agent definition used exclusively for the compaction LLM call. +function createCompactionAgent( + agent: Readonly>, + model: string, + compactionConfig: Readonly +): Agent { + const instructionLines = compactionConfig.instructions + ? 
[compactionConfig.instructions] + : [ + 'You summarize older conversation history for JAF core compaction.', + 'Preserve user intent, important facts, constraints, important tool outputs, approvals, clarifications, and unresolved threads.', + 'Do not invent details.', + 'Return plain text only.', + ]; + + if (compactionConfig.rules) { + instructionLines.push(`Additional rules:\n${compactionConfig.rules}`); + } + + return { + name: `${agent.name}_compaction`, + instructions: () => instructionLines.join('\n\n'), + tools: [], + modelConfig: { + name: model || undefined, + temperature: 0, + }, + }; +} + +// Derives the run config used for the compaction call, including any provider override. +function createCompactionRunConfig( + config: Readonly>, + provider: ModelProvider, + model: string +): RunConfig { + return { + ...config, + modelProvider: provider, + modelOverride: model || config.modelOverride, + }; +} + +// Resolves which provider and model should execute the compaction request. +function resolveCompactionRuntime( + agent: Readonly>, + config: Readonly> +): { + readonly provider?: ModelProvider; + readonly model: string; + readonly usingOverrideProvider: boolean; + readonly error?: string; +} { + const provider = config.compaction?.modelProvider ?? config.modelProvider; + const model = config.compaction?.modelOverride ?? agent.modelConfig?.name ?? config.modelOverride ?? 
''; + const usingOverrideProvider = Boolean(config.compaction?.modelProvider && config.compaction.modelProvider !== config.modelProvider); + + if (!provider?.getCompletion) { + warnCompaction('Resolved compaction runtime without a usable provider.', { + model, + usingOverrideProvider, + }); + return { + model, + usingOverrideProvider, + error: 'Compaction provider does not implement getCompletion.', + }; + } + + if (!model && !provider.isAiSdkProvider) { + warnCompaction('Resolved compaction runtime without a model.', { + usingOverrideProvider, + providerIsAiSdkProvider: Boolean(provider.isAiSdkProvider), + }); + return { + provider, + model, + usingOverrideProvider, + error: 'No model is configured for compaction.', + }; + } + + logCompaction('Resolved compaction runtime successfully.', { + model, + usingOverrideProvider, + providerIsAiSdkProvider: Boolean(provider.isAiSdkProvider), + }); + return { + provider, + model, + usingOverrideProvider, + }; +} diff --git a/src/core/engine.ts b/src/core/engine.ts index e24ca96..a59e3ac 100644 --- a/src/core/engine.ts +++ b/src/core/engine.ts @@ -16,6 +16,7 @@ import { } from './types.js'; import { setToolRuntime } from './tool-runtime.js'; import { buildEffectiveGuardrails, executeInputGuardrailsParallel, executeInputGuardrailsSequential, executeOutputGuardrails } from './guardrails.js'; +import { appendMessagesWithLedger, maybeCompactStateBeforeTurn, rebuildStateWithLedger, shouldTrackTokens } from './compaction.js'; import { safeConsole, isVerboseLogging } from '../utils/logger.js'; import { DEFAULT_CLARIFICATION_DESCRIPTION } from '../utils/constants.js'; @@ -328,10 +329,16 @@ async function tryResumePendingToolCalls( .filter((it): it is Interruption => it !== undefined); if (interruptions.length > 0) { const nonInterruptedResults = toolResults.filter(r => !r.interruption); + const stateWithToolResults = appendMessagesWithLedger( + state, + nonInterruptedResults.map(result => result.message), + { + trackTokens: 
shouldTrackTokens(state, currentAgent), + } + ); return { finalState: { - ...state, - messages: [...state.messages, ...nonInterruptedResults.map(r => r.message)], + ...stateWithToolResults, turnCount: state.turnCount, }, outcome: { @@ -346,12 +353,17 @@ async function tryResumePendingToolCalls( data: { results: toolResults.map(r => r.message) } }); - const nextState: RunState = { - ...state, - messages: [...state.messages, ...toolResults.map(r => r.message)], - turnCount: state.turnCount, - approvals: state.approvals ?? new Map(), - }; + const nextState = appendMessagesWithLedger( + { + ...state, + turnCount: state.turnCount, + approvals: state.approvals ?? new Map(), + }, + toolResults.map(result => result.message), + { + trackTokens: shouldTrackTokens(state, currentAgent), + } + ); return await runInternal(nextState, config); } } @@ -495,6 +507,13 @@ async function runInternal( tools: effectiveTools }; + const turnNumber = state.turnCount + 1; + const compactionResult = await maybeCompactStateBeforeTurn(state, effectiveAgent, config, turnNumber); + if (!compactionResult.success) { + safeConsole.warn(`[JAF:COMPACTION] ${compactionResult.error}. 
Continuing without aborting the run.`); + } + state = compactionResult.state; + safeConsole.log(`[JAF:ENGINE] Using agent: ${effectiveAgent.name}`); if (isVerboseLogging() && effectiveTools) { safeConsole.log(`[JAF:ENGINE] Available tools:`, effectiveTools.map(t => t.schema.name)); @@ -541,8 +560,7 @@ async function runInternal( } }; } - - const turnNumber = state.turnCount + 1; + config.onEvent?.({ type: 'turn_start', data: { turn: turnNumber, agentName: currentAgent.name } }); const llmCallData = { @@ -902,7 +920,16 @@ async function runInternal( }); } - const newMessages = [...state.messages, assistantMessage]; + const assistantMessageTokenEstimate = getCompletionTokenEstimate((llmResponse as any)?.usage); + const stateWithAssistantMessage = appendMessagesWithLedger( + state, + [assistantMessage], + { + trackTokens: shouldTrackTokens(state, effectiveAgent), + overrides: [assistantMessageTokenEstimate] + } + ); + const newMessages = stateWithAssistantMessage.messages; const updatedTurnCount = state.turnCount + 1; if (llmResponse.message.tool_calls && llmResponse.message.tool_calls.length > 0) { @@ -959,19 +986,28 @@ async function runInternal( } } - const interruptedState = { - ...state, - messages: [...newMessages, ...completedToolResults.map(r => r.message)], + const interruptedState = appendMessagesWithLedger( + { + ...stateWithAssistantMessage, + turnCount: updatedTurnCount, + approvals: updatedApprovals, + clarifications: updatedClarifications, + }, + completedToolResults.map(result => result.message), + { + trackTokens: shouldTrackTokens(stateWithAssistantMessage, effectiveAgent), + } + ); + const interruptedStateWithTurn = { + ...interruptedState, turnCount: updatedTurnCount, - approvals: updatedApprovals, - clarifications: updatedClarifications, }; if (config.memory?.autoStore && config.conversationId) { safeConsole.log(`[JAF:ENGINE] Storing conversation state due to interruption for ${config.conversationId}`); const stateForStorage = { - 
...interruptedState, - messages: [...interruptedState.messages, ...approvalRequiredResults.map(r => r.message)] + ...interruptedStateWithTurn, + messages: [...interruptedStateWithTurn.messages, ...approvalRequiredResults.map(r => r.message)] }; await storeConversationHistory(stateForStorage, config); } @@ -979,12 +1015,12 @@ async function runInternal( await runTurnEndHooks(config, { turn: turnNumber, agentName: currentAgent.name, - state: interruptedState, + state: interruptedStateWithTurn, lastAssistantMessage: assistantMessage }); return { - finalState: interruptedState, + finalState: interruptedStateWithTurn, outcome: { status: 'interrupted', interruptions, @@ -1009,7 +1045,10 @@ async function runInternal( type: 'handoff_denied', data: { from: currentAgent.name, to: targetAgent, reason: `Agent ${currentAgent.name} cannot handoff to ${targetAgent}` } }); - const failureState = { ...state, messages: newMessages, turnCount: updatedTurnCount }; + const failureState = { + ...stateWithAssistantMessage, + turnCount: updatedTurnCount, + }; await runTurnEndHooks(config, { turn: turnNumber, agentName: currentAgent.name, @@ -1048,13 +1087,18 @@ async function runInternal( } }); - const nextState: RunState = { - ...state, - messages: [...cleanedNewMessages, ...toolResults.map(r => r.message)], - currentAgentName: targetAgent, - turnCount: updatedTurnCount, - approvals: state.approvals ?? new Map(), - }; + const nextState = rebuildStateWithLedger( + { + ...stateWithAssistantMessage, + currentAgentName: targetAgent, + turnCount: updatedTurnCount, + approvals: state.approvals ?? 
new Map(), + }, + [...cleanedNewMessages, ...toolResults.map(r => r.message)], + { + trackTokens: shouldTrackTokens(stateWithAssistantMessage, effectiveAgent), + } + ); await runTurnEndHooks(config, { turn: turnNumber, agentName: currentAgent.name, @@ -1080,12 +1124,17 @@ async function runInternal( } }); - const nextState: RunState = { - ...state, - messages: [...cleanedNewMessages, ...toolResults.map(r => r.message)], - turnCount: updatedTurnCount, - approvals: state.approvals ?? new Map(), - }; + const nextState = rebuildStateWithLedger( + { + ...stateWithAssistantMessage, + turnCount: updatedTurnCount, + approvals: state.approvals ?? new Map(), + }, + [...cleanedNewMessages, ...toolResults.map(r => r.message)], + { + trackTokens: shouldTrackTokens(stateWithAssistantMessage, effectiveAgent), + } + ); await runTurnEndHooks(config, { turn: turnNumber, agentName: currentAgent.name, @@ -1103,14 +1152,18 @@ async function runInternal( if (!parseResult.success) { config.onEvent?.({ type: 'decode_error', data: { errors: parseResult.error.issues } }); + const stateAfterAssistant = { + ...stateWithAssistantMessage, + turnCount: updatedTurnCount, + }; await runTurnEndHooks(config, { turn: turnNumber, agentName: currentAgent.name, - state: { ...state, messages: newMessages, turnCount: updatedTurnCount }, + state: stateAfterAssistant, lastAssistantMessage: assistantMessage }); return { - finalState: { ...state, messages: newMessages, turnCount: updatedTurnCount }, + finalState: stateAfterAssistant, outcome: { status: 'error', error: { @@ -1140,14 +1193,18 @@ async function runInternal( } } if (!outputGuardrailResult.isValid) { + const stateAfterAssistant = { + ...stateWithAssistantMessage, + turnCount: updatedTurnCount, + }; await runTurnEndHooks(config, { turn: turnNumber, agentName: currentAgent.name, - state: { ...state, messages: newMessages, turnCount: updatedTurnCount }, + state: stateAfterAssistant, lastAssistantMessage: assistantMessage }); return { - finalState: { 
...state, messages: newMessages, turnCount: updatedTurnCount }, + finalState: stateAfterAssistant, outcome: { status: 'error', error: { @@ -1160,15 +1217,19 @@ async function runInternal( config.onEvent?.({ type: 'final_output', data: { output: parseResult.data } }); // End of turn + const completedState = { + ...stateWithAssistantMessage, + turnCount: updatedTurnCount, + }; await runTurnEndHooks(config, { turn: turnNumber, agentName: currentAgent.name, - state: { ...state, messages: newMessages, turnCount: updatedTurnCount }, + state: completedState, lastAssistantMessage: assistantMessage }); return { - finalState: { ...state, messages: newMessages, turnCount: updatedTurnCount }, + finalState: completedState, outcome: { status: 'completed', output: parseResult.data as Out @@ -1194,14 +1255,18 @@ async function runInternal( } } if (!outputGuardrailResult.isValid) { + const stateAfterAssistant = { + ...stateWithAssistantMessage, + turnCount: updatedTurnCount, + }; await runTurnEndHooks(config, { turn: turnNumber, agentName: currentAgent.name, - state: { ...state, messages: newMessages, turnCount: updatedTurnCount }, + state: stateAfterAssistant, lastAssistantMessage: assistantMessage }); return { - finalState: { ...state, messages: newMessages, turnCount: updatedTurnCount }, + finalState: stateAfterAssistant, outcome: { status: 'error', error: { @@ -1214,15 +1279,19 @@ async function runInternal( config.onEvent?.({ type: 'final_output', data: { output: llmResponse.message.content } }); // End of turn + const completedState = { + ...stateWithAssistantMessage, + turnCount: updatedTurnCount, + }; await runTurnEndHooks(config, { turn: turnNumber, agentName: currentAgent.name, - state: { ...state, messages: newMessages, turnCount: updatedTurnCount }, + state: completedState, lastAssistantMessage: assistantMessage }); return { - finalState: { ...state, messages: newMessages, turnCount: updatedTurnCount }, + finalState: completedState, outcome: { status: 'completed', 
output: llmResponse.message.content as Out @@ -1234,13 +1303,13 @@ async function runInternal( await runTurnEndHooks(config, { turn: turnNumber, agentName: currentAgent.name, - state: { ...state, messages: newMessages, turnCount: updatedTurnCount }, + state: { ...stateWithAssistantMessage, turnCount: updatedTurnCount }, lastAssistantMessage: assistantMessage }); safeConsole.error(`[JAF:ENGINE] No tool calls or content returned by model. LLMResponse: `, llmResponse); return { - finalState: { ...state, messages: newMessages, turnCount: updatedTurnCount }, + finalState: { ...stateWithAssistantMessage, turnCount: updatedTurnCount }, outcome: { status: 'error', error: { @@ -1251,6 +1320,15 @@ async function runInternal( }; } +function getCompletionTokenEstimate(usage: any): number | undefined { + const completionTokens = usage?.completion_tokens ?? usage?.completionTokens; + if (typeof completionTokens !== 'number' || !Number.isFinite(completionTokens) || completionTokens <= 0) { + return undefined; + } + + return Math.ceil(completionTokens); +} + type ToolCallResult = { message: Message; isHandoff?: boolean; @@ -1745,4 +1823,3 @@ async function storeConversationHistory( safeConsole.log(`[JAF:MEMORY] Stored ${messagesToStore.length} messages for conversation ${config.conversationId}`); } - diff --git a/src/core/tracing.ts b/src/core/tracing.ts index 668b5ba..a299d10 100644 --- a/src/core/tracing.ts +++ b/src/core/tracing.ts @@ -320,6 +320,12 @@ export class ConsoleTraceCollector implements TraceCollector { case 'tool_call_end': console.log(`${prefix} Tool ${event.data.toolName} completed`); break; + case 'compaction_start': + console.log(`${prefix} Compaction started for ${event.data.agentName} (turn ${event.data.turn}) threshold=${event.data.thresholdTokens} current=${event.data.currentInputTokens}`); + break; + case 'compaction_end': + console.log(`${prefix} Compaction ${event.data.status} for ${event.data.agentName} (turn ${event.data.turn})`); + break; case 
'handoff': console.log(`${prefix} Agent handoff: ${event.data.from} → ${event.data.to}`); break; diff --git a/src/core/types.ts b/src/core/types.ts index b512ec0..59b05a2 100644 --- a/src/core/types.ts +++ b/src/core/types.ts @@ -75,6 +75,25 @@ export type ModelConfig = { readonly reasoning?: ReasoningConfig; }; +export type CompactionConfig = { + readonly enabled: boolean; + readonly triggerPercentage?: number; + readonly doNotCompactSystemPrompt?: boolean; + readonly preserveLastAssistantMessage?: boolean; + /** + * Overrides the default system instructions used for the compaction model. + * Additional `rules`, when provided, are still appended below these instructions. + */ + readonly instructions?: string; + /** + * Overrides the default user prompt preamble used before the compactable transcript. + * The runtime still appends the optional system prompt, additional `rules`, and transcript. + */ + readonly prompt?: string; + readonly rules?: string; + readonly minCandidateMessages?: number; +}; + export type Tool = { readonly schema: { readonly name: string; @@ -139,6 +158,7 @@ export type Agent = { readonly handoffs?: readonly string[]; readonly modelConfig?: ModelConfig; readonly advancedConfig?: AdvancedConfig; + readonly compaction?: boolean | CompactionConfig; }; export type Guardrail = ( @@ -164,6 +184,13 @@ export enum InterruptionStatus { ApprovedAndExecuted = 'approved_and_executed' } +export type TokenLedger = { + readonly messageTokenEstimates: readonly number[]; + readonly totalMessageTokens: number; + readonly lastSystemPromptText?: string; + readonly lastSystemPromptTokens: number; +}; + export type RunState = { readonly runId: RunId; readonly traceId: TraceId; @@ -173,6 +200,7 @@ export type RunState = { readonly turnCount: number; readonly approvals?: ReadonlyMap; readonly clarifications?: ReadonlyMap; + readonly tokenLedger?: TokenLedger; }; export type JAFError = @@ -248,7 +276,9 @@ export type TraceEvent = | { type: 'turn_end'; data: { turn: 
number; agentName: string } } | { type: 'run_end'; data: { outcome: RunResult['outcome']; finalState: RunState; traceId: TraceId; runId: RunId; } } | { type: 'clarification_requested'; data: { clarificationId: string; question: string; options: readonly ClarificationOption[]; context?: any; } } - | { type: 'clarification_provided'; data: { clarificationId: string; selectedOption: ClarificationOption; selectedId: string; } }; + | { type: 'clarification_provided'; data: { clarificationId: string; selectedOption: ClarificationOption; selectedId: string; } } + | { type: 'compaction_start'; data: { turn: number; agentName: string; thresholdTokens: number; currentInputTokens: number; compactableMessageCount: number; preservedMessageCount: number; usingOverrideProvider: boolean; model: string; } } + | { type: 'compaction_end'; data: { turn: number; agentName: string; status: 'success' | 'skipped' | 'failed'; thresholdTokens: number; beforeInputTokens: number; afterInputTokens?: number; compactedMessageCount: number; preservedMessageCount: number; summaryMessageTokens?: number; reason?: string; error?: string; model: string; } }; /** * Helper type to extract event data by event type @@ -419,6 +449,11 @@ export type CompletionStreamChunk = { readonly raw?: any; }; +export type ModelTokenLimits = { + readonly maxInputTokens: number; + readonly maxOutputTokens?: number; +}; + export interface ModelProvider { isAiSdkProvider?: boolean; getCompletion: ( @@ -436,8 +471,18 @@ export interface ModelProvider { agent: Readonly>, config: Readonly> ) => AsyncGenerator; + getTokenLimits?: ( + state: Readonly>, + agent: Readonly>, + config: Readonly> + ) => Promise | ModelTokenLimits | undefined; } +export type CompactionRuntimeConfig = { + readonly modelProvider?: ModelProvider; + readonly modelOverride?: string; +}; + export type RunConfig = { readonly agentRegistry: ReadonlyMap>; readonly modelProvider: ModelProvider; @@ -467,6 +512,7 @@ export type RunConfig = { readonly memory?: 
MemoryConfig; readonly conversationId?: string; readonly approvalStorage?: ApprovalStorage; + readonly compaction?: CompactionRuntimeConfig; readonly defaultFastModel?: string; readonly allowClarificationRequests?: boolean; readonly clarificationDescription?: string; From e15f6f3e962d58761bd6fe72bf54399261e49b1e Mon Sep 17 00:00:00 2001 From: Aayush Shah Date: Thu, 7 May 2026 14:22:04 +0530 Subject: [PATCH 2/3] fix: align compaction workspace lockfile --- .../package.json | 2 +- examples/message-agents-real-llm-demo/package.json | 2 +- pnpm-lock.yaml | 14 ++++++++++---- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/examples/compaction-real-llm-multi-turn-demo/package.json b/examples/compaction-real-llm-multi-turn-demo/package.json index 0eb8de3..7864c6b 100644 --- a/examples/compaction-real-llm-multi-turn-demo/package.json +++ b/examples/compaction-real-llm-multi-turn-demo/package.json @@ -9,7 +9,7 @@ "build": "tsc" }, "dependencies": { - "@xynehq/jaf": "workspace:*", + "@juspay-jaf/jaf": "workspace:*", "dotenv": "^17.2.1", "openai": "^4.0.0", "zod": "^3.22.0" diff --git a/examples/message-agents-real-llm-demo/package.json b/examples/message-agents-real-llm-demo/package.json index 319e475..375d6e9 100644 --- a/examples/message-agents-real-llm-demo/package.json +++ b/examples/message-agents-real-llm-demo/package.json @@ -9,7 +9,7 @@ "build": "tsc" }, "dependencies": { - "@xynehq/jaf": "workspace:*", + "@juspay-jaf/jaf": "workspace:*", "dotenv": "^17.2.1", "openai": "^4.0.0", "zod": "^3.22.0" diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 0f11ee8..68319d1 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -190,11 +190,17 @@ importers: specifier: ^5.3.3 version: 5.9.3 - examples/compaction-demo: + examples/compaction-real-llm-multi-turn-demo: dependencies: - '@xynehq/jaf': + '@juspay-jaf/jaf': specifier: workspace:* version: link:../.. 
+ dotenv: + specifier: ^17.2.1 + version: 17.2.1 + openai: + specifier: ^4.0.0 + version: 4.104.0(ws@8.18.3)(zod@3.25.76) zod: specifier: ^3.22.0 version: 3.25.76 @@ -209,9 +215,9 @@ importers: specifier: ^5.3.3 version: 5.9.2 - examples/compaction-real-llm-demo: + examples/message-agents-real-llm-demo: dependencies: - '@xynehq/jaf': + '@juspay-jaf/jaf': specifier: workspace:* version: link:../.. dotenv: From 4637f1a56e8f8ace4b3ec9f2260a7d30e75313b2 Mon Sep 17 00:00:00 2001 From: Aayush Shah Date: Thu, 7 May 2026 14:25:28 +0530 Subject: [PATCH 3/3] fix: regenerate pnpm lockfile for compaction examples --- pnpm-lock.yaml | 99 +++++++++++++++----------------------------------- 1 file changed, 30 insertions(+), 69 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 68319d1..a51d57a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -197,48 +197,23 @@ importers: version: link:../.. dotenv: specifier: ^17.2.1 - version: 17.2.1 - openai: - specifier: ^4.0.0 - version: 4.104.0(ws@8.18.3)(zod@3.25.76) - zod: - specifier: ^3.22.0 - version: 3.25.76 - devDependencies: - '@types/node': - specifier: ^20.10.5 - version: 20.19.11 - tsx: - specifier: ^4.7.0 - version: 4.20.4 - typescript: - specifier: ^5.3.3 - version: 5.9.2 - - examples/message-agents-real-llm-demo: - dependencies: - '@juspay-jaf/jaf': - specifier: workspace:* - version: link:../.. 
- dotenv: - specifier: ^17.2.1 - version: 17.2.1 + version: 17.4.2 openai: specifier: ^4.0.0 - version: 4.104.0(ws@8.18.3)(zod@3.25.76) + version: 4.104.0(ws@8.20.0)(zod@3.25.76) zod: specifier: ^3.22.0 version: 3.25.76 devDependencies: '@types/node': specifier: ^20.10.5 - version: 20.19.11 + version: 20.19.39 tsx: specifier: ^4.7.0 - version: 4.20.4 + version: 4.21.0 typescript: specifier: ^5.3.3 - version: 5.9.2 + version: 5.9.3 examples/flight-booking: dependencies: @@ -360,6 +335,31 @@ importers: specifier: ^5.3.3 version: 5.9.3 + examples/message-agents-real-llm-demo: + dependencies: + '@juspay-jaf/jaf': + specifier: workspace:* + version: link:../.. + dotenv: + specifier: ^17.2.1 + version: 17.4.2 + openai: + specifier: ^4.0.0 + version: 4.104.0(ws@8.20.0)(zod@3.25.76) + zod: + specifier: ^3.22.0 + version: 3.25.76 + devDependencies: + '@types/node': + specifier: ^20.10.5 + version: 20.19.39 + tsx: + specifier: ^4.7.0 + version: 4.21.0 + typescript: + specifier: ^5.3.3 + version: 5.9.3 + examples/otel-tracing-demo: dependencies: '@juspay-jaf/jaf': @@ -592,7 +592,6 @@ packages: '@babel/parser@7.29.3': resolution: {integrity: sha512-b3ctpQwp+PROvU/cttc4OYl4MzfJUWy6FZg+PMXfzmt/+39iHVF0sDfqay8TQM3JA2EUOyKcFZt75jWriQijsA==} engines: {node: '>=6.0.0'} - hasBin: true '@babel/plugin-syntax-async-generators@7.8.4': resolution: {integrity: sha512-tycmZxkGfZaxhMRbXlPXuVFpdWlXpir2W4AMhSJgRKzk/eDlIXOhb2LHWoLpDF7TEHylV5zNhykX6KAgHJmTNw==} @@ -930,7 +929,6 @@ packages: '@grpc/proto-loader@0.8.0': resolution: {integrity: sha512-rc1hOQtjIWGxcxpb9aHAfLpIctjEnsDehj0DAiVfBlmT84uvR0uUtN2hEi/ecvWVjXUGf5qPF4qEgiLOx1YIMQ==} engines: {node: '>=6'} - hasBin: true '@hono/node-server@1.19.14': resolution: {integrity: sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw==} @@ -1574,7 +1572,6 @@ packages: acorn@8.16.0: resolution: {integrity: sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==} engines: 
{node: '>=0.4.0'} - hasBin: true adler-32@1.3.1: resolution: {integrity: sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A==} @@ -1696,7 +1693,6 @@ packages: baseline-browser-mapping@2.10.27: resolution: {integrity: sha512-zEs/ufmZoUd7WftKpKyXaT6RFxpQ5Qm9xytKRHvJfxFV9DFJkZph9RvJ1LcOUi0Z1ZVijMte65JbILeV+8QQEA==} engines: {node: '>=6.0.0'} - hasBin: true bignumber.js@9.3.1: resolution: {integrity: sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==} @@ -1725,7 +1721,6 @@ packages: browserslist@4.28.2: resolution: {integrity: sha512-48xSriZYYg+8qXna9kwqjIVzuQxi+KYWp2+5nCYnYKPTr0LvD89Jqk2Or5ogxz0NUMfIjhh2lIUX/LyX9B4oIg==} engines: {node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7} - hasBin: true bs-logger@0.2.6: resolution: {integrity: sha512-pd8DCoxmbgc7hyPKOvxtqNcjYoOsABPQdcCUjGp3d42VR2CX1ORhk2A87oqqu5R1kk+76nsxZupkmyd+MVtCog==} @@ -1865,12 +1860,10 @@ packages: crc-32@1.2.2: resolution: {integrity: sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==} engines: {node: '>=0.8'} - hasBin: true create-jest@29.7.0: resolution: {integrity: sha512-Adz2bdH0Vq3F53KEMJOoftQFutWCukm6J24wbPWRO4k1kMY7gS7ds/uoJkNuV8wDCtWWnuwGcJwpWcih+zEW1Q==} engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - hasBin: true cross-spawn@7.0.6: resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} @@ -2011,7 +2004,6 @@ packages: esbuild@0.27.7: resolution: {integrity: sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==} engines: {node: '>=18'} - hasBin: true escalade@3.2.0: resolution: {integrity: sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==} @@ -2043,7 +2035,6 @@ packages: resolution: {integrity: sha512-ypowyDxpVSYpkXr9WPv2PAZCtNip1Mv5KTW0SCurXv/9iOpcrH9PaqUElksqEB6pChqHGDRCFTyrZlGhnLNGiA==} engines: 
{node: ^12.22.0 || ^14.17.0 || >=16.0.0} deprecated: This version is no longer supported. Please see https://eslint.org/version-support for other options. - hasBin: true espree@9.6.1: resolution: {integrity: sha512-oruZaFkjorTpF32kDSI5/75ViwGeZginGGy2NoOSg3Q9bnwlnmDm4HLnkl0RE3n+njDXR037aY1+x58Z/zFdwQ==} @@ -2052,7 +2043,6 @@ packages: esprima@4.0.1: resolution: {integrity: sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==} engines: {node: '>=4'} - hasBin: true esquery@1.7.0: resolution: {integrity: sha512-Ap6G0WQwcU/LHsvLwON1fAQX9Zp0A2Y6Y/cJBl9r/JbW90Zyg4/zbG6zzKa2OTALELarYHmKu0GhpM5EO+7T0g==} @@ -2339,7 +2329,6 @@ packages: handlebars@4.7.9: resolution: {integrity: sha512-4E71E0rpOaQuJR2A3xDZ+GM1HyWYv1clR58tC8emQNeQe3RH7MAzSbat+V0wG78LQBo6m6bzSG/L4pBuCsgnUQ==} engines: {node: '>=0.4.7'} - hasBin: true has-flag@4.0.0: resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==} @@ -2404,7 +2393,6 @@ packages: import-local@3.2.0: resolution: {integrity: sha512-2SPlun1JUPWoM6t3F0dw0FkCF/jWY8kttcY4f599GLTSjh2OCuuhdTkJQsEcZzBqbXZGKMK2OqW1oZsjtf/gQA==} engines: {node: '>=8'} - hasBin: true imurmurhash@0.1.4: resolution: {integrity: sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==} @@ -2515,7 +2503,6 @@ packages: jest-cli@29.7.0: resolution: {integrity: sha512-OVVobw2IubN/GSYsxETi+gOe7Ka59EFMR/twOU3Jb2GnKKeMGJB5SGUUrEz3SFVmJASUdZUzy83sLNNQ2gZslg==} engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - hasBin: true peerDependencies: node-notifier: ^8.0.1 || ^9.0.0 || ^10.0.0 peerDependenciesMeta: @@ -2626,7 +2613,6 @@ packages: jest@29.7.0: resolution: {integrity: sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==} engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - hasBin: true peerDependencies: node-notifier: ^8.0.1 || ^9.0.0 || ^10.0.0 peerDependenciesMeta: @@ -2641,16 
+2627,13 @@ packages: js-yaml@3.14.2: resolution: {integrity: sha512-PMSmkqxr106Xa156c2M265Z+FTrPl+oxd/rgOQy2tijQeK5TxQ43psO1ZCwhVOSdnn+RzkzlRz/eY4BgJBYVpg==} - hasBin: true js-yaml@4.1.1: resolution: {integrity: sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==} - hasBin: true jsesc@3.1.0: resolution: {integrity: sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==} engines: {node: '>=6'} - hasBin: true json-bigint@1.0.0: resolution: {integrity: sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==} @@ -2685,7 +2668,6 @@ packages: json5@2.2.3: resolution: {integrity: sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==} engines: {node: '>=6'} - hasBin: true jszip@3.10.1: resolution: {integrity: sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==} @@ -2768,7 +2750,6 @@ packages: mammoth@1.12.0: resolution: {integrity: sha512-cwnK1RIcRdDMi2HRx2EXGYlxqIEh0Oo3bLhorgnsVJi2UkbX1+jKxuBNR9PC5+JaX7EkmJxFPmo6mjLpqShI2w==} engines: {node: '>=12.0.0'} - hasBin: true math-intrinsics@1.1.0: resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} @@ -2777,7 +2758,6 @@ packages: mathjs@14.9.1: resolution: {integrity: sha512-xhqv8Xjf+caWG3WlaPekg4v8QFOR3D5+8ycfcjMcPcnCNDgAONQLaLfyGgrggJrcHx2yUGCpACRpiD4GmXwX+Q==} engines: {node: '>= 18'} - hasBin: true media-typer@0.3.0: resolution: {integrity: sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==} @@ -2828,7 +2808,6 @@ packages: mime@1.6.0: resolution: {integrity: sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==} engines: {node: '>=4'} - hasBin: true mimic-fn@2.1.0: resolution: {integrity: 
sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==} @@ -2933,7 +2912,6 @@ packages: openai@4.104.0: resolution: {integrity: sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==} - hasBin: true peerDependencies: ws: ^8.18.0 zod: ^3.23.8 @@ -3020,7 +2998,6 @@ packages: pdf-parse@2.4.5: resolution: {integrity: sha512-mHU89HGh7v+4u2ubfnevJ03lmPgQ5WU4CxAVmTSh/sxVTEDYd1er/dKS/A6vg77NX47KTEoihq8jZBLr8Cxuwg==} engines: {node: '>=20.16.0 <21 || >=22.3.0'} - hasBin: true pdfjs-dist@5.4.296: resolution: {integrity: sha512-DlOzet0HO7OEnmUmB6wWGJrrdvbyJKftI1bhMitK7O2N8W2gc757yyYBbINy9IDafXAV9wmKr9t7xsTaNKRG5Q==} @@ -3081,11 +3058,9 @@ packages: pino@10.3.1: resolution: {integrity: sha512-r34yH/GlQpKZbU1BvFFqOjhISRo1MNx1tWYsYvmj6KIRHSPMT2+yHOEb1SG6NMvRoHRF0a07kCOox/9yakl1vg==} - hasBin: true pino@9.14.0: resolution: {integrity: sha512-8OEwKp5juEvb/MjpIc4hjqfgCNysrS94RIOMXYvpYCdm/jglrKEiAYmiumbmGhCvs+IcInsphYDFwqrjr7398w==} - hasBin: true pirates@4.0.7: resolution: {integrity: sha512-TfySrs/5nm8fQJDcBDuUng3VOUKsd7S+zqvbOTiGXHfxX4wK31ard+hoNuvkicM/2YFzlpDgABOevKSsB4G/FA==} @@ -3235,7 +3210,6 @@ packages: resolve@1.22.12: resolution: {integrity: sha512-TyeJ1zif53BPfHootBGwPRYT1RUt6oGWsaQr8UyZW/eAm9bKoijtvruSDEmZHm92CwS9nj7/fWttqPCgzep8CA==} engines: {node: '>= 0.4'} - hasBin: true ret@0.4.3: resolution: {integrity: sha512-0f4Memo5QP7WQyUEAYUO3esD/XjOc3Zjjg5CPsAq1p8sIu0XPeMbHJemKA0BO7tV0X7+A0FoEpbmHXWxPyD3wQ==} @@ -3259,7 +3233,6 @@ packages: rimraf@3.0.2: resolution: {integrity: sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==} deprecated: Rimraf versions prior to v4 are no longer supported - hasBin: true router@2.2.0: resolution: {integrity: sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==} @@ -3279,7 +3252,6 @@ packages: safe-regex2@5.1.1: resolution: {integrity: 
sha512-mOSBvHGDZMuIEZMdOz/aCEYDCv0E7nfcNsIhUF+/P+xC7Hyf3FkvymqgPbg9D1EdSGu+uKbJgy09K/RKKc7kJA==} - hasBin: true safe-stable-stringify@2.5.0: resolution: {integrity: sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==} @@ -3299,12 +3271,10 @@ packages: semver@6.3.1: resolution: {integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==} - hasBin: true semver@7.7.4: resolution: {integrity: sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==} engines: {node: '>=10'} - hasBin: true send@0.19.2: resolution: {integrity: sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg==} @@ -3483,7 +3453,6 @@ packages: ts-jest@29.4.9: resolution: {integrity: sha512-LTb9496gYPMCqjeDLdPrKuXtncudeV1yRZnF4Wo5l3SFi0RYEnYRNgMrFIdg+FHvfzjCyQk1cLncWVqiSX+EvQ==} engines: {node: ^14.15.0 || ^16.10.0 || ^18.0.0 || >=20.0.0} - hasBin: true peerDependencies: '@babel/core': '>=7.0.0-beta.0 <8' '@jest/transform': ^29.0.0 || ^30.0.0 @@ -3510,7 +3479,6 @@ packages: tsx@4.21.0: resolution: {integrity: sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==} engines: {node: '>=18.0.0'} - hasBin: true tunnel@0.0.6: resolution: {integrity: sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==} @@ -3551,12 +3519,10 @@ packages: typescript@5.9.3: resolution: {integrity: sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==} engines: {node: '>=14.17'} - hasBin: true uglify-js@3.19.3: resolution: {integrity: sha512-v3Xu+yuwBXisp6QYTcH4UbH+xYJXqnq2m/LtQVWKWzYc1iehYnLixoQDN9FH6/j9/oybfd6W9Ghwkl8+UMKTKQ==} engines: {node: '>=0.8.0'} - hasBin: true underscore@1.13.8: resolution: {integrity: sha512-DXtD3ZtEQzc7M8m4cXotyHR+FAS18C64asBYY5vqZexfYryNNnDc02W4hKg3rdQuqOYas1jkseX0+nZXjTXnvQ==} @@ -3573,7 +3539,6 @@ 
packages: update-browserslist-db@1.2.3: resolution: {integrity: sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==} - hasBin: true peerDependencies: browserslist: '>= 4.21.0' @@ -3589,8 +3554,6 @@ packages: uuid@9.0.1: resolution: {integrity: sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==} - deprecated: uuid@10 and below is no longer supported. For ESM codebases, update to uuid@latest. For CommonJS codebases, use uuid@11 (but be aware this version will likely be deprecated in 2028). - hasBin: true v8-to-istanbul@9.3.0: resolution: {integrity: sha512-kiGUalWN+rgBJ/1OHZsBtU4rXZOfj/7rKQxULKlIzwzQSvMJUUNgPwJEEh7gU6xEVxC0ahoOBvN2YI8GH6FNgA==} @@ -3620,7 +3583,6 @@ packages: which@2.0.2: resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} engines: {node: '>= 8'} - hasBin: true wmf@1.0.2: resolution: {integrity: sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==} @@ -3663,7 +3625,6 @@ packages: xlsx@0.18.5: resolution: {integrity: sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==} engines: {node: '>=0.8'} - hasBin: true xmlbuilder@10.1.1: resolution: {integrity: sha512-OyzrcFLL/nb6fMGHbiRDuPup9ljBycsdCypwuyg5AAHvyWzGfChJpCXMG88AGTIMFhGZ9RccFN1e6lhg3hkwKg==}