diff --git a/apps/docs/content/3.core-concepts/11.ai-sdk.md b/apps/docs/content/3.core-concepts/11.ai-sdk.md
index ba359c90..beb30c73 100644
--- a/apps/docs/content/3.core-concepts/11.ai-sdk.md
+++ b/apps/docs/content/3.core-concepts/11.ai-sdk.md
@@ -2,7 +2,7 @@ title: AI SDK Integration
 description: Capture token usage, tool calls, model info, and streaming metrics from the Vercel AI SDK into wide events. Wrap your model and get full AI observability.
 navigation:
-  icon: i-lucide-scan-eye
+  icon: i-simple-icons-vercel
 links:
   - label: Wide Events
     icon: i-lucide-layers
@@ -112,15 +112,47 @@ Your wide event now includes:
 
 ## How It Works
 
-`createAILogger(log)` returns an `AILogger` with two methods:
+`createAILogger(log, options?)` returns an `AILogger` with two methods:
 
 | Method | Description |
 |--------|-------------|
-| `wrap(model)` | Wraps a language model with middleware. Accepts a model string (e.g. `'anthropic/claude-sonnet-4.6'`) or a `LanguageModelV3` object. Works with `generateText`, `streamText`, `generateObject`, `streamObject`, and `ToolLoopAgent`. |
+| `wrap(model)` | Wraps a language model with middleware. Accepts a model string (e.g. `'anthropic/claude-sonnet-4.6'`) or a `LanguageModelV3` object. Works with `generateText`, `streamText`, `generateObject`, `streamObject`, and `ToolLoopAgent`. Also works with pre-wrapped models (e.g. from supermemory). |
 | `captureEmbed(result)` | Manually captures token usage from `embed()` or `embedMany()` results (embedding models use a different type). |
 
 The middleware intercepts calls at the provider level. It does not touch your callbacks, prompts, or responses. Captured data flows through the normal evlog pipeline (sampling, enrichers, drains) and ends up in Axiom, Better Stack, or wherever you drain to.
 
+### Options
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `toolInputs` | `boolean \| ToolInputsOptions` | `false` | When enabled, `toolCalls` contains `{ name, input }` objects instead of plain strings. Opt-in because inputs can be large and may contain sensitive data. |
+
+Pass `true` to capture all inputs as-is, or an options object for fine-grained control:
+
+| Sub-option | Type | Description |
+|------------|------|-------------|
+| `maxLength` | `number` | Truncate stringified inputs exceeding this character length (appends `…`) |
+| `transform` | `(input, toolName) => unknown` | Custom transform applied before `maxLength`. Use to redact fields or reshape data. |
+
+```typescript
+// Capture everything
+const ai = createAILogger(log, { toolInputs: true })
+
+// Truncate long inputs (e.g. SQL queries)
+const ai = createAILogger(log, { toolInputs: { maxLength: 200 } })
+
+// Redact sensitive tool inputs
+const ai = createAILogger(log, {
+  toolInputs: {
+    maxLength: 500,
+    transform: (input, toolName) => {
+      if (toolName === 'queryDB') return { sql: '***' }
+      return input
+    },
+  },
+})
+```
+
 ## Usage Patterns
 
 ### streamText
@@ -182,7 +214,9 @@ import { createAILogger } from 'evlog/ai'
 
 export default defineEventHandler(async (event) => {
   const log = useLogger(event)
-  const ai = createAILogger(log)
+  const ai = createAILogger(log, {
+    toolInputs: { maxLength: 500 },
+  })
 
   const agent = new ToolLoopAgent({
     model: ai.wrap('anthropic/claude-sonnet-4.6'),
@@ -210,7 +244,17 @@ Wide event after a 3-step agent run:
     "outputTokens": 1200,
     "totalTokens": 5700,
     "finishReason": "stop",
-    "toolCalls": ["searchWeb", "queryDatabase", "searchWeb"],
+    "toolCalls": [
+      { "name": "searchWeb", "input": { "query": "TypeScript 6.0 features" } },
+      { "name": "queryDatabase", "input": { "sql": "SELECT * FROM docs WHERE topic = 'typescript'" } },
+      { "name": "searchWeb", "input": { "query": "TypeScript 6.0 release date" } }
+    ],
+    "responseId": "msg_01XFDUDYJgAACzvnptvVoYEL",
+    "stepsUsage": [
+      { "model": "claude-sonnet-4.6", "inputTokens": 1200, "outputTokens": 300, "toolCalls": ["searchWeb"] },
+      { "model": "claude-sonnet-4.6", "inputTokens": 1500, "outputTokens": 400, "toolCalls": ["queryDatabase", "searchWeb"] },
+      { "model": "claude-sonnet-4.6", "inputTokens": 1800, "outputTokens": 500 }
+    ],
     "msToFirstChunk": 312,
     "msToFinish": 8200,
     "tokensPerSecond": 146
@@ -302,13 +346,42 @@ const model = ai.wrap(anthropic('claude-sonnet-4.6'))
 | `ai.cacheReadTokens` | `usage.inputTokens.cacheRead` | Tokens read from prompt cache |
 | `ai.cacheWriteTokens` | `usage.inputTokens.cacheWrite` | Tokens written to prompt cache |
 | `ai.reasoningTokens` | `usage.outputTokens.reasoning` | Reasoning tokens (extended thinking) |
 | `ai.finishReason` | `finishReason.unified` | Why generation ended (`stop`, `tool-calls`, etc.) |
-| `ai.toolCalls` | Content / stream chunks | List of tool names called |
+| `ai.toolCalls` | Content / stream chunks | `string[]` of tool names by default, or `Array<{ name, input }>` when `toolInputs` is enabled |
+| `ai.responseId` | `response.id` | Provider-assigned response ID (e.g. Anthropic's `msg_...`) |
 | `ai.steps` | Step count | Number of LLM calls (only when > 1) |
+| `ai.stepsUsage` | Per-step accumulation | Per-step token and tool call breakdown (only when > 1 step) |
 | `ai.msToFirstChunk` | Stream timing | Time to first text chunk (streaming only) |
 | `ai.msToFinish` | Stream timing | Total stream duration (streaming only) |
 | `ai.tokensPerSecond` | Computed | Output tokens per second (streaming only) |
 | `ai.error` | Error capture | Error message if a model call fails |
 
+## Composability
+
+`ai.wrap()` works with models that are already wrapped by other tools.
+If you use supermemory, guardrails middleware, or any other model wrapper, pass the wrapped model to `ai.wrap()`:
+
+```typescript
+import { createAILogger } from 'evlog/ai'
+import { withSupermemory } from '@supermemory/tools/ai-sdk'
+
+const ai = createAILogger(log)
+const base = gateway('anthropic/claude-sonnet-4.6')
+const model = ai.wrap(withSupermemory(base, orgId, { mode: 'full' }))
+```
+
+For explicit middleware composition, use `createAIMiddleware` to get the raw middleware and compose it yourself via `wrapLanguageModel`:
+
+```typescript
+import { createAIMiddleware } from 'evlog/ai'
+import { wrapLanguageModel } from 'ai'
+
+const model = wrapLanguageModel({
+  model: base,
+  middleware: [createAIMiddleware(log, { toolInputs: true }), otherMiddleware],
+})
+```
+
+`createAIMiddleware` returns the same middleware that `createAILogger` uses internally. The only difference: `createAIMiddleware` does not include `captureEmbed` (embedding models don't go through language-model middleware). Use `createAILogger` for the full API, `createAIMiddleware` when you need explicit middleware ordering.
+
 ## Error Handling
 
 If a model call fails, the middleware captures the error into the wide event before re-throwing:
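+
+Based on the middleware's error path, the captured `ai` field comes out roughly like this (values illustrative):
+
+```json
+{
+  "ai": {
+    "calls": 1,
+    "inputTokens": 0,
+    "outputTokens": 0,
+    "totalTokens": 0,
+    "model": "claude-sonnet-4.6",
+    "provider": "anthropic",
+    "finishReason": "error",
+    "error": "Rate limit exceeded"
+  }
+}
+```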
diff --git a/apps/nuxthub-playground/server/api/chat.post.ts b/apps/nuxthub-playground/server/api/chat.post.ts index 3ac12ddb..0a962662 100644 --- a/apps/nuxthub-playground/server/api/chat.post.ts +++ b/apps/nuxthub-playground/server/api/chat.post.ts @@ -63,7 +63,7 @@ export default defineEventHandler(async (event) => { logger.set({ action: 'chat', messagesCount: messages.length }) - const ai = createAILogger(logger) + const ai = createAILogger(logger, { toolInputs: true }) try { const agent = new ToolLoopAgent({ diff --git a/apps/nuxthub-playground/server/api/test/ai-wrap.get.ts b/apps/nuxthub-playground/server/api/test/ai-wrap.get.ts new file mode 100644 index 00000000..6fa67fd8 --- /dev/null +++ b/apps/nuxthub-playground/server/api/test/ai-wrap.get.ts @@ -0,0 +1,45 @@ +import { gateway, generateText, wrapLanguageModel } from 'ai' +import type { LanguageModelV3Middleware } from '@ai-sdk/provider' +import { createAILogger } from 'evlog/ai' + +/** + * Simulates an external middleware (supermemory, guardrails, etc.) + * that injects a system message — proves the middleware actually ran in the chain. + */ +const externalMiddleware: LanguageModelV3Middleware = { + specificationVersion: 'v3', + transformParams({ params }) { + return Promise.resolve({ + ...params, + prompt: [ + { role: 'system' as const, content: 'Always start your answer with "MIDDLEWARE_OK:"' }, + ...params.prompt, + ], + }) + }, +} + +export default defineEventHandler(async (event) => { + const logger = useLogger(event) + logger.set({ action: 'test-ai-wrap-composition' }) + + const ai = createAILogger(logger, { toolInputs: true }) + + const base = gateway('google/gemini-3-flash') + const preWrapped = wrapLanguageModel({ model: base, middleware: externalMiddleware }) + const model = ai.wrap(preWrapped) + + const result = await generateText({ + model, + prompt: 'Say hello.', + maxOutputTokens: 200, + }) + + const middlewareRan = result.text.startsWith('MIDDLEWARE_OK:') + + return { + status: 'ok', + middlewareRan, + text: result.text, + } +}) diff --git a/apps/playground/package.json b/apps/playground/package.json index fb20dc02..23c9272c 100644 --- a/apps/playground/package.json +++ b/apps/playground/package.json @@ -12,9 +12,9 @@ "typecheck": "vue-tsc --noEmit" }, "dependencies": { + "@nuxt/ui": "^4.5.1", "evlog": "workspace:*", "nuxt": "^4.4.2", - "@nuxt/ui": "^4.5.1", "tailwindcss": "^4.2.1" } } diff --git a/packages/evlog/src/ai/index.ts b/packages/evlog/src/ai/index.ts index 239ccc21..86d8fdf3 100644 --- a/packages/evlog/src/ai/index.ts +++ b/packages/evlog/src/ai/index.ts @@ -3,6 +3,48 @@ import type { GatewayModelId } from 'ai' import type { LanguageModelV3, LanguageModelV3Middleware, LanguageModelV3StreamPart } from '@ai-sdk/provider' import type { RequestLogger } from '../types' +/** + * Fine-grained control over tool call input capture. + */ +export interface ToolInputsOptions { + /** + * Max character length for the stringified input JSON. + * Inputs exceeding this limit are truncated with a `…` suffix. + */ + maxLength?: number + /** + * Custom transform applied to each captured input before storing. + * Receives the parsed input and tool name; return value is stored. + * Runs before `maxLength` truncation. + */ + transform?: (input: unknown, toolName: string) => unknown +} + +/** + * Options for `createAILogger` and `createAIMiddleware`. + */ +export interface AILoggerOptions { + /** + * When enabled, `toolCalls` contains `{ name, input }` objects instead of plain tool name strings. 
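+   * For example, `'searchWeb'` becomes `{ name: 'searchWeb', input: { query: '…' } }` in the wide event.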
+ * Opt-in because inputs can be large and may contain sensitive data. + * + * - `true` — capture all inputs as-is + * - `{ maxLength, transform }` — capture with truncation or custom transform + * @default false + */ + toolInputs?: boolean | ToolInputsOptions +} + +/** + * Per-step token usage breakdown for multi-step agent runs. + */ +export interface AIStepUsage { + model: string + inputTokens: number + outputTokens: number + toolCalls?: string[] +} + /** * Shape of the `ai` field written to the wide event. */ @@ -18,8 +60,10 @@ export interface AIEventData { cacheWriteTokens?: number reasoningTokens?: number finishReason?: string - toolCalls?: string[] + toolCalls?: string[] | Array<{ name: string, input: unknown }> + responseId?: string steps?: number + stepsUsage?: AIStepUsage[] msToFirstChunk?: number msToFinish?: number tokensPerSecond?: number @@ -35,6 +79,9 @@ export interface AILogger { * Accepts a `LanguageModelV3` object or a model string (e.g. `'anthropic/claude-sonnet-4.6'`). * Strings are resolved via the AI SDK gateway. * + * Also works with pre-wrapped models (e.g. from supermemory, guardrails): + * `ai.wrap(withSupermemory(base, orgId))` composes correctly. + * * @example * ```ts * const ai = createAILogger(log) @@ -98,6 +145,34 @@ function resolveProviderAndModel(provider: string, modelId: string): { provider: } } +/** + * Create the evlog AI middleware that captures AI SDK data into a wide event. + * + * Use this when you need explicit middleware composition with other wrappers + * (e.g. supermemory, guardrails). For most cases, use `createAILogger` instead. + * + * Note: `captureEmbed` is not available with the raw middleware — use + * `createAILogger` if you need embedding capture. + * + * @example Nuxt API route with supermemory + * ```ts + * import { createAIMiddleware } from 'evlog/ai' + * import { wrapLanguageModel } from 'ai' + * + * export default defineEventHandler(async (event) => { + * const log = useLogger(event) + * + * const model = wrapLanguageModel({ + * model: withSupermemory(base, orgId), + * middleware: [createAIMiddleware(log, { toolInputs: true })], + * }) + * }) + * ``` + */ +export function createAIMiddleware(log: RequestLogger, options?: AILoggerOptions): LanguageModelV3Middleware { + return buildMiddleware(log, options) +} + /** * Create an AI logger that captures AI SDK data into the wide event. * @@ -118,90 +193,215 @@ function resolveProviderAndModel(provider: string, modelId: string): { provider: * onFinish: ({ text }) => saveConversation(text), * }) * ``` + * + * @example Capture tool call inputs + * ```ts + * const ai = createAILogger(log, { toolInputs: true }) + * ``` */ -export function createAILogger(log: RequestLogger): AILogger { - let calls = 0 - let steps = 0 - const usage: UsageAccumulator = { - inputTokens: 0, - outputTokens: 0, - cacheReadTokens: 0, - cacheWriteTokens: 0, - reasoningTokens: 0, +export function createAILogger(log: RequestLogger, options?: AILoggerOptions): AILogger { + const state = createAccumulatorState(options) + const middleware = buildMiddlewareFromState(log, state) + + return { + wrap: (model: LanguageModelV3 | GatewayModelId) => { + const resolved = typeof model === 'string' ? 
gateway(model) : model + return wrapLanguageModel({ model: resolved, middleware }) + }, + + captureEmbed: (result: { usage: { tokens: number } }) => { + state.calls++ + state.usage.inputTokens += result.usage.tokens + flushState(log, state) + }, } - const models: string[] = [] - const providers: string[] = [] - const allToolCalls: string[] = [] - let lastFinishReason: string | undefined - let lastMsToFirstChunk: number | undefined - let lastMsToFinish: number | undefined - let lastError: string | undefined - - function flush(): void { - const uniqueModels = [...new Set(models)] - const lastModel = models[models.length - 1] - const lastProvider = providers[providers.length - 1] - - const data: Partial & { calls: number, inputTokens: number, outputTokens: number, totalTokens: number } = { - calls, - inputTokens: usage.inputTokens, - outputTokens: usage.outputTokens, - totalTokens: usage.inputTokens + usage.outputTokens, - } +} - if (lastModel) data.model = lastModel - if (lastProvider) data.provider = lastProvider - if (uniqueModels.length > 1) data.models = uniqueModels - if (usage.cacheReadTokens > 0) data.cacheReadTokens = usage.cacheReadTokens - if (usage.cacheWriteTokens > 0) data.cacheWriteTokens = usage.cacheWriteTokens - if (usage.reasoningTokens > 0) data.reasoningTokens = usage.reasoningTokens - if (lastFinishReason) data.finishReason = lastFinishReason - if (allToolCalls.length > 0) data.toolCalls = [...allToolCalls] - if (steps > 1) data.steps = steps - if (lastMsToFirstChunk !== undefined) data.msToFirstChunk = lastMsToFirstChunk - if (lastMsToFinish !== undefined) { - data.msToFinish = lastMsToFinish - if (usage.outputTokens > 0 && lastMsToFinish > 0) { - data.tokensPerSecond = Math.round((usage.outputTokens / lastMsToFinish) * 1000) - } +interface AccumulatorState { + calls: number + steps: number + usage: UsageAccumulator + models: string[] + lastProvider: string | undefined + allToolCalls: string[] + allToolCallInputs: Array<{ name: string, input: unknown }> + stepsUsage: AIStepUsage[] + lastFinishReason: string | undefined + lastMsToFirstChunk: number | undefined + lastMsToFinish: number | undefined + lastError: string | undefined + lastResponseId: string | undefined + toolInputs: boolean + toolInputsOptions: ToolInputsOptions | undefined +} + +function resolveToolInputs(raw?: boolean | ToolInputsOptions): { enabled: boolean, options: ToolInputsOptions | undefined } { + if (!raw) return { enabled: false, options: undefined } + if (raw === true) return { enabled: true, options: undefined } + return { enabled: true, options: raw } +} + +function processToolInput(input: unknown, toolName: string, options: ToolInputsOptions | undefined): unknown { + let value = input + if (options?.transform) { + value = options.transform(value, toolName) + } + if (options?.maxLength) { + const str = typeof value === 'string' ? 
value : JSON.stringify(value) + if (str.length > options.maxLength) { + return `${str.slice(0, options.maxLength)}…` } - if (lastError) data.error = lastError + } + return value +} + +function createAccumulatorState(options?: AILoggerOptions): AccumulatorState { + const { enabled, options: captureOpts } = resolveToolInputs(options?.toolInputs) + return { + calls: 0, + steps: 0, + usage: { + inputTokens: 0, + outputTokens: 0, + cacheReadTokens: 0, + cacheWriteTokens: 0, + reasoningTokens: 0, + }, + models: [], + lastProvider: undefined, + allToolCalls: [], + allToolCallInputs: [], + stepsUsage: [], + lastFinishReason: undefined, + lastMsToFirstChunk: undefined, + lastMsToFinish: undefined, + lastError: undefined, + lastResponseId: undefined, + toolInputs: enabled, + toolInputsOptions: captureOpts, + } +} + +function flushState(log: RequestLogger, state: AccumulatorState): void { + const uniqueModels = [...new Set(state.models)] + const lastModel = state.models[state.models.length - 1] + + const data: Partial & { calls: number, inputTokens: number, outputTokens: number, totalTokens: number } = { + calls: state.calls, + inputTokens: state.usage.inputTokens, + outputTokens: state.usage.outputTokens, + totalTokens: state.usage.inputTokens + state.usage.outputTokens, + } - log.set({ ai: data } as Record) + if (lastModel) data.model = lastModel + if (state.lastProvider) data.provider = state.lastProvider + if (uniqueModels.length > 1) data.models = uniqueModels + if (state.usage.cacheReadTokens > 0) data.cacheReadTokens = state.usage.cacheReadTokens + if (state.usage.cacheWriteTokens > 0) data.cacheWriteTokens = state.usage.cacheWriteTokens + if (state.usage.reasoningTokens > 0) data.reasoningTokens = state.usage.reasoningTokens + if (state.lastFinishReason) data.finishReason = state.lastFinishReason + if (state.toolInputs && state.allToolCallInputs.length > 0) { + data.toolCalls = [...state.allToolCallInputs] + } else if (state.allToolCalls.length > 0) { + data.toolCalls = [...state.allToolCalls] + } + if (state.lastResponseId) data.responseId = state.lastResponseId + if (state.steps > 1) { + data.steps = state.steps + data.stepsUsage = [...state.stepsUsage] + } + if (state.lastMsToFirstChunk !== undefined) data.msToFirstChunk = state.lastMsToFirstChunk + if (state.lastMsToFinish !== undefined) { + data.msToFinish = state.lastMsToFinish + if (state.usage.outputTokens > 0 && state.lastMsToFinish > 0) { + data.tokensPerSecond = Math.round((state.usage.outputTokens / state.lastMsToFinish) * 1000) + } } + if (state.lastError) data.error = state.lastError + + log.set({ ai: data } as Record) +} + +function recordModel(state: AccumulatorState, provider: string, modelId: string, responseModelId?: string): void { + const resolved = resolveProviderAndModel(provider, responseModelId ?? modelId) + state.models.push(resolved.model) + state.lastProvider = resolved.provider +} - function recordModel(provider: string, modelId: string, responseModelId?: string): void { - const resolved = resolveProviderAndModel(provider, responseModelId ?? 
modelId) - models.push(resolved.model) - providers.push(resolved.provider) +function safeParseJSON(input: string): unknown { + try { + return JSON.parse(input) + } catch { + return input } +} - const middleware: LanguageModelV3Middleware = { +function recordError(log: RequestLogger, state: AccumulatorState, model: { provider: string, modelId: string }, error: unknown): void { + state.calls++ + state.steps++ + recordModel(state, model.provider, model.modelId) + state.lastFinishReason = 'error' + state.lastError = error instanceof Error ? error.message : String(error) + + const resolved = resolveProviderAndModel(model.provider, model.modelId) + state.stepsUsage.push({ + model: resolved.model, + inputTokens: 0, + outputTokens: 0, + }) + + flushState(log, state) +} + +function buildMiddleware(log: RequestLogger, options?: AILoggerOptions): LanguageModelV3Middleware { + const state = createAccumulatorState(options) + return buildMiddlewareFromState(log, state) +} + +function buildMiddlewareFromState(log: RequestLogger, state: AccumulatorState): LanguageModelV3Middleware { + return { + specificationVersion: 'v3', wrapGenerate: async ({ doGenerate, model }) => { try { const result = await doGenerate() - calls++ - steps++ - addUsage(usage, result.usage) - recordModel(model.provider, model.modelId, result.response?.modelId) - lastFinishReason = result.finishReason.unified + state.calls++ + state.steps++ + addUsage(state.usage, result.usage) + recordModel(state, model.provider, model.modelId, result.response?.modelId) + state.lastFinishReason = result.finishReason.unified + if (result.response?.id) { + state.lastResponseId = result.response.id + } + + const stepToolCalls: string[] = [] for (const item of result.content) { if (item.type === 'tool-call') { - allToolCalls.push(item.toolName) + state.allToolCalls.push(item.toolName) + stepToolCalls.push(item.toolName) + if (state.toolInputs) { + const raw = typeof item.input === 'string' ? safeParseJSON(item.input) : item.input + state.allToolCallInputs.push({ + name: item.toolName, + input: processToolInput(raw, item.toolName, state.toolInputsOptions), + }) + } } } - flush() + const resolvedModel = resolveProviderAndModel(model.provider, result.response?.modelId ?? model.modelId) + state.stepsUsage.push({ + model: resolvedModel.model, + inputTokens: result.usage.inputTokens.total ?? 0, + outputTokens: result.usage.outputTokens.total ?? 0, + ...(stepToolCalls.length > 0 ? { toolCalls: stepToolCalls } : {}), + }) + + flushState(log, state) return result } catch (error) { - calls++ - steps++ - recordModel(model.provider, model.modelId) - lastFinishReason = 'error' - lastError = error instanceof Error ? error.message : String(error) - flush() + recordError(log, state, model, error) throw error } }, @@ -213,19 +413,16 @@ export function createAILogger(log: RequestLogger): AILogger { let streamUsage: UsageAccumulator | undefined let streamFinishReason: string | undefined let streamModelId: string | undefined + let streamResponseId: string | undefined const streamToolCalls: string[] = [] + const streamToolInputBuffers = new Map() let streamError: string | undefined let doStreamResult: Awaited> try { doStreamResult = await doStream() } catch (error) { - calls++ - steps++ - recordModel(model.provider, model.modelId) - lastFinishReason = 'error' - lastError = error instanceof Error ? 
error.message : String(error) - flush() + recordError(log, state, model, error) throw error } @@ -242,6 +439,28 @@ export function createAILogger(log: RequestLogger): AILogger { if (chunk.type === 'tool-input-start') { streamToolCalls.push(chunk.toolName) + if (state.toolInputs) { + streamToolInputBuffers.set(chunk.id, { name: chunk.toolName, chunks: [] }) + } + } + + if (chunk.type === 'tool-input-delta' && state.toolInputs) { + const buffer = streamToolInputBuffers.get(chunk.id) + if (buffer) { + buffer.chunks.push(chunk.delta) + } + } + + if (chunk.type === 'tool-input-end' && state.toolInputs) { + const buffer = streamToolInputBuffers.get(chunk.id) + if (buffer) { + const raw = safeParseJSON(buffer.chunks.join('')) + state.allToolCallInputs.push({ + name: buffer.name, + input: processToolInput(raw, buffer.name, state.toolInputsOptions), + }) + streamToolInputBuffers.delete(chunk.id) + } } if (chunk.type === 'finish') { @@ -255,8 +474,9 @@ export function createAILogger(log: RequestLogger): AILogger { streamFinishReason = chunk.finishReason.unified } - if (chunk.type === 'response-metadata' && 'modelId' in chunk && chunk.modelId) { - streamModelId = chunk.modelId as string + if (chunk.type === 'response-metadata') { + if (chunk.modelId) streamModelId = chunk.modelId + if (chunk.id) streamResponseId = chunk.id } if (chunk.type === 'error') { @@ -267,32 +487,42 @@ export function createAILogger(log: RequestLogger): AILogger { }, flush() { - calls++ - steps++ + state.calls++ + state.steps++ if (streamUsage) { - usage.inputTokens += streamUsage.inputTokens - usage.outputTokens += streamUsage.outputTokens - usage.cacheReadTokens += streamUsage.cacheReadTokens - usage.cacheWriteTokens += streamUsage.cacheWriteTokens - usage.reasoningTokens += streamUsage.reasoningTokens + state.usage.inputTokens += streamUsage.inputTokens + state.usage.outputTokens += streamUsage.outputTokens + state.usage.cacheReadTokens += streamUsage.cacheReadTokens + state.usage.cacheWriteTokens += streamUsage.cacheWriteTokens + state.usage.reasoningTokens += streamUsage.reasoningTokens } - recordModel(model.provider, model.modelId, streamModelId) - lastFinishReason = streamFinishReason + recordModel(state, model.provider, model.modelId, streamModelId) + state.lastFinishReason = streamFinishReason + + state.allToolCalls.push(...streamToolCalls) - for (const name of streamToolCalls) { - allToolCalls.push(name) + if (streamResponseId) { + state.lastResponseId = streamResponseId } if (firstChunkTime) { - lastMsToFirstChunk = firstChunkTime - streamStart + state.lastMsToFirstChunk = firstChunkTime - streamStart } - lastMsToFinish = Date.now() - streamStart + state.lastMsToFinish = Date.now() - streamStart - if (streamError) lastError = streamError + if (streamError) state.lastError = streamError - flush() + const resolvedModel = resolveProviderAndModel(model.provider, streamModelId ?? model.modelId) + state.stepsUsage.push({ + model: resolvedModel.model, + inputTokens: streamUsage?.inputTokens ?? 0, + outputTokens: streamUsage?.outputTokens ?? 0, + ...(streamToolCalls.length > 0 ? { toolCalls: [...streamToolCalls] } : {}), + }) + + flushState(log, state) }, }) @@ -302,17 +532,4 @@ export function createAILogger(log: RequestLogger): AILogger { } }, } - - return { - wrap: (model: LanguageModelV3 | GatewayModelId) => { - const resolved = typeof model === 'string' ? 
gateway(model) : model - return wrapLanguageModel({ model: resolved, middleware }) - }, - - captureEmbed: (result: { usage: { tokens: number } }) => { - calls++ - usage.inputTokens += result.usage.tokens - flush() - }, - } } diff --git a/packages/evlog/test/ai/ai.test.ts b/packages/evlog/test/ai/ai.test.ts index b5c066c5..06103e02 100644 --- a/packages/evlog/test/ai/ai.test.ts +++ b/packages/evlog/test/ai/ai.test.ts @@ -1,7 +1,7 @@ import { afterEach, describe, expect, it, vi } from 'vitest' import type { LanguageModelV3, LanguageModelV3StreamPart } from '@ai-sdk/provider' import type { RequestLogger } from '../../src/types' -import { createAILogger } from '../../src/ai' +import { createAILogger, createAIMiddleware } from '../../src/ai' function createMockLogger(): RequestLogger & { setCalls: Array> } { const setCalls: Array> = [] @@ -744,4 +744,506 @@ describe('createAILogger', () => { expect(aiData.totalTokens).toBe(330) }) }) + + describe('toolInputs option', () => { + it('does not capture tool call inputs by default', async () => { + const log = createMockLogger() + const ai = createAILogger(log) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + ;(model.doGenerate as ReturnType).mockResolvedValue({ + content: [{ type: 'tool-call', toolCallId: 'tc1', toolName: 'searchWeb', input: '{"query":"weather"}' }], + finishReason: createFinishReason('tool-calls'), + usage: createMockUsage(), + response: { modelId: 'claude-sonnet-4.6' }, + }) + + await wrappedModel.doGenerate({} as any) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + expect(aiData.toolCalls).toEqual(['searchWeb']) + }) + + it('captures tool call inputs from doGenerate when enabled', async () => { + const log = createMockLogger() + const ai = createAILogger(log, { toolInputs: true }) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + ;(model.doGenerate as ReturnType).mockResolvedValue({ + content: [ + { type: 'tool-call', toolCallId: 'tc1', toolName: 'searchWeb', input: '{"query":"weather in SF"}' }, + { type: 'tool-call', toolCallId: 'tc2', toolName: 'calculate', input: '{"expression":"2+2"}' }, + ], + finishReason: createFinishReason('tool-calls'), + usage: createMockUsage(), + response: { modelId: 'claude-sonnet-4.6' }, + }) + + await wrappedModel.doGenerate({} as any) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + expect(aiData.toolCalls).toEqual([ + { name: 'searchWeb', input: { query: 'weather in SF' } }, + { name: 'calculate', input: { expression: '2+2' } }, + ]) + }) + + it('handles non-JSON tool inputs gracefully', async () => { + const log = createMockLogger() + const ai = createAILogger(log, { toolInputs: true }) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + ;(model.doGenerate as ReturnType).mockResolvedValue({ + content: [{ type: 'tool-call', toolCallId: 'tc1', toolName: 'run', input: 'not-json' }], + finishReason: createFinishReason('tool-calls'), + usage: createMockUsage(), + response: { modelId: 'claude-sonnet-4.6' }, + }) + + await wrappedModel.doGenerate({} as any) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + const toolCalls = aiData.toolCalls as Array<{ name: string, input: unknown }> + expect(toolCalls[0].input).toBe('not-json') + }) + + it('captures tool call inputs from stream deltas when enabled', async () => { + const log = createMockLogger() + const ai = createAILogger(log, { toolInputs: true }) + const model = createMockModel() + const wrappedModel = 
ai.wrap(model) + + const chunks: LanguageModelV3StreamPart[] = [ + { type: 'tool-input-start', id: 'tc1', toolName: 'searchWeb' }, + { type: 'tool-input-delta', id: 'tc1', delta: '{"que' }, + { type: 'tool-input-delta', id: 'tc1', delta: 'ry":"hello"}' }, + { type: 'tool-input-end', id: 'tc1' }, + { type: 'finish', finishReason: createFinishReason('tool-calls'), usage: createMockUsage() }, + ] + + ;(model.doStream as ReturnType).mockResolvedValue({ + stream: makeReadableStream(chunks), + }) + + const result = await wrappedModel.doStream({} as any) + await consumeStream(result.stream) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + expect(aiData.toolCalls).toEqual([{ name: 'searchWeb', input: { query: 'hello' } },]) + }) + + it('does not capture stream tool inputs when toolInputs is false', async () => { + const log = createMockLogger() + const ai = createAILogger(log) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + const chunks: LanguageModelV3StreamPart[] = [ + { type: 'tool-input-start', id: 'tc1', toolName: 'searchWeb' }, + { type: 'tool-input-delta', id: 'tc1', delta: '{"query":"test"}' }, + { type: 'tool-input-end', id: 'tc1' }, + { type: 'finish', finishReason: createFinishReason('tool-calls'), usage: createMockUsage() }, + ] + + ;(model.doStream as ReturnType).mockResolvedValue({ + stream: makeReadableStream(chunks), + }) + + const result = await wrappedModel.doStream({} as any) + await consumeStream(result.stream) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + expect(aiData.toolCalls).toEqual(['searchWeb']) + }) + + it('handles object-type tool inputs from doGenerate', async () => { + const log = createMockLogger() + const ai = createAILogger(log, { toolInputs: true }) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + ;(model.doGenerate as ReturnType).mockResolvedValue({ + content: [{ type: 'tool-call', toolCallId: 'tc1', toolName: 'run', input: { already: 'parsed' } }], + finishReason: createFinishReason('tool-calls'), + usage: createMockUsage(), + response: { modelId: 'claude-sonnet-4.6' }, + }) + + await wrappedModel.doGenerate({} as any) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + const toolCalls = aiData.toolCalls as Array<{ name: string, input: unknown }> + expect(toolCalls[0].input).toEqual({ already: 'parsed' }) + }) + + it('truncates inputs exceeding maxLength', async () => { + const log = createMockLogger() + const ai = createAILogger(log, { toolInputs: { maxLength: 20 } }) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + ;(model.doGenerate as ReturnType).mockResolvedValue({ + content: [{ type: 'tool-call', toolCallId: 'tc1', toolName: 'queryDB', input: '{"sql":"SELECT * FROM events WHERE status = 200 ORDER BY created_at DESC LIMIT 50"}' },], + finishReason: createFinishReason('tool-calls'), + usage: createMockUsage(), + response: { modelId: 'claude-sonnet-4.6' }, + }) + + await wrappedModel.doGenerate({} as any) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + const inputs = aiData.toolCalls as Array<{ name: string, input: unknown }> + expect(inputs[0].input).toBeTypeOf('string') + expect((inputs[0].input as string).length).toBeLessThanOrEqual(21) + expect((inputs[0].input as string).endsWith('…')).toBe(true) + }) + + it('does not truncate inputs within maxLength', async () => { + const log = createMockLogger() + const ai = createAILogger(log, { toolInputs: { maxLength: 500 } }) + const model 
= createMockModel() + const wrappedModel = ai.wrap(model) + + ;(model.doGenerate as ReturnType).mockResolvedValue({ + content: [{ type: 'tool-call', toolCallId: 'tc1', toolName: 'search', input: '{"q":"hello"}' },], + finishReason: createFinishReason('tool-calls'), + usage: createMockUsage(), + response: { modelId: 'claude-sonnet-4.6' }, + }) + + await wrappedModel.doGenerate({} as any) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + const inputs = aiData.toolCalls as Array<{ name: string, input: unknown }> + expect(inputs[0].input).toEqual({ q: 'hello' }) + }) + + it('applies transform function to inputs', async () => { + const log = createMockLogger() + const ai = createAILogger(log, { + toolInputs: { + transform: (input, toolName) => { + if (toolName === 'queryDB') { + return { sql: '***' } + } + return input + }, + }, + }) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + ;(model.doGenerate as ReturnType).mockResolvedValue({ + content: [ + { type: 'tool-call', toolCallId: 'tc1', toolName: 'queryDB', input: '{"sql":"SELECT * FROM users"}' }, + { type: 'tool-call', toolCallId: 'tc2', toolName: 'search', input: '{"q":"hello"}' }, + ], + finishReason: createFinishReason('tool-calls'), + usage: createMockUsage(), + response: { modelId: 'claude-sonnet-4.6' }, + }) + + await wrappedModel.doGenerate({} as any) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + const inputs = aiData.toolCalls as Array<{ name: string, input: unknown }> + expect(inputs[0]).toEqual({ name: 'queryDB', input: { sql: '***' } }) + expect(inputs[1]).toEqual({ name: 'search', input: { q: 'hello' } }) + }) + + it('applies transform then maxLength truncation', async () => { + const log = createMockLogger() + const ai = createAILogger(log, { + toolInputs: { + transform: (input) => input, + maxLength: 10, + }, + }) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + ;(model.doGenerate as ReturnType).mockResolvedValue({ + content: [{ type: 'tool-call', toolCallId: 'tc1', toolName: 'search', input: '{"query":"a very long search query that exceeds the limit"}' },], + finishReason: createFinishReason('tool-calls'), + usage: createMockUsage(), + response: { modelId: 'claude-sonnet-4.6' }, + }) + + await wrappedModel.doGenerate({} as any) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + const inputs = aiData.toolCalls as Array<{ name: string, input: unknown }> + expect((inputs[0].input as string).endsWith('…')).toBe(true) + expect((inputs[0].input as string).length).toBeLessThanOrEqual(11) + }) + + it('truncates stream tool inputs with maxLength', async () => { + const log = createMockLogger() + const ai = createAILogger(log, { toolInputs: { maxLength: 15 } }) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + const chunks: LanguageModelV3StreamPart[] = [ + { type: 'tool-input-start', id: 'tc1', toolName: 'queryDB' }, + { type: 'tool-input-delta', id: 'tc1', delta: '{"sql":"SELECT * FROM events' }, + { type: 'tool-input-delta', id: 'tc1', delta: ' WHERE id = 1"}' }, + { type: 'tool-input-end', id: 'tc1' }, + { type: 'finish', finishReason: createFinishReason('tool-calls'), usage: createMockUsage() }, + ] + + ;(model.doStream as ReturnType).mockResolvedValue({ + stream: makeReadableStream(chunks), + }) + + const result = await wrappedModel.doStream({} as any) + await consumeStream(result.stream) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + const inputs = 
aiData.toolCalls as Array<{ name: string, input: unknown }> + expect((inputs[0].input as string).endsWith('…')).toBe(true) + expect((inputs[0].input as string).length).toBeLessThanOrEqual(16) + }) + }) + + describe('responseId', () => { + it('captures responseId from doGenerate', async () => { + const log = createMockLogger() + const ai = createAILogger(log) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + ;(model.doGenerate as ReturnType).mockResolvedValue({ + content: [], + finishReason: createFinishReason(), + usage: createMockUsage(), + response: { modelId: 'claude-sonnet-4.6', id: 'msg_01XFDUDYJgAACzvnptvVoYEL' }, + }) + + await wrappedModel.doGenerate({} as any) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + expect(aiData.responseId).toBe('msg_01XFDUDYJgAACzvnptvVoYEL') + }) + + it('captures responseId from stream response-metadata', async () => { + const log = createMockLogger() + const ai = createAILogger(log) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + const chunks: LanguageModelV3StreamPart[] = [ + { type: 'response-metadata', id: 'msg_stream_123', modelId: 'claude-sonnet-4.6' } as LanguageModelV3StreamPart, + { type: 'text-start', id: 't1' }, + { type: 'text-delta', id: 't1', delta: 'Hi' }, + { type: 'text-end', id: 't1' }, + { type: 'finish', finishReason: createFinishReason(), usage: createMockUsage() }, + ] + + ;(model.doStream as ReturnType).mockResolvedValue({ + stream: makeReadableStream(chunks), + }) + + const result = await wrappedModel.doStream({} as any) + await consumeStream(result.stream) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + expect(aiData.responseId).toBe('msg_stream_123') + }) + + it('omits responseId when not provided', async () => { + const log = createMockLogger() + const ai = createAILogger(log) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + ;(model.doGenerate as ReturnType).mockResolvedValue({ + content: [], + finishReason: createFinishReason(), + usage: createMockUsage(), + response: { modelId: 'claude-sonnet-4.6' }, + }) + + await wrappedModel.doGenerate({} as any) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + expect(aiData.responseId).toBeUndefined() + }) + }) + + describe('stepsUsage', () => { + it('omits stepsUsage for a single call', async () => { + const log = createMockLogger() + const ai = createAILogger(log) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + ;(model.doGenerate as ReturnType).mockResolvedValue({ + content: [], + finishReason: createFinishReason(), + usage: createMockUsage({ inputTotal: 100, outputTotal: 50 }), + response: { modelId: 'claude-sonnet-4.6' }, + }) + + await wrappedModel.doGenerate({} as any) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + expect(aiData.stepsUsage).toBeUndefined() + expect(aiData.steps).toBeUndefined() + }) + + it('includes stepsUsage for multiple calls', async () => { + const log = createMockLogger() + const ai = createAILogger(log) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + ;(model.doGenerate as ReturnType) + .mockResolvedValueOnce({ + content: [{ type: 'tool-call', toolCallId: 'tc1', toolName: 'search', input: '{}' }], + finishReason: createFinishReason('tool-calls'), + usage: createMockUsage({ inputTotal: 100, outputTotal: 50 }), + response: { modelId: 'claude-sonnet-4.6' }, + }) + .mockResolvedValueOnce({ + content: [], + finishReason: 
createFinishReason(), + usage: createMockUsage({ inputTotal: 300, outputTotal: 200 }), + response: { modelId: 'claude-sonnet-4.6' }, + }) + + await wrappedModel.doGenerate({} as any) + await wrappedModel.doGenerate({} as any) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + expect(aiData.steps).toBe(2) + const stepsUsage = aiData.stepsUsage as Array> + expect(stepsUsage).toHaveLength(2) + expect(stepsUsage[0]).toEqual({ + model: 'claude-sonnet-4.6', + inputTokens: 100, + outputTokens: 50, + toolCalls: ['search'], + }) + expect(stepsUsage[1]).toEqual({ + model: 'claude-sonnet-4.6', + inputTokens: 300, + outputTokens: 200, + }) + }) + + it('includes stepsUsage with stream calls', async () => { + const log = createMockLogger() + const ai = createAILogger(log) + const model = createMockModel() + const wrappedModel = ai.wrap(model) + + const chunks1: LanguageModelV3StreamPart[] = [ + { type: 'tool-input-start', id: 'tc1', toolName: 'search' }, + { type: 'tool-input-delta', id: 'tc1', delta: '{}' }, + { type: 'tool-input-end', id: 'tc1' }, + { type: 'finish', finishReason: createFinishReason('tool-calls'), usage: createMockUsage({ inputTotal: 150, outputTotal: 80 }) }, + ] + + const chunks2: LanguageModelV3StreamPart[] = [ + { type: 'text-start', id: 't1' }, + { type: 'text-delta', id: 't1', delta: 'Done' }, + { type: 'text-end', id: 't1' }, + { type: 'finish', finishReason: createFinishReason(), usage: createMockUsage({ inputTotal: 400, outputTotal: 100 }) }, + ] + + ;(model.doStream as ReturnType) + .mockResolvedValueOnce({ stream: makeReadableStream(chunks1) }) + .mockResolvedValueOnce({ stream: makeReadableStream(chunks2) }) + + const result1 = await wrappedModel.doStream({} as any) + await consumeStream(result1.stream) + const result2 = await wrappedModel.doStream({} as any) + await consumeStream(result2.stream) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + expect(aiData.steps).toBe(2) + const stepsUsage = aiData.stepsUsage as Array> + expect(stepsUsage).toHaveLength(2) + expect(stepsUsage[0]).toEqual({ + model: 'claude-sonnet-4.6', + inputTokens: 150, + outputTokens: 80, + toolCalls: ['search'], + }) + expect(stepsUsage[1]).toEqual({ + model: 'claude-sonnet-4.6', + inputTokens: 400, + outputTokens: 100, + }) + }) + + it('tracks per-step models in stepsUsage', async () => { + const log = createMockLogger() + const ai = createAILogger(log) + const fast = createMockModel({ provider: 'anthropic', modelId: 'claude-haiku-4.5' }) + const smart = createMockModel({ provider: 'anthropic', modelId: 'claude-sonnet-4.6' }) + + const wrappedFast = ai.wrap(fast) + const wrappedSmart = ai.wrap(smart) + + ;(fast.doGenerate as ReturnType).mockResolvedValue({ + content: [], + finishReason: createFinishReason(), + usage: createMockUsage({ inputTotal: 50, outputTotal: 20 }), + response: { modelId: 'claude-haiku-4.5' }, + }) + + ;(smart.doGenerate as ReturnType).mockResolvedValue({ + content: [], + finishReason: createFinishReason(), + usage: createMockUsage({ inputTotal: 200, outputTotal: 100 }), + response: { modelId: 'claude-sonnet-4.6' }, + }) + + await wrappedFast.doGenerate({} as any) + await wrappedSmart.doGenerate({} as any) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + const stepsUsage = aiData.stepsUsage as Array> + expect(stepsUsage[0].model).toBe('claude-haiku-4.5') + expect(stepsUsage[1].model).toBe('claude-sonnet-4.6') + }) + }) + + describe('createAIMiddleware', () => { + it('returns a valid middleware object', () => { + 
const log = createMockLogger() + const middleware = createAIMiddleware(log) + + expect(middleware).toBeDefined() + expect(middleware.wrapGenerate).toBeTypeOf('function') + expect(middleware.wrapStream).toBeTypeOf('function') + }) + + it('captures data when used with wrapLanguageModel', async () => { + const { wrapLanguageModel } = await import('ai') + const log = createMockLogger() + const middleware = createAIMiddleware(log, { toolInputs: true }) + const model = createMockModel() + + const wrappedModel = wrapLanguageModel({ model, middleware }) + + ;(model.doGenerate as ReturnType).mockResolvedValue({ + content: [{ type: 'tool-call', toolCallId: 'tc1', toolName: 'search', input: '{"q":"test"}' }], + finishReason: createFinishReason('tool-calls'), + usage: createMockUsage({ inputTotal: 100, outputTotal: 50 }), + response: { modelId: 'claude-sonnet-4.6', id: 'msg_abc' }, + }) + + await wrappedModel.doGenerate({} as any) + + const aiData = log.setCalls[log.setCalls.length - 1].ai as Record + expect(aiData.calls).toBe(1) + expect(aiData.toolCalls).toEqual([{ name: 'search', input: { q: 'test' } }]) + expect(aiData.responseId).toBe('msg_abc') + }) + }) })
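
The one piece of the new API surface without a usage example above is `captureEmbed`. A minimal sketch, assuming a Nuxt handler like the playground's and an AI SDK embedding model whose result exposes `usage.tokens` (the provider and model id here are illustrative):

```typescript
import { embed } from 'ai'
import { openai } from '@ai-sdk/openai'
import { createAILogger } from 'evlog/ai'

export default defineEventHandler(async (event) => {
  const log = useLogger(event)
  const ai = createAILogger(log)

  // Embedding models never pass through language-model middleware,
  // so token usage is handed to the logger manually.
  const result = await embed({
    model: openai.textEmbeddingModel('text-embedding-3-small'),
    value: 'hello world',
  })
  ai.captureEmbed(result) // adds usage.tokens to ai.inputTokens and flushes

  return { dimensions: result.embedding.length }
})
```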