From 4f147d06208f205a5f4cc78253447a6c0b0377a5 Mon Sep 17 00:00:00 2001 From: Ken Jiang Date: Wed, 1 Apr 2026 19:46:26 -0400 Subject: [PATCH 1/3] native inference billing --- apis/cloudflare/src/billing.ts | 82 ++++++++++++++++++++++++++ apis/cloudflare/src/env.ts | 1 + apis/cloudflare/src/proxy.ts | 50 ++++++++++++---- apis/cloudflare/wrangler-template.toml | 2 + packages/proxy/edge/index.ts | 6 +- packages/proxy/src/proxy.ts | 74 +++++++++++++++++++++++ 6 files changed, 203 insertions(+), 12 deletions(-) create mode 100644 apis/cloudflare/src/billing.ts diff --git a/apis/cloudflare/src/billing.ts b/apis/cloudflare/src/billing.ts new file mode 100644 index 00000000..02a55b8a --- /dev/null +++ b/apis/cloudflare/src/billing.ts @@ -0,0 +1,82 @@ +import { type BillingEvent } from "@braintrust/proxy"; + +const DEFAULT_BILLING_TELEMETRY_URL = + "https://api.braintrust.dev/billing/telemetry/ingest"; + +function buildPayloadEvent(event: BillingEvent) { + if (!event.org_id) { + console.warn("billing event skipped: missing org_id"); + return null; + } + if (!event.model) { + console.warn("billing event skipped: missing model"); + return null; + } + if (!event.resolved_model) { + console.warn("billing event skipped: missing resolved_model"); + return null; + } + const hasTokenUsageData = + event.input_tokens !== undefined || + event.output_tokens !== undefined || + event.cached_input_tokens !== undefined || + event.cache_write_input_tokens !== undefined; + if (!hasTokenUsageData) { + console.warn("billing event skipped: missing token usage"); + return null; + } + const requestId = crypto.randomUUID(); + const timestamp = new Date().toISOString(); + + return { + event_name: "NativeInferenceTokenUsageEvent", + external_customer_id: event.org_id, + timestamp, + idempotency_key: requestId, + properties: { + model: event.model, + resolved_model: event.resolved_model, + org_id: event.org_id, + input_tokens: event.input_tokens, + output_tokens: event.output_tokens, + cached_input_tokens: event.cached_input_tokens, + cache_write_input_tokens: event.cache_write_input_tokens, + }, + }; +} + +export async function sendBillingTelemetryEvent({ + telemetryUrl, + event, +}: { + telemetryUrl?: string; + event: BillingEvent; +}): Promise { + try { + const payloadEvent = buildPayloadEvent(event); + if (!payloadEvent) { + return; + } + + const destination = telemetryUrl || DEFAULT_BILLING_TELEMETRY_URL; + const response = await fetch(destination, { + method: "POST", + headers: { + Authorization: `Bearer ${event.auth_token}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + events: [payloadEvent], + }), + }); + + if (!response.ok) { + const responseBody = await response.text(); + console.warn( + `billing event failed: ${response.status} ${response.statusText} ${responseBody}`, + ); + } + } catch (error) { + console.warn("billing event threw an error", error); + } +} diff --git a/apis/cloudflare/src/env.ts b/apis/cloudflare/src/env.ts index 5fdb164f..ac9a3eb5 100644 --- a/apis/cloudflare/src/env.ts +++ b/apis/cloudflare/src/env.ts @@ -4,6 +4,7 @@ declare global { BRAINTRUST_APP_URL: string; WHITELISTED_ORIGINS?: string; METRICS_LICENSE_KEY?: string; + BILLING_TELEMETRY_URL?: string; NATIVE_INFERENCE_SECRET_KEY?: string; } } diff --git a/apis/cloudflare/src/proxy.ts b/apis/cloudflare/src/proxy.ts index 5eb614da..5c74d961 100644 --- a/apis/cloudflare/src/proxy.ts +++ b/apis/cloudflare/src/proxy.ts @@ -19,6 +19,7 @@ import { BT_PARENT, resolveParentHeader } from "braintrust/util"; import { cachedLogin, makeProxySpanLogger } from "./tracing"; import { MeterProvider } from "@opentelemetry/sdk-metrics"; import { Meter, Attributes, Histogram } from "@opentelemetry/api"; +import { sendBillingTelemetryEvent } from "./billing"; export type LogHistogramFn = (args: { name: string; @@ -117,6 +118,30 @@ export async function handleProxyV1( let span: Span | undefined; let spanId: string | undefined; let spanExport: string | undefined; + let billingOrgId: string | undefined; + const orgName = request.headers.get(ORG_NAME_HEADER) ?? undefined; + const apiKey = + parseAuthHeader({ + authorization: request.headers.get("authorization") ?? undefined, + }) ?? undefined; + + const getLoginState = async () => + cachedLogin({ + appUrl: braintrustAppUrl(env).toString(), + apiKey, + orgName, + cache: credentialsCache, + }); + + if (apiKey) { + try { + const loginState = await getLoginState(); + billingOrgId = loginState.orgId ?? undefined; + } catch (error) { + console.warn("Failed to resolve billing org id", error); + } + } + const parentHeader = request.headers.get(BT_PARENT); if (parentHeader) { let parent; @@ -131,19 +156,11 @@ export async function handleProxyV1( ); } - const orgName = request.headers.get(ORG_NAME_HEADER) ?? undefined; - const apiKey = - parseAuthHeader({ - authorization: request.headers.get("authorization") ?? undefined, - }) ?? undefined; + const loginState = await getLoginState(); + billingOrgId = loginState.orgId ?? undefined; span = startSpan({ - state: await cachedLogin({ - appUrl: braintrustAppUrl(env).toString(), - apiKey, - orgName, - cache: credentialsCache, - }), + state: loginState, type: "llm", name: "LLM", parent: parent.toStr(), @@ -199,6 +216,17 @@ export async function handleProxyV1( spanLogger, spanId, spanExport, + billingOrgId, + onBillingEvent: (event) => { + ctx.waitUntil( + sendBillingTelemetryEvent({ + telemetryUrl: env.BILLING_TELEMETRY_URL, + event, + }).catch((error) => { + console.warn("billing waitUntil task failed", error); + }), + ); + }, nativeInferenceSecretKey: env.NATIVE_INFERENCE_SECRET_KEY, }; diff --git a/apis/cloudflare/wrangler-template.toml b/apis/cloudflare/wrangler-template.toml index 47b3ef4b..140710cc 100644 --- a/apis/cloudflare/wrangler-template.toml +++ b/apis/cloudflare/wrangler-template.toml @@ -28,10 +28,12 @@ head_sampling_rate = 0.2 # You should not need to edit this BRAINTRUST_APP_URL = "https://www.braintrust.dev" METRICS_LICENSE_KEY="" +BILLING_TELEMETRY_URL="https://api.braintrust.dev/billing/telemetry/ingest" [env.staging.vars] BRAINTRUST_APP_URL = "https://www.braintrust.dev" METRICS_LICENSE_KEY="" +BILLING_TELEMETRY_URL="https://api.braintrust.dev/billing/telemetry/ingest" [env.staging] kv_namespaces = [ diff --git a/packages/proxy/edge/index.ts b/packages/proxy/edge/index.ts index a577651b..0a7f5e83 100644 --- a/packages/proxy/edge/index.ts +++ b/packages/proxy/edge/index.ts @@ -1,6 +1,6 @@ import { DEFAULT_BRAINTRUST_APP_URL } from "@lib/constants"; import { flushMetrics } from "@lib/metrics"; -import { proxyV1, SpanLogger, LogHistogramFn } from "@lib/proxy"; +import { proxyV1, SpanLogger, LogHistogramFn, BillingEvent } from "@lib/proxy"; import { isEmpty } from "@lib/util"; import { MeterProvider } from "@opentelemetry/sdk-metrics"; @@ -36,6 +36,8 @@ export interface ProxyOpts { logHistogram?: LogHistogramFn; whitelist?: (string | RegExp)[]; spanLogger?: SpanLogger; + billingOrgId?: string; + onBillingEvent?: (event: BillingEvent) => void; spanId?: string; spanExport?: string; nativeInferenceSecretKey?: string; @@ -398,6 +400,8 @@ export function EdgeProxyV1(opts: ProxyOpts) { digest: digestMessage, logHistogram: opts.logHistogram, spanLogger: opts.spanLogger, + billingOrgId: opts.billingOrgId, + onBillingEvent: opts.onBillingEvent, }); } catch (e) { return new Response(`${e}`, { diff --git a/packages/proxy/src/proxy.ts b/packages/proxy/src/proxy.ts index 78a80d1b..eb37f37f 100644 --- a/packages/proxy/src/proxy.ts +++ b/packages/proxy/src/proxy.ts @@ -190,6 +190,19 @@ export interface SpanLogger { reportProgress: (progress: string) => void; } +export type BillingEvent = { + event_name: "NativeInferenceTokenUsageEvent"; + auth_token: string; + org_id?: string; + model?: string | null; + resolved_model?: string | null; + org_name?: string; + input_tokens?: number; + output_tokens?: number; + cached_input_tokens?: number; + cache_write_input_tokens?: number; +}; + // This is an isomorphic implementation of proxyV1, which is used by both edge functions // in CloudFlare and by the node proxy (locally and in lambda). export async function proxyV1({ @@ -208,6 +221,8 @@ export async function proxyV1({ cacheKeyOptions = {}, decompressFetch = false, spanLogger, + billingOrgId, + onBillingEvent, signal, fetch = globalThis.fetch, }: { @@ -237,6 +252,8 @@ export async function proxyV1({ cacheKeyOptions?: CacheKeyOptions; decompressFetch?: boolean; spanLogger?: SpanLogger; + billingOrgId?: string; + onBillingEvent?: (event: BillingEvent) => void; signal?: AbortSignal; fetch?: FetchFn; }): Promise { @@ -299,6 +316,7 @@ export async function proxyV1({ ); let orgName: string | undefined = proxyHeaders[ORG_NAME_HEADER] ?? undefined; + let resolvedOrgName: string | undefined = orgName; const projectId: string | undefined = proxyHeaders[PROJECT_ID_HEADER] ?? undefined; @@ -649,6 +667,7 @@ export async function proxyV1({ if (secrets.length > 0 && !orgName && secrets[0].org_name) { baseAttributes.org_name = secrets[0].org_name; + resolvedOrgName = secrets[0].org_name; } logRequest(); @@ -759,6 +778,11 @@ export async function proxyV1({ if (stream) { let first = true; const allChunks: Uint8Array[] = []; + let resolvedModel: string | undefined = undefined; + let inputTokens: number | undefined = undefined; + let outputTokens: number | undefined = undefined; + let cachedInputTokens: number | undefined = undefined; + let cacheWriteInputTokens: number | undefined = undefined; // These parameters are for the streaming case let reasoning: OpenAIReasoning[] | undefined = undefined; @@ -787,10 +811,20 @@ export async function proxyV1({ | OpenAIChatCompletionChunk | undefined; if (result) { + if (typeof result.model === "string" && result.model) { + resolvedModel = result.model; + } const extendedUsage = completionUsageSchema.safeParse( result.usage, ); if (extendedUsage.success) { + inputTokens = extendedUsage.data.prompt_tokens; + outputTokens = extendedUsage.data.completion_tokens; + cachedInputTokens = + extendedUsage.data.prompt_tokens_details?.cached_tokens; + cacheWriteInputTokens = + extendedUsage.data.prompt_tokens_details + ?.cache_creation_tokens; spanLogger?.log({ metrics: { tokens: extendedUsage.data.total_tokens, @@ -978,10 +1012,20 @@ export async function proxyV1({ case "chat": case "completion": { const data = dataRaw as ChatCompletion; + if (typeof data.model === "string" && data.model) { + resolvedModel = data.model; + } const extendedUsage = completionUsageSchema.safeParse( data.usage, ); if (extendedUsage.success) { + inputTokens = extendedUsage.data.prompt_tokens; + outputTokens = extendedUsage.data.completion_tokens; + cachedInputTokens = + extendedUsage.data.prompt_tokens_details?.cached_tokens; + cacheWriteInputTokens = + extendedUsage.data.prompt_tokens_details + ?.cache_creation_tokens; spanLogger?.log({ output: data.choices, metrics: { @@ -1041,6 +1085,15 @@ export async function proxyV1({ } case "response": { const data = dataRaw as OpenAIResponse; + if (typeof data.model === "string" && data.model) { + resolvedModel = data.model; + } + if (data.usage) { + inputTokens = data.usage.input_tokens; + outputTokens = data.usage.output_tokens; + cachedInputTokens = + data.usage.input_tokens_details?.cached_tokens; + } spanLogger?.log({ output: data.output, metrics: { @@ -1089,6 +1142,27 @@ export async function proxyV1({ }); spanLogger?.end(); + if (!responseFailed) { + try { + if (typeof onBillingEvent !== "function") { + return; + } + onBillingEvent({ + event_name: "NativeInferenceTokenUsageEvent", + auth_token: authToken, + org_id: billingOrgId, + model, + resolved_model: resolvedModel, + org_name: resolvedOrgName, + input_tokens: inputTokens, + output_tokens: outputTokens, + cached_input_tokens: cachedInputTokens, + cache_write_input_tokens: cacheWriteInputTokens, + }); + } catch (error) { + console.warn("billing callback failed", error); + } + } controller.terminate(); }, }); From b8426af7692e79f508d44fcf4e5b329bb8f0f211 Mon Sep 17 00:00:00 2001 From: Ken Jiang Date: Wed, 1 Apr 2026 20:10:36 -0400 Subject: [PATCH 2/3] add brain model guard --- apis/cloudflare/src/billing.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apis/cloudflare/src/billing.ts b/apis/cloudflare/src/billing.ts index 02a55b8a..8398a96c 100644 --- a/apis/cloudflare/src/billing.ts +++ b/apis/cloudflare/src/billing.ts @@ -3,6 +3,10 @@ import { type BillingEvent } from "@braintrust/proxy"; const DEFAULT_BILLING_TELEMETRY_URL = "https://api.braintrust.dev/billing/telemetry/ingest"; +function isBrainModel(model: string): boolean { + return model.startsWith("brain-"); +} + function buildPayloadEvent(event: BillingEvent) { if (!event.org_id) { console.warn("billing event skipped: missing org_id"); @@ -12,6 +16,10 @@ function buildPayloadEvent(event: BillingEvent) { console.warn("billing event skipped: missing model"); return null; } + // Skip non-brain models since braintrust only hosts brain models. + if (!isBrainModel(event.model)) { + return null; + } if (!event.resolved_model) { console.warn("billing event skipped: missing resolved_model"); return null; From d3880ec4893e3ecff5f22cf56a636674a968edff Mon Sep 17 00:00:00 2001 From: Ken Jiang Date: Thu, 2 Apr 2026 13:25:41 -0400 Subject: [PATCH 3/3] move brain model call up --- apis/cloudflare/src/billing.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/apis/cloudflare/src/billing.ts b/apis/cloudflare/src/billing.ts index 8398a96c..48d7aefc 100644 --- a/apis/cloudflare/src/billing.ts +++ b/apis/cloudflare/src/billing.ts @@ -8,10 +8,6 @@ function isBrainModel(model: string): boolean { } function buildPayloadEvent(event: BillingEvent) { - if (!event.org_id) { - console.warn("billing event skipped: missing org_id"); - return null; - } if (!event.model) { console.warn("billing event skipped: missing model"); return null; @@ -20,6 +16,11 @@ function buildPayloadEvent(event: BillingEvent) { if (!isBrainModel(event.model)) { return null; } + + if (!event.org_id) { + console.warn("billing event skipped: missing org_id"); + return null; + } if (!event.resolved_model) { console.warn("billing event skipped: missing resolved_model"); return null;