@@ -190,6 +190,19 @@ export interface SpanLogger {
190190 reportProgress : ( progress : string ) => void ;
191191}
192192
/**
 * Payload delivered to the `onBillingEvent` callback after a proxied
 * inference request finishes, carrying token-usage counters for
 * billing/metering.
 */
export type BillingEvent = {
  /** Discriminator tag; the only event kind emitted here. */
  event_name: "NativeInferenceTokenUsageEvent";
  /** Auth token of the caller the usage is attributed to. */
  auth_token: string;
  /** Billing org id, forwarded from the `billingOrgId` proxy option. */
  org_id?: string;
  /** Model as requested by the caller — presumably the pre-resolution name; TODO confirm against caller. */
  model?: string | null;
  /** Model name reported back in the provider's response body, if any. */
  resolved_model?: string | null;
  /** Org name from the request header, or from the first secret's org_name when the header is absent. */
  org_name?: string;
  /** Prompt tokens consumed (`prompt_tokens` / `input_tokens`). */
  input_tokens?: number;
  /** Completion tokens produced (`completion_tokens` / `output_tokens`). */
  output_tokens?: number;
  /** Prompt tokens served from the provider cache (`prompt_tokens_details.cached_tokens`). */
  cached_input_tokens?: number;
  /** Prompt tokens written into the provider cache (`prompt_tokens_details.cache_creation_tokens`). */
  cache_write_input_tokens?: number;
};
193206// This is an isomorphic implementation of proxyV1, which is used by both edge functions
194207// in CloudFlare and by the node proxy (locally and in lambda).
195208export async function proxyV1 ( {
@@ -208,6 +221,8 @@ export async function proxyV1({
208221 cacheKeyOptions = { } ,
209222 decompressFetch = false ,
210223 spanLogger,
224+ billingOrgId,
225+ onBillingEvent,
211226 signal,
212227 fetch = globalThis . fetch ,
213228} : {
@@ -237,6 +252,8 @@ export async function proxyV1({
237252 cacheKeyOptions ?: CacheKeyOptions ;
238253 decompressFetch ?: boolean ;
239254 spanLogger ?: SpanLogger ;
255+ billingOrgId ?: string ;
256+ onBillingEvent ?: ( event : BillingEvent ) => void ;
240257 signal ?: AbortSignal ;
241258 fetch ?: FetchFn ;
242259} ) : Promise < void > {
@@ -299,6 +316,7 @@ export async function proxyV1({
299316 ) ;
300317
301318 let orgName : string | undefined = proxyHeaders [ ORG_NAME_HEADER ] ?? undefined ;
319+ let resolvedOrgName : string | undefined = orgName ;
302320 const projectId : string | undefined =
303321 proxyHeaders [ PROJECT_ID_HEADER ] ?? undefined ;
304322
@@ -649,6 +667,7 @@ export async function proxyV1({
649667
650668 if ( secrets . length > 0 && ! orgName && secrets [ 0 ] . org_name ) {
651669 baseAttributes . org_name = secrets [ 0 ] . org_name ;
670+ resolvedOrgName = secrets [ 0 ] . org_name ;
652671 }
653672 logRequest ( ) ;
654673
@@ -759,6 +778,11 @@ export async function proxyV1({
759778 if ( stream ) {
760779 let first = true ;
761780 const allChunks : Uint8Array [ ] = [ ] ;
781+ let resolvedModel : string | undefined = undefined ;
782+ let inputTokens : number | undefined = undefined ;
783+ let outputTokens : number | undefined = undefined ;
784+ let cachedInputTokens : number | undefined = undefined ;
785+ let cacheWriteInputTokens : number | undefined = undefined ;
762786
763787 // These parameters are for the streaming case
764788 let reasoning : OpenAIReasoning [ ] | undefined = undefined ;
@@ -787,10 +811,20 @@ export async function proxyV1({
787811 | OpenAIChatCompletionChunk
788812 | undefined ;
789813 if ( result ) {
814+ if ( typeof result . model === "string" && result . model ) {
815+ resolvedModel = result . model ;
816+ }
790817 const extendedUsage = completionUsageSchema . safeParse (
791818 result . usage ,
792819 ) ;
793820 if ( extendedUsage . success ) {
821+ inputTokens = extendedUsage . data . prompt_tokens ;
822+ outputTokens = extendedUsage . data . completion_tokens ;
823+ cachedInputTokens =
824+ extendedUsage . data . prompt_tokens_details ?. cached_tokens ;
825+ cacheWriteInputTokens =
826+ extendedUsage . data . prompt_tokens_details
827+ ?. cache_creation_tokens ;
794828 spanLogger ?. log ( {
795829 metrics : {
796830 tokens : extendedUsage . data . total_tokens ,
@@ -978,10 +1012,20 @@ export async function proxyV1({
9781012 case "chat" :
9791013 case "completion" : {
9801014 const data = dataRaw as ChatCompletion ;
1015+ if ( typeof data . model === "string" && data . model ) {
1016+ resolvedModel = data . model ;
1017+ }
9811018 const extendedUsage = completionUsageSchema . safeParse (
9821019 data . usage ,
9831020 ) ;
9841021 if ( extendedUsage . success ) {
1022+ inputTokens = extendedUsage . data . prompt_tokens ;
1023+ outputTokens = extendedUsage . data . completion_tokens ;
1024+ cachedInputTokens =
1025+ extendedUsage . data . prompt_tokens_details ?. cached_tokens ;
1026+ cacheWriteInputTokens =
1027+ extendedUsage . data . prompt_tokens_details
1028+ ?. cache_creation_tokens ;
9851029 spanLogger ?. log ( {
9861030 output : data . choices ,
9871031 metrics : {
@@ -1041,6 +1085,15 @@ export async function proxyV1({
10411085 }
10421086 case "response" : {
10431087 const data = dataRaw as OpenAIResponse ;
1088+ if ( typeof data . model === "string" && data . model ) {
1089+ resolvedModel = data . model ;
1090+ }
1091+ if ( data . usage ) {
1092+ inputTokens = data . usage . input_tokens ;
1093+ outputTokens = data . usage . output_tokens ;
1094+ cachedInputTokens =
1095+ data . usage . input_tokens_details ?. cached_tokens ;
1096+ }
10441097 spanLogger ?. log ( {
10451098 output : data . output ,
10461099 metrics : {
@@ -1089,6 +1142,27 @@ export async function proxyV1({
10891142 } ) ;
10901143
10911144 spanLogger ?. end ( ) ;
1145+ if ( ! responseFailed ) {
1146+ try {
1147+ if ( typeof onBillingEvent !== "function" ) {
1148+ return ;
1149+ }
1150+ onBillingEvent ( {
1151+ event_name : "NativeInferenceTokenUsageEvent" ,
1152+ auth_token : authToken ,
1153+ org_id : billingOrgId ,
1154+ model,
1155+ resolved_model : resolvedModel ,
1156+ org_name : resolvedOrgName ,
1157+ input_tokens : inputTokens ,
1158+ output_tokens : outputTokens ,
1159+ cached_input_tokens : cachedInputTokens ,
1160+ cache_write_input_tokens : cacheWriteInputTokens ,
1161+ } ) ;
1162+ } catch ( error ) {
1163+ console . warn ( "billing callback failed" , error ) ;
1164+ }
1165+ }
10921166 controller . terminate ( ) ;
10931167 } ,
10941168 } ) ;
0 commit comments