From 6e0b264b26b4a166b8acff802ea7e910f57c0a3c Mon Sep 17 00:00:00 2001 From: Alex Z Date: Tue, 10 Feb 2026 15:18:52 -0800 Subject: [PATCH 1/8] get spans --- eval/eval.go | 19 ++++ eval/functions_api.go | 9 ++ eval/task.go | 1 + eval/trace.go | 246 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 275 insertions(+) create mode 100644 eval/trace.go diff --git a/eval/eval.go b/eval/eval.go index 2d4f008..eed600a 100644 --- a/eval/eval.go +++ b/eval/eval.go @@ -232,6 +232,7 @@ type eval[I, R any] struct { scorers []Scorer[I, R] tracer oteltrace.Tracer startSpanOpt oteltrace.SpanStartOption + ensureFlush func() error goroutines int quiet bool } @@ -254,6 +255,7 @@ func newEval[I, R any]( dataset Dataset[I, R], task TaskFunc[I, R], scorers []Scorer[I, R], + ensureFlush func() error, parallelism int, quiet bool, ) *eval[I, R] { @@ -283,6 +285,7 @@ func newEval[I, R any]( scorers: scorers, tracer: tracer, startSpanOpt: startSpanOpt, + ensureFlush: ensureFlush, goroutines: goroutines, quiet: quiet, } @@ -320,6 +323,11 @@ func newEvalOpts[I, R any](ctx context.Context, s *auth.Session, tp *trace.Trace opts.Dataset, opts.Task, opts.Scorers, + func() error { + flushCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + return tp.ForceFlush(flushCtx) + }, opts.Parallelism, opts.Quiet, ), nil @@ -425,6 +433,13 @@ func (e *eval[I, R]) runCase(ctx context.Context, span oteltrace.Span, c Case[I, return err } output := taskResult.Output + taskResult.Trace = newEvalTrace( + e.session, + "experiment", + e.experimentID, + span.SpanContext().SpanID().String(), + e.ensureFlush, + ) _, err = e.runScorers(ctx, taskResult) if err != nil { @@ -523,6 +538,9 @@ func (e *eval[I, R]) runScorers(ctx context.Context, taskResult TaskResult[I, R] if err := setJSONAttr(span, "braintrust.span_attributes", scoreSpanAttrs); err != nil { return nil, err } + if taskResult.Trace == nil { + taskResult.Trace = newTrace() + } var scores []Score @@ -730,6 +748,7 @@ func testNewEval[I, R any]( dataset, task, scorers, + nil, parallelism, true, // quiet=true for tests ) diff --git a/eval/functions_api.go b/eval/functions_api.go index 3dc1126..b3dd186 100644 --- a/eval/functions_api.go +++ b/eval/functions_api.go @@ -115,11 +115,20 @@ func (f *FunctionsAPI[I, R]) Scorer(ctx context.Context, opts FunctionOpts) (Sco // Create a scorer that invokes the function scorerFunc := func(ctx context.Context, result TaskResult[I, R]) (Scores, error) { + trace := result.Trace + if trace == nil { + trace = newTrace() + } + // Build scorer input scorerInput := map[string]any{ "input": result.Input, "output": result.Output, "expected": result.Expected, + "trace": map[string]any{ + "spans": trace.GetSpans(nil), + "thread": trace.GetThread(), + }, } // Invoke the scorer function diff --git a/eval/task.go b/eval/task.go index 77e96e2..2f02dc3 100644 --- a/eval/task.go +++ b/eval/task.go @@ -41,6 +41,7 @@ type TaskResult[I, R any] struct { Expected R // What we expected Output R // What the task actually returned Metadata Metadata // Case metadata + Trace Trace // Eval trace context for scorers // UserData is custom application context from the task. // This field is NOT logged and isn't supported outside the context of the Go SDK. diff --git a/eval/trace.go b/eval/trace.go new file mode 100644 index 0000000..13b896a --- /dev/null +++ b/eval/trace.go @@ -0,0 +1,246 @@ +package eval + +import ( + "context" + "encoding/json" + "fmt" + "sync" + "time" + + "github.com/braintrustdata/braintrust-sdk-go/internal/auth" + "github.com/braintrustdata/braintrust-sdk-go/internal/https" + "github.com/braintrustdata/braintrust-sdk-go/logger" +) + +// JSONObject represents a JSON object for trace payloads. +type JSONObject = map[string]any + +// Trace provides access to trace data for scorers. +type Trace interface { + // GetSpans returns spans for the provided span types. + GetSpans(spanTypes []string) []JSONObject + // GetThread returns thread entries associated with the case. + GetThread() []JSONObject +} + +type noopTrace struct{} + +func newTrace() Trace { + return noopTrace{} +} + +func (t noopTrace) GetSpans(spanTypes []string) []JSONObject { + return []JSONObject{} +} + +func (t noopTrace) GetThread() []JSONObject { + return []JSONObject{} +} + +type traceImpl struct { + objectType string + objectID string + rootSpanID string + + session *auth.Session + ensureSpansFlushed func() error + + flushOnce sync.Once + flushErr error +} + +func newEvalTrace( + session *auth.Session, + objectType string, + objectID string, + rootSpanID string, + ensureSpansFlushed func() error, +) Trace { + return &traceImpl{ + objectType: objectType, + objectID: objectID, + rootSpanID: rootSpanID, + session: session, + ensureSpansFlushed: ensureSpansFlushed, + } +} + +func (t *traceImpl) GetSpans(spanTypes []string) []JSONObject { + if t.objectType == "" || t.objectID == "" || t.rootSpanID == "" || t.session == nil { + return []JSONObject{} + } + + if err := t.ensureSpansReady(); err != nil { + return []JSONObject{} + } + + spans, err := t.fetchSpans(spanTypes) + if err != nil { + return []JSONObject{} + } + return spans +} + +func (t *traceImpl) GetThread() []JSONObject { + // TODO: Add preprocessor-based thread extraction for trace_ref. + return []JSONObject{} +} + +func (t *traceImpl) ensureSpansReady() error { + t.flushOnce.Do(func() { + if t.ensureSpansFlushed == nil { + return + } + t.flushErr = t.ensureSpansFlushed() + }) + return t.flushErr +} + +func (t *traceImpl) fetchSpans(spanTypes []string) ([]JSONObject, error) { + loginCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + _ = t.session.Login(loginCtx) + + apiInfo := t.session.APIInfo() + client := https.NewClient(apiInfo.APIKey, apiInfo.APIURL, logger.Discard()) + + var all []JSONObject + cursor := "" + + for { + reqBody := map[string]any{ + "limit": 1000, + "filter": buildSpanFilter(t.rootSpanID, spanTypes), + } + if cursor != "" { + reqBody["cursor"] = cursor + } + + resp, err := client.POST(context.Background(), fmt.Sprintf("/v1/%s/%s/fetch", t.objectType, t.objectID), reqBody) + if err != nil { + return nil, err + } + + var payload struct { + Events []JSONObject `json:"events"` + Rows []JSONObject `json:"rows"` + Objects []JSONObject `json:"objects"` + Cursor string `json:"cursor"` + } + err = json.NewDecoder(resp.Body).Decode(&payload) + _ = resp.Body.Close() + if err != nil { + return nil, err + } + + rows := payload.Events + if len(rows) == 0 { + rows = payload.Rows + } + if len(rows) == 0 { + rows = payload.Objects + } + + for _, row := range rows { + if isScorerPurpose(row) { + continue + } + all = append(all, projectSpanRow(row)) + } + + if payload.Cursor == "" { + break + } + cursor = payload.Cursor + } + + return all, nil +} + +func buildSpanFilter(rootSpanID string, spanTypeFilter []string) JSONObject { + children := []JSONObject{ + { + "op": "eq", + "left": JSONObject{ + "op": "ident", + "name": []string{"root_span_id"}, + }, + "right": JSONObject{ + "op": "literal", + "value": rootSpanID, + }, + }, + { + "op": "or", + "children": []JSONObject{ + { + "op": "isnull", + "expr": JSONObject{ + "op": "ident", + "name": []string{"span_attributes", "purpose"}, + }, + }, + { + "op": "ne", + "left": JSONObject{ + "op": "ident", + "name": []string{"span_attributes", "purpose"}, + }, + "right": JSONObject{ + "op": "literal", + "value": "scorer", + }, + }, + }, + }, + } + + if len(spanTypeFilter) > 0 { + children = append(children, JSONObject{ + "op": "in", + "left": JSONObject{ + "op": "ident", + "name": []string{"span_attributes", "type"}, + }, + "right": JSONObject{ + "op": "literal", + "value": spanTypeFilter, + }, + }) + } + + return JSONObject{ + "op": "and", + "children": children, + } +} + +func isScorerPurpose(row JSONObject) bool { + attrs, ok := row["span_attributes"].(map[string]any) + if !ok || attrs == nil { + return false + } + purpose, ok := attrs["purpose"].(string) + return ok && purpose == "scorer" +} + +func projectSpanRow(row JSONObject) JSONObject { + out := JSONObject{} + for _, key := range []string{ + "input", + "output", + "metadata", + "span_id", + "span_parents", + "span_attributes", + "id", + "_xact_id", + "_pagination_key", + "root_span_id", + } { + if value, ok := row[key]; ok { + out[key] = value + } + } + return out +} From b8f53fa2185b632983747b07c8f854b8ea13b2d6 Mon Sep 17 00:00:00 2001 From: Alex Z Date: Tue, 17 Feb 2026 16:24:48 -0800 Subject: [PATCH 2/8] getThread --- eval/trace.go | 69 +++++++++++++++++++++++++++++++++-- eval/trace_test.go | 89 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+), 2 deletions(-) create mode 100644 eval/trace_test.go diff --git a/eval/trace.go b/eval/trace.go index 13b896a..1bf7ddd 100644 --- a/eval/trace.go +++ b/eval/trace.go @@ -82,8 +82,19 @@ func (t *traceImpl) GetSpans(spanTypes []string) []JSONObject { } func (t *traceImpl) GetThread() []JSONObject { - // TODO: Add preprocessor-based thread extraction for trace_ref. - return []JSONObject{} + if t.objectType == "" || t.objectID == "" || t.rootSpanID == "" || t.session == nil { + return []JSONObject{} + } + + if err := t.ensureSpansReady(); err != nil { + return []JSONObject{} + } + + thread, err := t.fetchThread() + if err != nil { + return []JSONObject{} + } + return thread } func (t *traceImpl) ensureSpansReady() error { @@ -157,6 +168,60 @@ func (t *traceImpl) fetchSpans(spanTypes []string) ([]JSONObject, error) { return all, nil } +func (t *traceImpl) fetchThread() ([]JSONObject, error) { + loginCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + _ = t.session.Login(loginCtx) + + apiInfo := t.session.APIInfo() + client := https.NewClient(apiInfo.APIKey, apiInfo.APIURL, logger.Discard()) + + reqBody := map[string]any{ + "global_function": "project_default", + "function_type": "preprocessor", + "mode": "json", + "input": map[string]any{ + "trace_ref": map[string]any{ + "object_type": t.objectType, + "object_id": t.objectID, + "root_span_id": t.rootSpanID, + }, + }, + } + + resp, err := client.POST(context.Background(), "/v1/function/invoke", reqBody) + if err != nil { + return nil, err + } + + var payload any + err = json.NewDecoder(resp.Body).Decode(&payload) + _ = resp.Body.Close() + if err != nil { + return nil, err + } + + // The invoke response may be either {"output": ...} or a raw JSON value. + if outputWrapper, ok := payload.(map[string]any); ok { + if output, hasOutput := outputWrapper["output"]; hasOutput { + payload = output + } + } + + values, ok := payload.([]any) + if !ok { + return []JSONObject{}, nil + } + + thread := make([]JSONObject, 0, len(values)) + for _, value := range values { + if item, ok := value.(map[string]any); ok { + thread = append(thread, item) + } + } + return thread, nil +} + func buildSpanFilter(rootSpanID string, spanTypeFilter []string) JSONObject { children := []JSONObject{ { diff --git a/eval/trace_test.go b/eval/trace_test.go new file mode 100644 index 0000000..429e1e4 --- /dev/null +++ b/eval/trace_test.go @@ -0,0 +1,89 @@ +package eval + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/braintrustdata/braintrust-sdk-go/internal/auth" + "github.com/braintrustdata/braintrust-sdk-go/logger" +) + +func TestTrace_GetThread_ReturnsThreadFromPreprocessorInvoke(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, http.MethodPost, r.Method) + require.Equal(t, "/v1/function/invoke", r.URL.Path) + + var body map[string]any + require.NoError(t, json.NewDecoder(r.Body).Decode(&body)) + + assert.Equal(t, "project_default", body["global_function"]) + assert.Equal(t, "preprocessor", body["function_type"]) + assert.Equal(t, "json", body["mode"]) + + input, ok := body["input"].(map[string]any) + require.True(t, ok) + + traceRef, ok := input["trace_ref"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "experiment", traceRef["object_type"]) + assert.Equal(t, "obj-123", traceRef["object_id"]) + assert.Equal(t, "root-456", traceRef["root_span_id"]) + + require.NoError(t, json.NewEncoder(w).Encode(map[string]any{ + "output": []map[string]any{ + {"role": "system", "content": "hello"}, + {"role": "user", "content": "hi"}, + }, + })) + })) + defer server.Close() + + session := auth.NewTestSession( + "test-key", + "org-id", + "org-name", + server.URL, + server.URL, + server.URL, + logger.Discard(), + ) + + trace := newEvalTrace(session, "experiment", "obj-123", "root-456", func() error { return nil }) + + thread := trace.GetThread() + require.Len(t, thread, 2) + assert.Equal(t, "system", thread[0]["role"]) + assert.Equal(t, "user", thread[1]["role"]) +} + +func TestTrace_GetThread_ReturnsEmptyForNonArrayOutput(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.NoError(t, json.NewEncoder(w).Encode(map[string]any{ + "output": map[string]any{"not": "an array"}, + })) + })) + defer server.Close() + + session := auth.NewTestSession( + "test-key", + "org-id", + "org-name", + server.URL, + server.URL, + server.URL, + logger.Discard(), + ) + + trace := newEvalTrace(session, "experiment", "obj-123", "root-456", func() error { return nil }) + + assert.Empty(t, trace.GetThread()) +} From e6560985c3675e83105c7dc3243f6982e4cc7108 Mon Sep 17 00:00:00 2001 From: Alex Z Date: Wed, 18 Feb 2026 14:38:47 -0800 Subject: [PATCH 3/8] endpoint fix and temp logging --- eval/trace.go | 88 +++++++++++++++++++++++++++++++++++----- eval/trace_test.go | 3 +- internal/https/client.go | 3 ++ 3 files changed, 83 insertions(+), 11 deletions(-) diff --git a/eval/trace.go b/eval/trace.go index 1bf7ddd..86c9332 100644 --- a/eval/trace.go +++ b/eval/trace.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "io" "sync" "time" @@ -169,6 +170,7 @@ func (t *traceImpl) fetchSpans(spanTypes []string) ([]JSONObject, error) { } func (t *traceImpl) fetchThread() ([]JSONObject, error) { + fmt.Printf("\n=== fetchThread start ===\n") loginCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() _ = t.session.Login(loginCtx) @@ -188,15 +190,9 @@ func (t *traceImpl) fetchThread() ([]JSONObject, error) { }, }, } - - resp, err := client.POST(context.Background(), "/v1/function/invoke", reqBody) - if err != nil { - return nil, err - } - - var payload any - err = json.NewDecoder(resp.Body).Decode(&payload) - _ = resp.Body.Close() + reqJSON, _ := json.MarshalIndent(reqBody, "", " ") + fmt.Printf("request body:\n%s\n", string(reqJSON)) + payload, err := t.invokeThreadEndpoint(client, "/function/invoke", reqBody) if err != nil { return nil, err } @@ -205,23 +201,95 @@ func (t *traceImpl) fetchThread() ([]JSONObject, error) { if outputWrapper, ok := payload.(map[string]any); ok { if output, hasOutput := outputWrapper["output"]; hasOutput { payload = output + fmt.Printf("payload.output:\n") + fmt.Printf(" type: %T\n", payload) + fmt.Printf(" value: %#v\n", payload) + } else { + fmt.Printf("payload object missing output key:\n") + fmt.Printf(" keys: %v\n", keys(outputWrapper)) } } values, ok := payload.([]any) if !ok { + fmt.Printf("payload is not array:\n") + fmt.Printf(" type: %T\n", payload) + fmt.Printf(" value: %#v\n", payload) return []JSONObject{}, nil } thread := make([]JSONObject, 0, len(values)) - for _, value := range values { + for i, value := range values { if item, ok := value.(map[string]any); ok { thread = append(thread, item) + } else { + fmt.Printf("skipping non-object thread item:\n") + fmt.Printf(" index: %d\n", i) + fmt.Printf(" type: %T\n", value) + fmt.Printf(" value: %#v\n", value) } } + fmt.Printf("thread result:\n") + fmt.Printf(" thread_len: %d\n", len(thread)) + fmt.Printf(" raw_array_len: %d\n", len(values)) + fmt.Printf("=== fetchThread end ===\n") return thread, nil } +func (t *traceImpl) invokeThreadEndpoint(client *https.Client, path string, reqBody map[string]any) (any, error) { + fmt.Printf("request path: %s\n", path) + resp, err := client.POST(context.Background(), path, reqBody) + if err != nil { + fmt.Printf("invoke failed:\n") + fmt.Printf(" path: %s\n", path) + fmt.Printf(" object_type: %s\n", t.objectType) + fmt.Printf(" object_id: %s\n", t.objectID) + fmt.Printf(" root_span_id: %s\n", t.rootSpanID) + fmt.Printf(" error: %v\n", err) + return nil, err + } + + fmt.Printf("response status (%s): %d\n", path, resp.StatusCode) + fmt.Printf("response headers (%s):\n%v\n", path, resp.Header) + bodyBytes, readErr := io.ReadAll(resp.Body) + _ = resp.Body.Close() + if readErr != nil { + fmt.Printf("read body failed:\n") + fmt.Printf(" path: %s\n", path) + fmt.Printf(" object_type: %s\n", t.objectType) + fmt.Printf(" object_id: %s\n", t.objectID) + fmt.Printf(" root_span_id: %s\n", t.rootSpanID) + fmt.Printf(" error: %v\n", readErr) + return nil, readErr + } + fmt.Printf("response body (%s):\n%s\n", path, string(bodyBytes)) + + var payload any + err = json.Unmarshal(bodyBytes, &payload) + if err != nil { + fmt.Printf("decode failed:\n") + fmt.Printf(" path: %s\n", path) + fmt.Printf(" object_type: %s\n", t.objectType) + fmt.Printf(" object_id: %s\n", t.objectID) + fmt.Printf(" root_span_id: %s\n", t.rootSpanID) + fmt.Printf(" error: %v\n", err) + fmt.Printf(" raw: %s\n", string(bodyBytes)) + return nil, err + } + fmt.Printf("parsed payload (%s):\n", path) + fmt.Printf(" type: %T\n", payload) + fmt.Printf(" value: %#v\n", payload) + return payload, nil +} + +func keys(m map[string]any) []string { + out := make([]string, 0, len(m)) + for k := range m { + out = append(out, k) + } + return out +} + func buildSpanFilter(rootSpanID string, spanTypeFilter []string) JSONObject { children := []JSONObject{ { diff --git a/eval/trace_test.go b/eval/trace_test.go index 429e1e4..eeef0cd 100644 --- a/eval/trace_test.go +++ b/eval/trace_test.go @@ -18,7 +18,8 @@ func TestTrace_GetThread_ReturnsThreadFromPreprocessorInvoke(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { require.Equal(t, http.MethodPost, r.Method) - require.Equal(t, "/v1/function/invoke", r.URL.Path) + require.Equal(t, "/function/invoke", r.URL.Path) + assert.Equal(t, "application/json", r.Header.Get("Accept")) var body map[string]any require.NoError(t, json.NewDecoder(r.Body).Decode(&body)) diff --git a/internal/https/client.go b/internal/https/client.go index 6d69fad..13ad0ed 100644 --- a/internal/https/client.go +++ b/internal/https/client.go @@ -157,6 +157,9 @@ func (c *Client) Client() *http.Client { func (c *Client) doRequest(req *http.Request) (*http.Response, error) { // Add auth header req.Header.Set("Authorization", "Bearer "+c.apiKey) + if req.Header.Get("Accept") == "" { + req.Header.Set("Accept", "application/json") + } // Log request start := time.Now() From 36f3995a426e5fff0ad5fa225975505fe7ba5374 Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Fri, 20 Feb 2026 17:09:15 -0500 Subject: [PATCH 4/8] Fix eval trace thread retrieval and move trace API calls behind API client --- api/client.go | 6 + api/functions/functions.go | 65 +++++++---- api/functions/functions_test.go | 96 ++++++++++++++++ api/functions/types.go | 8 ++ api/objects/objects.go | 42 +++++++ api/objects/objects_test.go | 65 +++++++++++ api/objects/types.go | 17 +++ eval/eval.go | 19 ++- eval/eval_test.go | 73 ++++++++++++ eval/trace.go | 153 ++++--------------------- eval/trace_test.go | 30 ++++- examples/internal/README.md | 1 + examples/internal/trace-scorer/main.go | 90 +++++++++++++++ internal/auth/auth_test.go | 19 +++ internal/auth/session.go | 13 +++ 15 files changed, 538 insertions(+), 159 deletions(-) create mode 100644 api/objects/objects.go create mode 100644 api/objects/objects_test.go create mode 100644 api/objects/types.go create mode 100644 examples/internal/trace-scorer/main.go diff --git a/api/client.go b/api/client.go index 49f9afa..be3f63f 100644 --- a/api/client.go +++ b/api/client.go @@ -5,6 +5,7 @@ import ( "github.com/braintrustdata/braintrust-sdk-go/api/datasets" "github.com/braintrustdata/braintrust-sdk-go/api/experiments" "github.com/braintrustdata/braintrust-sdk-go/api/functions" + "github.com/braintrustdata/braintrust-sdk-go/api/objects" "github.com/braintrustdata/braintrust-sdk-go/api/projects" "github.com/braintrustdata/braintrust-sdk-go/internal/https" "github.com/braintrustdata/braintrust-sdk-go/logger" @@ -86,3 +87,8 @@ func (a *API) Datasets() *datasets.API { func (a *API) Functions() *functions.API { return functions.New(a.client) } + +// Objects is used to access generic object APIs (e.g. /v1/{object_type}/{id}/fetch). +func (a *API) Objects() *objects.API { + return objects.New(a.client) +} diff --git a/api/functions/functions.go b/api/functions/functions.go index 8806ff9..2ded42e 100644 --- a/api/functions/functions.go +++ b/api/functions/functions.go @@ -3,6 +3,7 @@ package functions import ( "context" "encoding/json" + "errors" "fmt" "io" @@ -102,50 +103,70 @@ func (a *API) Invoke(ctx context.Context, functionID string, input any) (any, er } path := fmt.Sprintf("/v1/function/%s/invoke", functionID) + return a.invokePath(ctx, path, req) +} + +// InvokeGlobal calls a global function by slug/type and returns the output. +func (a *API) InvokeGlobal(ctx context.Context, req InvokeGlobalParams) (any, error) { + if req.GlobalFunction == "" { + return nil, fmt.Errorf("global function is required") + } + + out, err := a.invokePath(ctx, "/function/invoke", req) + if err == nil { + return out, nil + } + + var httpErr *https.HTTPError + if errors.As(err, &httpErr) && httpErr.StatusCode == 404 { + return a.invokePath(ctx, "/v1/function/invoke", req) + } + return nil, err +} + +// Delete deletes a function by ID. +func (a *API) Delete(ctx context.Context, functionID string) error { + if functionID == "" { + return fmt.Errorf("function ID is required") + } + + path := fmt.Sprintf("/v1/function/%s", functionID) + resp, err := a.client.DELETE(ctx, path) + if err != nil { + return err + } + defer func() { _ = resp.Body.Close() }() + + return nil +} + +func (a *API) invokePath(ctx context.Context, path string, req any) (any, error) { resp, err := a.client.POST(ctx, path, req) if err != nil { return nil, err } defer func() { _ = resp.Body.Close() }() - // Read the entire response body so we can parse it multiple ways body, err := io.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("failed to read response body: %w", err) } - // Parse response - try as object first, then as raw value + return decodeInvokeResponse(body) +} + +func decodeInvokeResponse(body []byte) (any, error) { var response map[string]any if err := json.Unmarshal(body, &response); err == nil { - // Response is an object, extract output field if present if output, ok := response["output"]; ok { return output, nil } - // If no output field, return the whole object return response, nil } - // Response is not an object, try parsing as raw JSON value (string, number, etc.) var output any if err := json.Unmarshal(body, &output); err != nil { return nil, fmt.Errorf("failed to decode response: %w", err) } - return output, nil } - -// Delete deletes a function by ID. -func (a *API) Delete(ctx context.Context, functionID string) error { - if functionID == "" { - return fmt.Errorf("function ID is required") - } - - path := fmt.Sprintf("/v1/function/%s", functionID) - resp, err := a.client.DELETE(ctx, path) - if err != nil { - return err - } - defer func() { _ = resp.Body.Close() }() - - return nil -} diff --git a/api/functions/functions_test.go b/api/functions/functions_test.go index 4810db4..636f9dd 100644 --- a/api/functions/functions_test.go +++ b/api/functions/functions_test.go @@ -2,6 +2,9 @@ package functions import ( "context" + "encoding/json" + "net/http" + "net/http/httptest" "testing" "github.com/stretchr/testify/assert" @@ -10,6 +13,7 @@ import ( "github.com/braintrustdata/braintrust-sdk-go/api/projects" "github.com/braintrustdata/braintrust-sdk-go/internal/https" "github.com/braintrustdata/braintrust-sdk-go/internal/vcr" + "github.com/braintrustdata/braintrust-sdk-go/logger" ) const integrationTestProject = "go-sdk-tests" @@ -318,3 +322,95 @@ func TestFunctions_Invoke_Validation(t *testing.T) { require.Error(t, err) assert.Contains(t, err.Error(), "required") } + +func TestFunctions_InvokeGlobal_PostsExpectedPayload(t *testing.T) { + t.Parallel() + + ctx := context.Background() + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, http.MethodPost, r.Method) + require.Equal(t, "/function/invoke", r.URL.Path) + assert.Equal(t, "application/json", r.Header.Get("Accept")) + + var body map[string]any + require.NoError(t, json.NewDecoder(r.Body).Decode(&body)) + assert.Equal(t, "project_default", body["global_function"]) + assert.Equal(t, "preprocessor", body["function_type"]) + assert.Equal(t, "json", body["mode"]) + + input, ok := body["input"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "abc", input["x"]) + + require.NoError(t, json.NewEncoder(w).Encode(map[string]any{ + "output": []map[string]any{ + {"role": "system", "content": "hello"}, + }, + })) + })) + defer server.Close() + + client := New(https.NewClient("test-key", server.URL, logger.Discard())) + + output, err := client.InvokeGlobal(ctx, InvokeGlobalParams{ + GlobalFunction: "project_default", + FunctionType: "preprocessor", + Mode: "json", + Input: map[string]any{"x": "abc"}, + }) + require.NoError(t, err) + + values, ok := output.([]any) + require.True(t, ok) + require.Len(t, values, 1) + + first, ok := values[0].(map[string]any) + require.True(t, ok) + assert.Equal(t, "system", first["role"]) +} + +func TestFunctions_InvokeGlobal_FallbackToV1On404(t *testing.T) { + t.Parallel() + + ctx := context.Background() + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/function/invoke": + http.NotFound(w, r) + case "/v1/function/invoke": + require.NoError(t, json.NewEncoder(w).Encode(map[string]any{ + "output": []map[string]any{ + {"role": "user", "content": "fallback"}, + }, + })) + default: + t.Fatalf("unexpected path: %s", r.URL.Path) + } + })) + defer server.Close() + + client := New(https.NewClient("test-key", server.URL, logger.Discard())) + + output, err := client.InvokeGlobal(ctx, InvokeGlobalParams{ + GlobalFunction: "project_default", + FunctionType: "preprocessor", + Mode: "json", + Input: map[string]any{"x": "abc"}, + }) + require.NoError(t, err) + + values, ok := output.([]any) + require.True(t, ok) + require.Len(t, values, 1) +} + +func TestFunctions_InvokeGlobal_Validation(t *testing.T) { + t.Parallel() + + ctx := context.Background() + client := New(https.NewClient("test-key", "https://example.com", logger.Discard())) + + _, err := client.InvokeGlobal(ctx, InvokeGlobalParams{}) + require.Error(t, err) + assert.Contains(t, err.Error(), "required") +} diff --git a/api/functions/types.go b/api/functions/types.go index f2866ee..4e24d46 100644 --- a/api/functions/types.go +++ b/api/functions/types.go @@ -52,6 +52,14 @@ type InvokeParams struct { Input any `json:"input"` } +// InvokeGlobalParams represents the request payload for invoking a global function. +type InvokeGlobalParams struct { + GlobalFunction string `json:"global_function"` + FunctionType string `json:"function_type,omitempty"` + Mode string `json:"mode,omitempty"` + Input any `json:"input,omitempty"` +} + // QueryResponse represents the response from querying functions. type QueryResponse struct { Objects []Function `json:"objects"` diff --git a/api/objects/objects.go b/api/objects/objects.go new file mode 100644 index 0000000..fcfd489 --- /dev/null +++ b/api/objects/objects.go @@ -0,0 +1,42 @@ +package objects + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/braintrustdata/braintrust-sdk-go/internal/https" +) + +// API provides methods for generic object operations. +type API struct { + client *https.Client +} + +// New creates a new objects API client. +func New(client *https.Client) *API { + return &API{client: client} +} + +// Fetch retrieves rows from a given object type and ID. +func (a *API) Fetch(ctx context.Context, objectType, objectID string, params FetchParams) (*FetchResponse, error) { + if objectType == "" { + return nil, fmt.Errorf("object type is required") + } + if objectID == "" { + return nil, fmt.Errorf("object ID is required") + } + + path := fmt.Sprintf("/v1/%s/%s/fetch", objectType, objectID) + resp, err := a.client.POST(ctx, path, params) + if err != nil { + return nil, err + } + defer func() { _ = resp.Body.Close() }() + + var out FetchResponse + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return nil, fmt.Errorf("error decoding response: %w", err) + } + return &out, nil +} diff --git a/api/objects/objects_test.go b/api/objects/objects_test.go new file mode 100644 index 0000000..b31a52f --- /dev/null +++ b/api/objects/objects_test.go @@ -0,0 +1,65 @@ +package objects + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/braintrustdata/braintrust-sdk-go/internal/https" + "github.com/braintrustdata/braintrust-sdk-go/logger" +) + +func TestObjects_Fetch_PostsExpectedRequest(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, http.MethodPost, r.Method) + require.Equal(t, "/v1/experiment/exp-123/fetch", r.URL.Path) + + var body map[string]any + require.NoError(t, json.NewDecoder(r.Body).Decode(&body)) + assert.Equal(t, float64(1000), body["limit"]) + + filter, ok := body["filter"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "and", filter["op"]) + + require.NoError(t, json.NewEncoder(w).Encode(FetchResponse{ + Events: []map[string]any{{"id": "row-1"}}, + Cursor: "next", + })) + })) + defer server.Close() + + api := New(https.NewClient("test-key", server.URL, logger.Discard())) + resp, err := api.Fetch(context.Background(), "experiment", "exp-123", FetchParams{ + Limit: 1000, + Filter: map[string]any{ + "op": "and", + }, + }) + require.NoError(t, err) + require.NotNil(t, resp) + require.Len(t, resp.Events, 1) + assert.Equal(t, "row-1", resp.Events[0]["id"]) + assert.Equal(t, "next", resp.Cursor) +} + +func TestObjects_Fetch_Validation(t *testing.T) { + t.Parallel() + + api := New(https.NewClient("test-key", "https://example.com", logger.Discard())) + + _, err := api.Fetch(context.Background(), "", "obj-1", FetchParams{}) + require.Error(t, err) + assert.Contains(t, err.Error(), "object type is required") + + _, err = api.Fetch(context.Background(), "experiment", "", FetchParams{}) + require.Error(t, err) + assert.Contains(t, err.Error(), "object ID is required") +} diff --git a/api/objects/types.go b/api/objects/types.go new file mode 100644 index 0000000..f15dc65 --- /dev/null +++ b/api/objects/types.go @@ -0,0 +1,17 @@ +// Package objects provides generic object operations across Braintrust object types. +package objects + +// FetchParams represents request parameters for object fetch. +type FetchParams struct { + Limit int `json:"limit,omitempty"` + Filter map[string]any `json:"filter,omitempty"` + Cursor string `json:"cursor,omitempty"` +} + +// FetchResponse is the response from an object fetch request. +type FetchResponse struct { + Events []map[string]any `json:"events"` + Rows []map[string]any `json:"rows"` + Objects []map[string]any `json:"objects"` + Cursor string `json:"cursor"` +} diff --git a/eval/eval.go b/eval/eval.go index eed600a..1ed6066 100644 --- a/eval/eval.go +++ b/eval/eval.go @@ -230,6 +230,7 @@ type eval[I, R any] struct { datasetID string // For origin.object_id task TaskFunc[I, R] scorers []Scorer[I, R] + apiClient *api.API tracer oteltrace.Tracer startSpanOpt oteltrace.SpanStartOption ensureFlush func() error @@ -259,6 +260,11 @@ func newEval[I, R any]( parallelism int, quiet bool, ) *eval[I, R] { + var traceAPI *api.API + if s != nil { + traceAPI = s.API() + } + // Build parent span option parent := bttrace.NewParent(bttrace.ParentTypeExperimentID, experimentID) startSpanOpt := oteltrace.WithAttributes(parent.Attr()) @@ -283,6 +289,7 @@ func newEval[I, R any]( datasetID: datasetID, task: task, scorers: scorers, + apiClient: traceAPI, tracer: tracer, startSpanOpt: startSpanOpt, ensureFlush: ensureFlush, @@ -434,10 +441,10 @@ func (e *eval[I, R]) runCase(ctx context.Context, span oteltrace.Span, c Case[I, } output := taskResult.Output taskResult.Trace = newEvalTrace( - e.session, + e.apiClient, "experiment", e.experimentID, - span.SpanContext().SpanID().String(), + rootSpanIDFromSpan(span), e.ensureFlush, ) @@ -474,6 +481,14 @@ func (e *eval[I, R]) runCase(ctx context.Context, span oteltrace.Span, c Case[I, return setJSONAttrs(span, meta) } +func rootSpanIDFromSpan(span oteltrace.Span) string { + sc := span.SpanContext() + if sc.TraceID().IsValid() { + return sc.TraceID().String() + } + return sc.SpanID().String() +} + // runTask executes the task function and creates a task span. // Returns a TaskResult containing all task execution data. func (e *eval[I, R]) runTask(ctx context.Context, evalSpan oteltrace.Span, c Case[I, R]) (TaskResult[I, R], error) { diff --git a/eval/eval_test.go b/eval/eval_test.go index 8bac1d4..68fe2b2 100644 --- a/eval/eval_test.go +++ b/eval/eval_test.go @@ -2,8 +2,11 @@ package eval import ( "context" + "encoding/json" "errors" "io" + "net/http" + "net/http/httptest" "testing" "github.com/stretchr/testify/assert" @@ -11,8 +14,10 @@ import ( "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" + "github.com/braintrustdata/braintrust-sdk-go/internal/auth" "github.com/braintrustdata/braintrust-sdk-go/internal/oteltest" "github.com/braintrustdata/braintrust-sdk-go/internal/tests" + "github.com/braintrustdata/braintrust-sdk-go/logger" "github.com/braintrustdata/braintrust-sdk-go/trace" ) @@ -257,6 +262,74 @@ func TestNewEval_DefaultParallelism(t *testing.T) { assert.Equal(t, 1, ute2.eval.goroutines) } +func TestEval_Run_TraceRefUsesRootTraceID(t *testing.T) { + t.Parallel() + + var gotRootSpanID string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, http.MethodPost, r.Method) + require.Equal(t, "/function/invoke", r.URL.Path) + + var body map[string]any + require.NoError(t, json.NewDecoder(r.Body).Decode(&body)) + + input, ok := body["input"].(map[string]any) + require.True(t, ok) + traceRef, ok := input["trace_ref"].(map[string]any) + require.True(t, ok) + gotRootSpanID, _ = traceRef["root_span_id"].(string) + + require.NoError(t, json.NewEncoder(w).Encode([]map[string]any{ + {"role": "user", "content": "hello"}, + })) + })) + defer server.Close() + + tp, _ := oteltest.Setup(t) + tracer := tp.Tracer(t.Name()) + session := auth.NewTestSession( + "test-key", + "org-id", + "org-name", + server.URL, + server.URL, + server.URL, + logger.Discard(), + ) + + cases := NewDataset([]Case[testInput, testOutput]{ + { + Input: testInput{Value: "abc"}, + Expected: testOutput{Result: "output-abc"}, + }, + }) + task := T(func(ctx context.Context, input testInput) (testOutput, error) { + return testOutput{Result: "output-" + input.Value}, nil + }) + scorer := NewScorer("thread", func(ctx context.Context, result TaskResult[testInput, testOutput]) (Scores, error) { + _ = result.Trace.GetThread() + return S(1), nil + }) + + e := testNewEval( + session, + tracer, + "exp-123", + "test-exp", + "proj-123", + "test-proj", + cases, + task, + []Scorer[testInput, testOutput]{scorer}, + 1, + ) + + _, err := e.run(context.Background()) + require.NoError(t, err) + require.NotEmpty(t, gotRootSpanID) + assert.Regexp(t, "^[0-9a-f]{32}$", gotRootSpanID, "root_span_id should use trace/root ID format") +} + func TestEval_Run_TaskError(t *testing.T) { t.Parallel() diff --git a/eval/trace.go b/eval/trace.go index 86c9332..0775429 100644 --- a/eval/trace.go +++ b/eval/trace.go @@ -2,15 +2,11 @@ package eval import ( "context" - "encoding/json" - "fmt" - "io" "sync" - "time" - "github.com/braintrustdata/braintrust-sdk-go/internal/auth" - "github.com/braintrustdata/braintrust-sdk-go/internal/https" - "github.com/braintrustdata/braintrust-sdk-go/logger" + "github.com/braintrustdata/braintrust-sdk-go/api" + functionsapi "github.com/braintrustdata/braintrust-sdk-go/api/functions" + "github.com/braintrustdata/braintrust-sdk-go/api/objects" ) // JSONObject represents a JSON object for trace payloads. @@ -43,7 +39,7 @@ type traceImpl struct { objectID string rootSpanID string - session *auth.Session + apiClient *api.API ensureSpansFlushed func() error flushOnce sync.Once @@ -51,7 +47,7 @@ type traceImpl struct { } func newEvalTrace( - session *auth.Session, + apiClient *api.API, objectType string, objectID string, rootSpanID string, @@ -61,13 +57,13 @@ func newEvalTrace( objectType: objectType, objectID: objectID, rootSpanID: rootSpanID, - session: session, + apiClient: apiClient, ensureSpansFlushed: ensureSpansFlushed, } } func (t *traceImpl) GetSpans(spanTypes []string) []JSONObject { - if t.objectType == "" || t.objectID == "" || t.rootSpanID == "" || t.session == nil { + if t.objectType == "" || t.objectID == "" || t.rootSpanID == "" || t.apiClient == nil { return []JSONObject{} } @@ -83,7 +79,7 @@ func (t *traceImpl) GetSpans(spanTypes []string) []JSONObject { } func (t *traceImpl) GetThread() []JSONObject { - if t.objectType == "" || t.objectID == "" || t.rootSpanID == "" || t.session == nil { + if t.objectType == "" || t.objectID == "" || t.rootSpanID == "" || t.apiClient == nil { return []JSONObject{} } @@ -109,38 +105,19 @@ func (t *traceImpl) ensureSpansReady() error { } func (t *traceImpl) fetchSpans(spanTypes []string) ([]JSONObject, error) { - loginCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - _ = t.session.Login(loginCtx) - - apiInfo := t.session.APIInfo() - client := https.NewClient(apiInfo.APIKey, apiInfo.APIURL, logger.Discard()) - var all []JSONObject cursor := "" for { - reqBody := map[string]any{ - "limit": 1000, - "filter": buildSpanFilter(t.rootSpanID, spanTypes), + req := objects.FetchParams{ + Limit: 1000, + Filter: buildSpanFilter(t.rootSpanID, spanTypes), } if cursor != "" { - reqBody["cursor"] = cursor + req.Cursor = cursor } - resp, err := client.POST(context.Background(), fmt.Sprintf("/v1/%s/%s/fetch", t.objectType, t.objectID), reqBody) - if err != nil { - return nil, err - } - - var payload struct { - Events []JSONObject `json:"events"` - Rows []JSONObject `json:"rows"` - Objects []JSONObject `json:"objects"` - Cursor string `json:"cursor"` - } - err = json.NewDecoder(resp.Body).Decode(&payload) - _ = resp.Body.Close() + payload, err := t.apiClient.Objects().Fetch(context.Background(), t.objectType, t.objectID, req) if err != nil { return nil, err } @@ -170,126 +147,36 @@ func (t *traceImpl) fetchSpans(spanTypes []string) ([]JSONObject, error) { } func (t *traceImpl) fetchThread() ([]JSONObject, error) { - fmt.Printf("\n=== fetchThread start ===\n") - loginCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - _ = t.session.Login(loginCtx) - - apiInfo := t.session.APIInfo() - client := https.NewClient(apiInfo.APIKey, apiInfo.APIURL, logger.Discard()) - - reqBody := map[string]any{ - "global_function": "project_default", - "function_type": "preprocessor", - "mode": "json", - "input": map[string]any{ + payload, err := t.apiClient.Functions().InvokeGlobal(context.Background(), functionsapi.InvokeGlobalParams{ + GlobalFunction: "project_default", + FunctionType: "preprocessor", + Mode: "json", + Input: map[string]any{ "trace_ref": map[string]any{ "object_type": t.objectType, "object_id": t.objectID, "root_span_id": t.rootSpanID, }, }, - } - reqJSON, _ := json.MarshalIndent(reqBody, "", " ") - fmt.Printf("request body:\n%s\n", string(reqJSON)) - payload, err := t.invokeThreadEndpoint(client, "/function/invoke", reqBody) + }) if err != nil { return nil, err } - // The invoke response may be either {"output": ...} or a raw JSON value. - if outputWrapper, ok := payload.(map[string]any); ok { - if output, hasOutput := outputWrapper["output"]; hasOutput { - payload = output - fmt.Printf("payload.output:\n") - fmt.Printf(" type: %T\n", payload) - fmt.Printf(" value: %#v\n", payload) - } else { - fmt.Printf("payload object missing output key:\n") - fmt.Printf(" keys: %v\n", keys(outputWrapper)) - } - } - values, ok := payload.([]any) if !ok { - fmt.Printf("payload is not array:\n") - fmt.Printf(" type: %T\n", payload) - fmt.Printf(" value: %#v\n", payload) return []JSONObject{}, nil } thread := make([]JSONObject, 0, len(values)) - for i, value := range values { + for _, value := range values { if item, ok := value.(map[string]any); ok { thread = append(thread, item) - } else { - fmt.Printf("skipping non-object thread item:\n") - fmt.Printf(" index: %d\n", i) - fmt.Printf(" type: %T\n", value) - fmt.Printf(" value: %#v\n", value) } } - fmt.Printf("thread result:\n") - fmt.Printf(" thread_len: %d\n", len(thread)) - fmt.Printf(" raw_array_len: %d\n", len(values)) - fmt.Printf("=== fetchThread end ===\n") return thread, nil } -func (t *traceImpl) invokeThreadEndpoint(client *https.Client, path string, reqBody map[string]any) (any, error) { - fmt.Printf("request path: %s\n", path) - resp, err := client.POST(context.Background(), path, reqBody) - if err != nil { - fmt.Printf("invoke failed:\n") - fmt.Printf(" path: %s\n", path) - fmt.Printf(" object_type: %s\n", t.objectType) - fmt.Printf(" object_id: %s\n", t.objectID) - fmt.Printf(" root_span_id: %s\n", t.rootSpanID) - fmt.Printf(" error: %v\n", err) - return nil, err - } - - fmt.Printf("response status (%s): %d\n", path, resp.StatusCode) - fmt.Printf("response headers (%s):\n%v\n", path, resp.Header) - bodyBytes, readErr := io.ReadAll(resp.Body) - _ = resp.Body.Close() - if readErr != nil { - fmt.Printf("read body failed:\n") - fmt.Printf(" path: %s\n", path) - fmt.Printf(" object_type: %s\n", t.objectType) - fmt.Printf(" object_id: %s\n", t.objectID) - fmt.Printf(" root_span_id: %s\n", t.rootSpanID) - fmt.Printf(" error: %v\n", readErr) - return nil, readErr - } - fmt.Printf("response body (%s):\n%s\n", path, string(bodyBytes)) - - var payload any - err = json.Unmarshal(bodyBytes, &payload) - if err != nil { - fmt.Printf("decode failed:\n") - fmt.Printf(" path: %s\n", path) - fmt.Printf(" object_type: %s\n", t.objectType) - fmt.Printf(" object_id: %s\n", t.objectID) - fmt.Printf(" root_span_id: %s\n", t.rootSpanID) - fmt.Printf(" error: %v\n", err) - fmt.Printf(" raw: %s\n", string(bodyBytes)) - return nil, err - } - fmt.Printf("parsed payload (%s):\n", path) - fmt.Printf(" type: %T\n", payload) - fmt.Printf(" value: %#v\n", payload) - return payload, nil -} - -func keys(m map[string]any) []string { - out := make([]string, 0, len(m)) - for k := range m { - out = append(out, k) - } - return out -} - func buildSpanFilter(rootSpanID string, spanTypeFilter []string) JSONObject { children := []JSONObject{ { diff --git a/eval/trace_test.go b/eval/trace_test.go index eeef0cd..62d559c 100644 --- a/eval/trace_test.go +++ b/eval/trace_test.go @@ -55,8 +55,9 @@ func TestTrace_GetThread_ReturnsThreadFromPreprocessorInvoke(t *testing.T) { server.URL, logger.Discard(), ) + apiClient := session.API() - trace := newEvalTrace(session, "experiment", "obj-123", "root-456", func() error { return nil }) + trace := newEvalTrace(apiClient, "experiment", "obj-123", "root-456", func() error { return nil }) thread := trace.GetThread() require.Len(t, thread, 2) @@ -83,8 +84,33 @@ func TestTrace_GetThread_ReturnsEmptyForNonArrayOutput(t *testing.T) { server.URL, logger.Discard(), ) + apiClient := session.API() - trace := newEvalTrace(session, "experiment", "obj-123", "root-456", func() error { return nil }) + trace := newEvalTrace(apiClient, "experiment", "obj-123", "root-456", func() error { return nil }) assert.Empty(t, trace.GetThread()) } + +func TestTrace_GetThread_ReturnsEmptyForNullOutput(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, err := w.Write([]byte("null")) + require.NoError(t, err) + })) + defer server.Close() + + session := auth.NewTestSession( + "test-key", + "org-id", + "org-name", + server.URL, + server.URL, + server.URL, + logger.Discard(), + ) + apiClient := session.API() + + trace := newEvalTrace(apiClient, "experiment", "obj-123", "root-456", func() error { return nil }) + assert.Empty(t, trace.GetThread()) +} diff --git a/examples/internal/README.md b/examples/internal/README.md index dfa4a5b..91c13a6 100644 --- a/examples/internal/README.md +++ b/examples/internal/README.md @@ -26,6 +26,7 @@ Comprehensive examples testing all features for each AI provider: - **[langchaingo-anthropic/](langchaingo-anthropic/)** - LangChainGo with Anthropic provider (uses forked langchaingo) - **[functions/](functions/)** - Functions API usage (loading tasks/scorers with FunctionOpts) +- **[trace-scorer/](trace-scorer/)** - Accessing per-case trace spans/thread in scorer logic - **[rewrite/](rewrite/)** - Manual tracing and evaluator API testing - **[email-evals/](email-evals/)** - Realistic eval example with complex scoring - **[eval-updates/](eval-updates/)** - Testing Update option for appending to experiments diff --git a/examples/internal/trace-scorer/main.go b/examples/internal/trace-scorer/main.go new file mode 100644 index 0000000..9e06ce7 --- /dev/null +++ b/examples/internal/trace-scorer/main.go @@ -0,0 +1,90 @@ +package main + +import ( + "context" + "log" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/trace" + + braintrust "github.com/braintrustdata/braintrust-sdk-go" + "github.com/braintrustdata/braintrust-sdk-go/eval" +) + +func main() { + ctx := context.Background() + + tp := trace.NewTracerProvider() + defer tp.Shutdown(ctx) //nolint:errcheck + otel.SetTracerProvider(tp) + + client, err := braintrust.New(tp, + braintrust.WithProject("go-sdk-examples"), + braintrust.WithBlockingLogin(true), + ) + if err != nil { + log.Fatalf("failed to create Braintrust client: %v", err) + } + + evaluator := braintrust.NewEvaluator[string, string](client) + + task := eval.T(func(ctx context.Context, input string) (string, error) { + tracer := otel.Tracer("trace-scorer-example") + _, span := tracer.Start(ctx, "task-work") + defer span.End() + span.SetAttributes( + attribute.String("span_attributes.type", "custom"), + attribute.String("example.input", input), + ) + return "hello " + input, nil + }) + + traceAwareScorer := eval.NewScorer("trace_aware", func(ctx context.Context, tr eval.TaskResult[string, string]) (eval.Scores, error) { + if tr.Trace == nil { + return eval.Scores{{ + Name: "trace_aware", + Score: 0, + Metadata: map[string]any{ + "error": "trace is nil", + }, + }}, nil + } + + allSpans := tr.Trace.GetSpans(nil) + customSpans := tr.Trace.GetSpans([]string{"custom"}) + thread := tr.Trace.GetThread() + + log.Printf("trace info: spans=%d custom_spans=%d thread=%d", len(allSpans), len(customSpans), len(thread)) + + score := 0.0 + if len(allSpans) > 0 { + score = 1.0 + } + + return eval.Scores{{ + Name: "trace_aware", + Score: score, + Metadata: map[string]any{ + "span_count": len(allSpans), + "custom_span_count": len(customSpans), + "thread_count": len(thread), + }, + }}, nil + }) + + _, err = evaluator.Run(ctx, eval.Opts[string, string]{ + Experiment: "internal-trace-scorer-demo", + Dataset: eval.NewDataset([]eval.Case[string, string]{ + {Input: "world", Expected: "hello world"}, + {Input: "team", Expected: "hello team"}, + }), + Task: task, + Scorers: []eval.Scorer[string, string]{traceAwareScorer}, + }) + if err != nil { + log.Fatalf("eval failed: %v", err) + } + + log.Println("trace-scorer example completed") +} diff --git a/internal/auth/auth_test.go b/internal/auth/auth_test.go index 0d3cca9..dc36415 100644 --- a/internal/auth/auth_test.go +++ b/internal/auth/auth_test.go @@ -353,6 +353,25 @@ func TestSession_APIInfo(t *testing.T) { }) } +func TestSession_API(t *testing.T) { + t.Parallel() + + session := NewTestSession( + "test-key-123", + "org-id", + "org-name", + "https://api.braintrust.dev", + "https://www.braintrust.dev", + "https://www.braintrust.dev", + logger.Discard(), + ) + + apiClient := session.API() + require.NotNil(t, apiClient) + require.NotNil(t, apiClient.Functions()) + require.NotNil(t, apiClient.Objects()) +} + // TestSession_OrgName tests that OrgName() returns org name after login func TestSession_OrgName(t *testing.T) { t.Parallel() diff --git a/internal/auth/session.go b/internal/auth/session.go index 5f7f1f6..7cfbe67 100644 --- a/internal/auth/session.go +++ b/internal/auth/session.go @@ -5,6 +5,7 @@ import ( "fmt" "sync" + "github.com/braintrustdata/braintrust-sdk-go/api" "github.com/braintrustdata/braintrust-sdk-go/internal/https" "github.com/braintrustdata/braintrust-sdk-go/logger" ) @@ -125,6 +126,18 @@ func (s *Session) APIInfo() APIInfo { } } +// API returns an API client configured from the current session. +// It uses APIInfo() so it works before login completes and automatically +// picks up server-provided APIURL once available. +func (s *Session) API() *api.API { + apiInfo := s.APIInfo() + return api.NewClient( + apiInfo.APIKey, + api.WithAPIURL(apiInfo.APIURL), + api.WithLogger(s.logger), + ) +} + func (s *Session) getLoginResult() (bool, *loginResult) { s.mu.RLock() defer s.mu.RUnlock() From 7c1deacbc73add08141daa30754b4acdde29c2a2 Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Wed, 4 Mar 2026 15:58:54 -0800 Subject: [PATCH 5/8] Changes. --- eval/eval.go | 6 +- eval/eval_test.go | 2 +- eval/functions_api.go | 14 +- eval/spans.go | 252 +++++++++ eval/{trace_test.go => spans_test.go} | 28 +- eval/task.go | 40 +- .../TestFunctionsAPI_EndToEnd_MixedTypes.yaml | 480 +++++++++--------- eval/trace.go | 266 ---------- examples/internal/trace-scorer/main.go | 23 +- 9 files changed, 584 insertions(+), 527 deletions(-) create mode 100644 eval/spans.go rename eval/{trace_test.go => spans_test.go} (73%) delete mode 100644 eval/trace.go diff --git a/eval/eval.go b/eval/eval.go index 1ed6066..38c1a5b 100644 --- a/eval/eval.go +++ b/eval/eval.go @@ -440,7 +440,7 @@ func (e *eval[I, R]) runCase(ctx context.Context, span oteltrace.Span, c Case[I, return err } output := taskResult.Output - taskResult.Trace = newEvalTrace( + taskResult.fetcher = newSpanFetcher( e.apiClient, "experiment", e.experimentID, @@ -553,10 +553,6 @@ func (e *eval[I, R]) runScorers(ctx context.Context, taskResult TaskResult[I, R] if err := setJSONAttr(span, "braintrust.span_attributes", scoreSpanAttrs); err != nil { return nil, err } - if taskResult.Trace == nil { - taskResult.Trace = newTrace() - } - var scores []Score var errs []error diff --git a/eval/eval_test.go b/eval/eval_test.go index 68fe2b2..109942d 100644 --- a/eval/eval_test.go +++ b/eval/eval_test.go @@ -307,7 +307,7 @@ func TestEval_Run_TraceRefUsesRootTraceID(t *testing.T) { return testOutput{Result: "output-" + input.Value}, nil }) scorer := NewScorer("thread", func(ctx context.Context, result TaskResult[testInput, testOutput]) (Scores, error) { - _ = result.Trace.GetThread() + _, _ = result.Thread(ctx) return S(1), nil }) diff --git a/eval/functions_api.go b/eval/functions_api.go index b3dd186..bd33deb 100644 --- a/eval/functions_api.go +++ b/eval/functions_api.go @@ -115,9 +115,13 @@ func (f *FunctionsAPI[I, R]) Scorer(ctx context.Context, opts FunctionOpts) (Sco // Create a scorer that invokes the function scorerFunc := func(ctx context.Context, result TaskResult[I, R]) (Scores, error) { - trace := result.Trace - if trace == nil { - trace = newTrace() + spans, err := result.Spans(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get spans: %w", err) + } + thread, err := result.Thread(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get thread: %w", err) } // Build scorer input @@ -126,8 +130,8 @@ func (f *FunctionsAPI[I, R]) Scorer(ctx context.Context, opts FunctionOpts) (Sco "output": result.Output, "expected": result.Expected, "trace": map[string]any{ - "spans": trace.GetSpans(nil), - "thread": trace.GetThread(), + "spans": spans, + "thread": thread, }, } diff --git a/eval/spans.go b/eval/spans.go new file mode 100644 index 0000000..a26dc19 --- /dev/null +++ b/eval/spans.go @@ -0,0 +1,252 @@ +package eval + +import ( + "context" + "encoding/json" + "sync" + + "github.com/braintrustdata/braintrust-sdk-go/api" + functionsapi "github.com/braintrustdata/braintrust-sdk-go/api/functions" + "github.com/braintrustdata/braintrust-sdk-go/api/objects" +) + +// Span represents a single span from a Braintrust trace. +type Span struct { + ID string `json:"id"` + SpanID string `json:"span_id"` + RootSpanID string `json:"root_span_id"` + SpanParents []string `json:"span_parents"` + SpanAttributes map[string]any `json:"span_attributes"` + Input any `json:"input"` + Output any `json:"output"` + Metadata map[string]any `json:"metadata"` +} + +// flushState wraps sync.Once so that spanFetcher (and therefore TaskResult) +// remains safe to copy by value. +type flushState struct { + once sync.Once + err error +} + +// spanFetcher retrieves span and thread data from the Braintrust API. +// It is unexported and attached to TaskResult as a pointer so that +// TaskResult stays copyable by value. +type spanFetcher struct { + apiClient *api.API + objectType string + objectID string + rootSpanID string + flush *flushState + flushFn func() error +} + +func newSpanFetcher( + apiClient *api.API, + objectType string, + objectID string, + rootSpanID string, + ensureSpansFlushed func() error, +) *spanFetcher { + return &spanFetcher{ + apiClient: apiClient, + objectType: objectType, + objectID: objectID, + rootSpanID: rootSpanID, + flush: &flushState{}, + flushFn: ensureSpansFlushed, + } +} + +// Spans returns spans for the provided span types. +func (f *spanFetcher) Spans(ctx context.Context, spanTypes []string) ([]Span, error) { + if f.objectType == "" || f.objectID == "" || f.rootSpanID == "" || f.apiClient == nil { + return nil, nil + } + + if err := f.ensureSpansReady(); err != nil { + return nil, err + } + + return f.fetchSpans(ctx, spanTypes) +} + +// Thread returns thread entries associated with the case. +func (f *spanFetcher) Thread(ctx context.Context) ([]map[string]any, error) { + if f.objectType == "" || f.objectID == "" || f.rootSpanID == "" || f.apiClient == nil { + return nil, nil + } + + if err := f.ensureSpansReady(); err != nil { + return nil, err + } + + return f.fetchThread(ctx) +} + +func (f *spanFetcher) ensureSpansReady() error { + f.flush.once.Do(func() { + if f.flushFn == nil { + return + } + f.flush.err = f.flushFn() + }) + return f.flush.err +} + +func (f *spanFetcher) fetchSpans(ctx context.Context, spanTypes []string) ([]Span, error) { + var all []Span + cursor := "" + + for { + req := objects.FetchParams{ + Limit: 1000, + Filter: buildSpanFilter(f.rootSpanID, spanTypes), + } + if cursor != "" { + req.Cursor = cursor + } + + payload, err := f.apiClient.Objects().Fetch(ctx, f.objectType, f.objectID, req) + if err != nil { + return nil, err + } + + rows := payload.Events + if len(rows) == 0 { + rows = payload.Rows + } + if len(rows) == 0 { + rows = payload.Objects + } + + for _, row := range rows { + if isScorerPurpose(row) { + continue + } + span, err := rowToSpan(row) + if err != nil { + return nil, err + } + all = append(all, span) + } + + if payload.Cursor == "" { + break + } + cursor = payload.Cursor + } + + return all, nil +} + +func (f *spanFetcher) fetchThread(ctx context.Context) ([]map[string]any, error) { + payload, err := f.apiClient.Functions().InvokeGlobal(ctx, functionsapi.InvokeGlobalParams{ + GlobalFunction: "project_default", + FunctionType: "preprocessor", + Mode: "json", + Input: map[string]any{ + "trace_ref": map[string]any{ + "object_type": f.objectType, + "object_id": f.objectID, + "root_span_id": f.rootSpanID, + }, + }, + }) + if err != nil { + return nil, err + } + + values, ok := payload.([]any) + if !ok { + return nil, nil + } + + thread := make([]map[string]any, 0, len(values)) + for _, value := range values { + if item, ok := value.(map[string]any); ok { + thread = append(thread, item) + } + } + return thread, nil +} + +// rowToSpan converts a raw API row into a typed Span via JSON round-trip. +func rowToSpan(row map[string]any) (Span, error) { + b, err := json.Marshal(row) + if err != nil { + return Span{}, err + } + var s Span + if err := json.Unmarshal(b, &s); err != nil { + return Span{}, err + } + return s, nil +} + +func buildSpanFilter(rootSpanID string, spanTypeFilter []string) map[string]any { + children := []map[string]any{ + { + "op": "eq", + "left": map[string]any{ + "op": "ident", + "name": []string{"root_span_id"}, + }, + "right": map[string]any{ + "op": "literal", + "value": rootSpanID, + }, + }, + { + "op": "or", + "children": []map[string]any{ + { + "op": "isnull", + "expr": map[string]any{ + "op": "ident", + "name": []string{"span_attributes", "purpose"}, + }, + }, + { + "op": "ne", + "left": map[string]any{ + "op": "ident", + "name": []string{"span_attributes", "purpose"}, + }, + "right": map[string]any{ + "op": "literal", + "value": "scorer", + }, + }, + }, + }, + } + + if len(spanTypeFilter) > 0 { + children = append(children, map[string]any{ + "op": "in", + "left": map[string]any{ + "op": "ident", + "name": []string{"span_attributes", "type"}, + }, + "right": map[string]any{ + "op": "literal", + "value": spanTypeFilter, + }, + }) + } + + return map[string]any{ + "op": "and", + "children": children, + } +} + +func isScorerPurpose(row map[string]any) bool { + attrs, ok := row["span_attributes"].(map[string]any) + if !ok || attrs == nil { + return false + } + purpose, ok := attrs["purpose"].(string) + return ok && purpose == "scorer" +} diff --git a/eval/trace_test.go b/eval/spans_test.go similarity index 73% rename from eval/trace_test.go rename to eval/spans_test.go index 62d559c..22d7610 100644 --- a/eval/trace_test.go +++ b/eval/spans_test.go @@ -1,6 +1,7 @@ package eval import ( + "context" "encoding/json" "net/http" "net/http/httptest" @@ -13,7 +14,7 @@ import ( "github.com/braintrustdata/braintrust-sdk-go/logger" ) -func TestTrace_GetThread_ReturnsThreadFromPreprocessorInvoke(t *testing.T) { +func TestSpanFetcher_Thread_ReturnsThreadFromPreprocessorInvoke(t *testing.T) { t.Parallel() server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -57,15 +58,17 @@ func TestTrace_GetThread_ReturnsThreadFromPreprocessorInvoke(t *testing.T) { ) apiClient := session.API() - trace := newEvalTrace(apiClient, "experiment", "obj-123", "root-456", func() error { return nil }) + fetcher := newSpanFetcher(apiClient, "experiment", "obj-123", "root-456", func() error { return nil }) - thread := trace.GetThread() + ctx := context.Background() + thread, err := fetcher.Thread(ctx) + require.NoError(t, err) require.Len(t, thread, 2) assert.Equal(t, "system", thread[0]["role"]) assert.Equal(t, "user", thread[1]["role"]) } -func TestTrace_GetThread_ReturnsEmptyForNonArrayOutput(t *testing.T) { +func TestSpanFetcher_Thread_ReturnsNilForNonArrayOutput(t *testing.T) { t.Parallel() server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -86,12 +89,15 @@ func TestTrace_GetThread_ReturnsEmptyForNonArrayOutput(t *testing.T) { ) apiClient := session.API() - trace := newEvalTrace(apiClient, "experiment", "obj-123", "root-456", func() error { return nil }) + fetcher := newSpanFetcher(apiClient, "experiment", "obj-123", "root-456", func() error { return nil }) - assert.Empty(t, trace.GetThread()) + ctx := context.Background() + thread, err := fetcher.Thread(ctx) + require.NoError(t, err) + assert.Nil(t, thread) } -func TestTrace_GetThread_ReturnsEmptyForNullOutput(t *testing.T) { +func TestSpanFetcher_Thread_ReturnsNilForNullOutput(t *testing.T) { t.Parallel() server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -111,6 +117,10 @@ func TestTrace_GetThread_ReturnsEmptyForNullOutput(t *testing.T) { ) apiClient := session.API() - trace := newEvalTrace(apiClient, "experiment", "obj-123", "root-456", func() error { return nil }) - assert.Empty(t, trace.GetThread()) + fetcher := newSpanFetcher(apiClient, "experiment", "obj-123", "root-456", func() error { return nil }) + + ctx := context.Background() + thread, err := fetcher.Thread(ctx) + require.NoError(t, err) + assert.Nil(t, thread) } diff --git a/eval/task.go b/eval/task.go index 2f02dc3..8018ba2 100644 --- a/eval/task.go +++ b/eval/task.go @@ -41,11 +41,49 @@ type TaskResult[I, R any] struct { Expected R // What we expected Output R // What the task actually returned Metadata Metadata // Case metadata - Trace Trace // Eval trace context for scorers // UserData is custom application context from the task. // This field is NOT logged and isn't supported outside the context of the Go SDK. UserData any + + fetcher *spanFetcher // unexported, pointer for nil-check + safe copy +} + +// SpanQueryOpt is a functional option for configuring a Spans query. +type SpanQueryOpt func(*spansQuery) + +type spansQuery struct { + types []string +} + +// WithSpanTypes filters spans by span_attributes.type (e.g. "llm", "function", "custom"). +// Multiple types are OR'd together. Omit to get all spans. +func WithSpanTypes(types ...string) SpanQueryOpt { + return func(q *spansQuery) { + q.types = types + } +} + +// Spans returns spans from the trace. +// Returns nil, nil if no trace data is available (e.g. no API client configured). +func (r TaskResult[I, R]) Spans(ctx context.Context, opts ...SpanQueryOpt) ([]Span, error) { + if r.fetcher == nil { + return nil, nil + } + var q spansQuery + for _, opt := range opts { + opt(&q) + } + return r.fetcher.Spans(ctx, q.types) +} + +// Thread returns thread entries associated with this case's trace. +// Returns nil, nil if no trace data is available (e.g. no API client configured). +func (r TaskResult[I, R]) Thread(ctx context.Context) ([]map[string]any, error) { + if r.fetcher == nil { + return nil, nil + } + return r.fetcher.Thread(ctx) } // T is a convenience function for writing short task functions ([TaskFunc]) that only diff --git a/eval/testdata/cassettes/TestFunctionsAPI_EndToEnd_MixedTypes.yaml b/eval/testdata/cassettes/TestFunctionsAPI_EndToEnd_MixedTypes.yaml index f875cd8..6aed0ed 100644 --- a/eval/testdata/cassettes/TestFunctionsAPI_EndToEnd_MixedTypes.yaml +++ b/eval/testdata/cassettes/TestFunctionsAPI_EndToEnd_MixedTypes.yaml @@ -15,6 +15,8 @@ interactions: body: '{"name":"go-sdk-tests"}' form: {} headers: + Accept: + - application/json Content-Type: - application/json url: https://api.braintrust.dev/v1/project @@ -27,42 +29,42 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"go-sdk-tests","created":"2025-11-04T13:38:56.532Z","deleted_at":null,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","settings":null}' + body: '{"id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"go-sdk-tests","description":null,"created":"2025-11-04T13:38:56.532Z","deleted_at":null,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","settings":null}' headers: Access-Control-Allow-Credentials: - "true" Access-Control-Expose-Headers: - - x-bt-cursor,x-bt-found-existing,x-bt-query-plan + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms Content-Type: - application/json; charset=utf-8 Date: - - Wed, 12 Nov 2025 15:00:25 GMT + - Wed, 04 Mar 2026 23:21:49 GMT Etag: - - W/"eb-poL8yUftdUFahhL/Peyi6gN8N/E" + - W/"fe-rfCZvNlcUhLHZTl4/HHBhOEJFv4" Vary: - Origin, Accept-Encoding Via: - - 1.1 3b0649a8bee506c1d7498462d39e6c44.cloudfront.net (CloudFront), 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 182d3a3dbb6658c964ee75cd45a42242.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - T74ZkESnIAMEPdw= + - ZuK2KHm8oAMEEoA= X-Amz-Cf-Id: - - bq6E0LTeZeS6CjqqKRuVB06w81pRQd3dIkmhwK_p91JliQSGkLJ7rA== + - jWZsokv3ZxS4HPG3cLU9hgdurbSgjs58jJ8FBQUuJT_YiE2TzGeFHg== X-Amz-Cf-Pop: - - JFK50-P5 - - JFK50-P2 + - HIO52-P2 + - HIO52-P4 X-Amzn-Requestid: - - 58587d56-8adf-42cb-9708-ab6b36db4da5 + - 03a6d1c4-4797-499a-b5ab-21e9c397874f X-Amzn-Trace-Id: - - Root=1-6914a109-0a8d601052e6e10c4220a61f;Parent=170ad496f1585e98;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8be8d-352c02c97cdbbc93522c381d;Parent=75e5bd0fc8466f5e;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Found-Existing: - "true" X-Bt-Internal-Trace-Id: - - 6914a109000000001d4b1e1cfdec0f4a + - 69a8be8d00000000360707728f6cc5fb X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 160.601542ms + duration: 391.960958ms - id: 1 request: proto: HTTP/1.1 @@ -76,9 +78,11 @@ interactions: request_uri: "" body: "" form: {} - headers: {} - url: https://api.braintrust.dev/v1/function?limit=1&project_name=go-sdk-tests&slug=TestFunctionsAPI_EndToEnd_MixedTypes-task - method: GET + headers: + Accept: + - application/json + url: https://api.braintrust.dev/api/apikey/login + method: POST response: proto: HTTP/2.0 proto_major: 2 @@ -87,40 +91,40 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"objects":[]}' + body: '{"org_info":[{"id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"matt-test-org","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}' headers: Access-Control-Allow-Credentials: - "true" Access-Control-Expose-Headers: - - x-bt-cursor,x-bt-found-existing,x-bt-query-plan + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms Content-Type: - application/json; charset=utf-8 Date: - - Wed, 12 Nov 2025 15:00:25 GMT + - Wed, 04 Mar 2026 23:21:49 GMT Etag: - - W/"e-xZKibKAiOxxBbzTm2byfFNRkvtA" + - W/"101-EqXzt+vlRFUt5HCzQY7qxp9rZU0" Vary: - Origin, Accept-Encoding Via: - - 1.1 f9aa0e4086fcbefc20f307d96a8e3b44.cloudfront.net (CloudFront), 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 f36cc119cb86b2f70c315ca53fd1b4ee.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - T74ZlGNGIAMERPw= + - ZuK2KEeYoAMEU9Q= X-Amz-Cf-Id: - - T-kQF4EqoQcJNC6XpTwzujkD0Eb_t9OstjxxI5FZrvWIS5sx6I7rLg== + - ZfUiPDuoPPBIMv1lh8whWl5dwQpKWmCT9q4U9KxJ1nm8AybK4qQ8Wg== X-Amz-Cf-Pop: - - JFK50-P5 - - JFK50-P2 + - HIO52-P2 + - HIO52-P4 X-Amzn-Requestid: - - 8457ba24-fcb7-4fe5-9982-3d5668220f05 + - 8a72e6e6-f503-4acd-80ba-5ac9cfbef0a1 X-Amzn-Trace-Id: - - Root=1-6914a109-4ba7da5643c0654b248e24f4;Parent=2c4929839e1a788a;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8be8d-7bf27e082959357234a0565f;Parent=12a03d9b082b8c0b;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 6914a109000000007e1a8d75bcd327d1 + - 69a8be8d000000000b7e01fdb7cb5f1e X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 138.443417ms + duration: 480.561958ms - id: 2 request: proto: HTTP/1.1 @@ -134,9 +138,11 @@ interactions: request_uri: "" body: "" form: {} - headers: {} - url: https://api.braintrust.dev/api/apikey/login - method: POST + headers: + Accept: + - application/json + url: https://api.braintrust.dev/v1/function?limit=1&project_name=go-sdk-tests&slug=TestFunctionsAPI_EndToEnd_MixedTypes-task + method: GET response: proto: HTTP/2.0 proto_major: 2 @@ -145,40 +151,40 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"org_info":[{"id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"matt-test-org","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}' + body: '{"objects":[]}' headers: Access-Control-Allow-Credentials: - "true" Access-Control-Expose-Headers: - - x-bt-cursor,x-bt-found-existing,x-bt-query-plan + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms Content-Type: - application/json; charset=utf-8 Date: - - Wed, 12 Nov 2025 15:00:25 GMT + - Wed, 04 Mar 2026 23:21:49 GMT Etag: - - W/"101-EqXzt+vlRFUt5HCzQY7qxp9rZU0" + - W/"e-xZKibKAiOxxBbzTm2byfFNRkvtA" Vary: - Origin, Accept-Encoding Via: - - 1.1 f8debc28b6c73eb3dc7540e2ac2f0e18.cloudfront.net (CloudFront), 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 89b24af8db05335e68292856e0a53668.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - T74ZkHBJoAMEPBg= + - ZuK2NF1goAMEE5w= X-Amz-Cf-Id: - - NkjLZjEVuk42hwd7EsbR07NSziPr4Mhwy8_YQWIyWDjb3U2N0kLP4A== + - yPc5E6dJ4LYQ8JinP5gqIYri9KygZPM88m50ip0cZEXb25Cc8KPSSw== X-Amz-Cf-Pop: - - JFK50-P5 - - JFK50-P2 + - HIO52-P2 + - HIO52-P4 X-Amzn-Requestid: - - ce1757d6-f991-4c45-94a4-1fc76a7abd4d + - 97bfee2c-f896-4e53-b186-bd91972b299e X-Amzn-Trace-Id: - - Root=1-6914a109-2c0f14125abbdc713a3f7a6f;Parent=4384fe985ff6ac96;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8be8d-538c7ed55fe4df4d4206b759;Parent=5840344d74348928;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 6914a1090000000072cc3183fcea9ee1 + - 69a8be8d0000000066474d8596ffcad6 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 299.309167ms + duration: 232.55375ms - id: 3 request: proto: HTTP/1.1 @@ -193,6 +199,8 @@ interactions: body: '{"project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","name":"E2E Task","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-task","function_data":{"type":"prompt"},"prompt_data":{"options":{"model":"gpt-4o-mini","params":{"max_tokens":100,"response_format":{"type":"json_object"},"temperature":0}},"prompt":{"messages":[{"content":"You answer questions with JSON. Return ONLY the JSON object, no markdown, no code blocks, no backticks.","role":"system"},{"content":"Question: {{input.question}}. Return ONLY JSON like {\"answer\": \"your answer\", \"confidence\": 0.9}. No markdown formatting.","role":"user"}],"type":"chat"}}}' form: {} headers: + Accept: + - application/json Content-Type: - application/json url: https://api.braintrust.dev/v1/function @@ -205,40 +213,40 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-task","name":"E2E Task","function_data":{"type":"prompt"},"prompt_data":{"options":{"model":"gpt-4o-mini","params":{"max_tokens":100,"response_format":{"type":"json_object"},"temperature":0}},"prompt":{"messages":[{"content":"You answer questions with JSON. Return ONLY the JSON object, no markdown, no code blocks, no backticks.","role":"system"},{"content":"Question: {{input.question}}. Return ONLY JSON like {\"answer\": \"your answer\", \"confidence\": 0.9}. No markdown formatting.","role":"user"}],"type":"chat"}},"id":"29072339-6591-46f1-a865-3fcce6c44f40","created":"2025-11-12T15:00:26.063Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_xact_id":"1000196129575022738"}' + body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-task","name":"E2E Task","function_data":{"type":"prompt"},"prompt_data":{"options":{"model":"gpt-4o-mini","params":{"max_tokens":100,"response_format":{"type":"json_object"},"temperature":0}},"prompt":{"messages":[{"content":"You answer questions with JSON. Return ONLY the JSON object, no markdown, no code blocks, no backticks.","role":"system"},{"content":"Question: {{input.question}}. Return ONLY JSON like {\"answer\": \"your answer\", \"confidence\": 0.9}. No markdown formatting.","role":"user"}],"type":"chat"}},"id":"5cee0811-48f0-453a-9103-365d3a3344a8","created":"2026-03-04T23:21:50.043Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_xact_id":"1000196765725398940"}' headers: Access-Control-Allow-Credentials: - "true" Access-Control-Expose-Headers: - - x-bt-cursor,x-bt-found-existing,x-bt-query-plan + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms Content-Type: - application/json; charset=utf-8 Date: - - Wed, 12 Nov 2025 15:00:26 GMT + - Wed, 04 Mar 2026 23:21:50 GMT Etag: - - W/"31a-2sPpzg0Qu9SZPTYx3IyIzx1xF5E" + - W/"31a-em6V5YuFILXI9rS58D+nzORFiA0" Vary: - Origin, Accept-Encoding Via: - - 1.1 98bc8180e0431e8f05afc9802305f1d2.cloudfront.net (CloudFront), 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 1f941fcf288b6d0259a0f708c955afae.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - T74ZnGZkIAMENaQ= + - ZuK2PHhSIAMEVLw= X-Amz-Cf-Id: - - KRoRqECIY27YqIwiuE_LbGUAPaZ6RmkV2S_M7rH11Fwk7C4i3RTIOg== + - ZDDsmz6LjAbzKjQitjfYbfoz3xWwmzTOWevStw2PX_Ruvz-O8YzMgw== X-Amz-Cf-Pop: - - JFK50-P5 - - JFK50-P2 + - HIO52-P2 + - HIO52-P4 X-Amzn-Requestid: - - 2b4bb93b-05d0-43e0-8bba-dd670625e4a8 + - ae1cdbfa-9533-491b-a210-655f70100808 X-Amzn-Trace-Id: - - Root=1-6914a109-6ffe8a575ced2c766fbb325b;Parent=51dd41033bedff9c;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8be8d-5e7ab4084a98fc7e11979895;Parent=68c3a4687841f2ce;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 6914a1090000000005877586dd2100fa + - 69a8be8d00000000139b12966908e47c X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 360.401958ms + duration: 651.189959ms - id: 4 request: proto: HTTP/1.1 @@ -252,7 +260,9 @@ interactions: request_uri: "" body: "" form: {} - headers: {} + headers: + Accept: + - application/json url: https://api.braintrust.dev/v1/function?limit=1&project_name=go-sdk-tests&slug=TestFunctionsAPI_EndToEnd_MixedTypes-scorer method: GET response: @@ -268,35 +278,35 @@ interactions: Access-Control-Allow-Credentials: - "true" Access-Control-Expose-Headers: - - x-bt-cursor,x-bt-found-existing,x-bt-query-plan + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms Content-Type: - application/json; charset=utf-8 Date: - - Wed, 12 Nov 2025 15:00:26 GMT + - Wed, 04 Mar 2026 23:21:50 GMT Etag: - W/"e-xZKibKAiOxxBbzTm2byfFNRkvtA" Vary: - Origin, Accept-Encoding Via: - - 1.1 c4d0da6268789cfda9bb5da1f3f8fc58.cloudfront.net (CloudFront), 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 250b49a977a2df6676d3fbf2508fc16e.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - T74ZqGTdoAMEHhw= + - ZuK2VEELoAMEikA= X-Amz-Cf-Id: - - hEqXcSW2c4EF1D2r7kGMSbF-_IfaxDmv-X6qu3mLW6KzgIGz5sdCEw== + - fmWVBVgyFZvv7kImOOMPmVxrNgP0drC8ezMcv4_WOf_8l-hsoR5qMQ== X-Amz-Cf-Pop: - - JFK50-P5 - - JFK50-P2 + - HIO52-P2 + - HIO52-P4 X-Amzn-Requestid: - - 2a4c0f5e-8d2b-416f-82c5-7032aeecc557 + - 42802556-4bbb-42ab-884c-b0d43e040527 X-Amzn-Trace-Id: - - Root=1-6914a10a-4b7fa5e172b2d71169091733;Parent=7060252e74711618;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8be8e-72e30f915d29afdc00e1ed5d;Parent=24a6846642e872de;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 6914a10a000000007a91e819becd7717 + - 69a8be8e000000006707a464c766dd54 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 171.598958ms + duration: 306.8885ms - id: 5 request: proto: HTTP/1.1 @@ -311,6 +321,8 @@ interactions: body: '{"project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","name":"E2E Scorer","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-scorer","function_type":"scorer","function_data":{"type":"prompt"},"prompt_data":{"options":{"model":"gpt-4o-mini","params":{"temperature":0}},"parser":{"choice_scores":{"correct":1,"incorrect":0},"type":"llm_classifier","use_cot":false},"prompt":{"messages":[{"content":"You are a scorer.","role":"system"},{"content":"Is the output.answer field non-empty? Choose ''correct'' if yes, ''incorrect'' if no.","role":"user"}],"type":"chat"}}}' form: {} headers: + Accept: + - application/json Content-Type: - application/json url: https://api.braintrust.dev/v1/function @@ -323,40 +335,40 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-scorer","name":"E2E Scorer","function_type":"scorer","function_data":{"type":"prompt"},"prompt_data":{"options":{"model":"gpt-4o-mini","params":{"temperature":0}},"parser":{"choice_scores":{"correct":1,"incorrect":0},"type":"llm_classifier","use_cot":false},"prompt":{"messages":[{"content":"You are a scorer.","role":"system"},{"content":"Is the output.answer field non-empty? Choose ''correct'' if yes, ''incorrect'' if no.","role":"user"}],"type":"chat"}},"id":"063f91ca-beb0-42dd-8f39-d34bebea6d98","created":"2025-11-12T15:00:26.612Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_xact_id":"1000196129575023135"}' + body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-scorer","name":"E2E Scorer","function_type":"scorer","function_data":{"type":"prompt"},"prompt_data":{"options":{"model":"gpt-4o-mini","params":{"temperature":0}},"parser":{"choice_scores":{"correct":1,"incorrect":0},"type":"llm_classifier","use_cot":false},"prompt":{"messages":[{"content":"You are a scorer.","role":"system"},{"content":"Is the output.answer field non-empty? Choose ''correct'' if yes, ''incorrect'' if no.","role":"user"}],"type":"chat"}},"id":"3d00adc8-eb0f-4372-813b-6c945725b4e9","created":"2026-03-04T23:21:51.032Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_xact_id":"1000196765725465873"}' headers: Access-Control-Allow-Credentials: - "true" Access-Control-Expose-Headers: - - x-bt-cursor,x-bt-found-existing,x-bt-query-plan + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms Content-Type: - application/json; charset=utf-8 Date: - - Wed, 12 Nov 2025 15:00:26 GMT + - Wed, 04 Mar 2026 23:21:51 GMT Etag: - - W/"2d8-93Sx/tzEYoS276h1cUOf6qrdEvM" + - W/"2d8-awAllBkTnpQ96D7hwSR6xXa0aPQ" Vary: - Origin, Accept-Encoding Via: - - 1.1 b5fe18267507cb61755963d8928a60f4.cloudfront.net (CloudFront), 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 1f941fcf288b6d0259a0f708c955afae.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - T74ZsE7jIAMETeQ= + - ZuK2YFBqIAMECUQ= X-Amz-Cf-Id: - - IaxeYDwGKAKbDCYNco4K7FSWX22C9IQ3j1L85nTsPyWtJo6jTgAETw== + - HvgjPdZvS4v0qMcdgmgwnNtEUCKrMrxCpkg5qYrasgOKaw3cH2ngfQ== X-Amz-Cf-Pop: - - JFK50-P5 - - JFK50-P2 + - HIO52-P2 + - HIO52-P4 X-Amzn-Requestid: - - be90eaa1-31f7-49ea-a5c5-0e3140ad5aa2 + - 5473fc69-f832-40a3-8a3f-0a28618881a0 X-Amzn-Trace-Id: - - Root=1-6914a10a-103b89b5424920f4364c4d58;Parent=61cc9c1bc2f71c31;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8be8e-2898f58a067be1bb75ae4722;Parent=477e6824dc537dd7;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 6914a10a0000000018968c383f8d7194 + - 69a8be8e000000006e58465ac0f2e835 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 442.612292ms + duration: 511.484625ms - id: 6 request: proto: HTTP/1.1 @@ -370,7 +382,9 @@ interactions: request_uri: "" body: "" form: {} - headers: {} + headers: + Accept: + - application/json url: https://api.braintrust.dev/v1/function?limit=1&project_name=go-sdk-tests&slug=TestFunctionsAPI_EndToEnd_MixedTypes-task method: GET response: @@ -381,40 +395,40 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"objects":[{"id":"29072339-6591-46f1-a865-3fcce6c44f40","_xact_id":"1000196129575022738","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","log_id":"p","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"E2E Task","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-task","description":null,"created":"2025-11-12T15:00:26.063Z","prompt_data":{"prompt":{"type":"chat","messages":[{"role":"system","content":"You answer questions with JSON. Return ONLY the JSON object, no markdown, no code blocks, no backticks."},{"role":"user","content":"Question: {{input.question}}. Return ONLY JSON like {\"answer\": \"your answer\", \"confidence\": 0.9}. No markdown formatting."}]},"options":{"model":"gpt-4o-mini","params":{"max_tokens":100,"temperature":0,"response_format":{"type":"json_object"}}}},"tags":null,"metadata":null,"function_type":null,"function_data":{"type":"prompt"},"origin":null,"function_schema":null}]}' + body: '{"objects":[{"id":"5cee0811-48f0-453a-9103-365d3a3344a8","_xact_id":"1000196765725398940","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","log_id":"p","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"E2E Task","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-task","description":null,"created":"2026-03-04T23:21:50.043Z","prompt_data":{"prompt":{"type":"chat","messages":[{"role":"system","content":"You answer questions with JSON. Return ONLY the JSON object, no markdown, no code blocks, no backticks."},{"role":"user","content":"Question: {{input.question}}. Return ONLY JSON like {\"answer\": \"your answer\", \"confidence\": 0.9}. No markdown formatting."}]},"options":{"model":"gpt-4o-mini","params":{"max_tokens":100,"temperature":0,"response_format":{"type":"json_object"}}}},"tags":null,"metadata":null,"function_type":null,"function_data":{"type":"prompt"},"origin":null,"function_schema":null}]}' headers: Access-Control-Allow-Credentials: - "true" Access-Control-Expose-Headers: - - x-bt-cursor,x-bt-found-existing,x-bt-query-plan + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms Content-Type: - application/json; charset=utf-8 Date: - - Wed, 12 Nov 2025 15:00:27 GMT + - Wed, 04 Mar 2026 23:21:51 GMT Etag: - - W/"391-e7g1AM+1gReVj71N32+mVYVwDuE" + - W/"391-RyipEwunzCjsJA5GBlrnPSNhTww" Vary: - Origin, Accept-Encoding Via: - - 1.1 ab734ad5d81cc9d470b6176a05dd968e.cloudfront.net (CloudFront), 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 c6aabec83f5c081149a8843767dacc52.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - T74ZwGwpoAMEtrg= + - ZuK2eGs0IAMERLA= X-Amz-Cf-Id: - - jDGU3joTBIFHc2tfXwVbwZdLpnzfKH-qwfY4wXxkBfP3BrN5_L8dwg== + - ocvFx_HyALBROUSPocHR5JzBGcwJ8X2zH8NTWF-bfENi7rQNPuzpCA== X-Amz-Cf-Pop: - - JFK50-P5 - - JFK50-P2 + - HIO52-P2 + - HIO52-P4 X-Amzn-Requestid: - - ca78451f-3b60-465f-a2a0-0e2afffaf555 + - b660676e-584c-4318-88df-5b348791814c X-Amzn-Trace-Id: - - Root=1-6914a10a-43f1a2ed147c8e9b2aa29e00;Parent=39b94375a3b02854;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8be8f-1b1c6e3d65fa34933d9a81b3;Parent=7687df9a6a5a0bf5;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 6914a10a00000000398c274d02a41e0b + - 69a8be8f0000000024155419a75408d6 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 516.321125ms + duration: 514.074125ms - id: 7 request: proto: HTTP/1.1 @@ -428,7 +442,9 @@ interactions: request_uri: "" body: "" form: {} - headers: {} + headers: + Accept: + - application/json url: https://api.braintrust.dev/v1/function?limit=1&project_name=go-sdk-tests&slug=TestFunctionsAPI_EndToEnd_MixedTypes-scorer method: GET response: @@ -439,40 +455,40 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"objects":[{"id":"063f91ca-beb0-42dd-8f39-d34bebea6d98","_xact_id":"1000196129575023135","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","log_id":"p","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"E2E Scorer","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-scorer","description":null,"created":"2025-11-12T15:00:26.612Z","prompt_data":{"parser":{"type":"llm_classifier","use_cot":false,"choice_scores":{"correct":1,"incorrect":0}},"prompt":{"type":"chat","messages":[{"role":"system","content":"You are a scorer."},{"role":"user","content":"Is the output.answer field non-empty? Choose ''correct'' if yes, ''incorrect'' if no."}]},"options":{"model":"gpt-4o-mini","params":{"temperature":0}}},"tags":null,"metadata":null,"function_type":"scorer","function_data":{"type":"prompt"},"origin":null,"function_schema":null}]}' + body: '{"objects":[{"id":"3d00adc8-eb0f-4372-813b-6c945725b4e9","_xact_id":"1000196765725465873","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","log_id":"p","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"E2E Scorer","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-scorer","description":null,"created":"2026-03-04T23:21:51.032Z","prompt_data":{"parser":{"type":"llm_classifier","use_cot":false,"choice_scores":{"correct":1,"incorrect":0}},"prompt":{"type":"chat","messages":[{"role":"system","content":"You are a scorer."},{"role":"user","content":"Is the output.answer field non-empty? Choose ''correct'' if yes, ''incorrect'' if no."}]},"options":{"model":"gpt-4o-mini","params":{"temperature":0}}},"tags":null,"metadata":null,"function_type":"scorer","function_data":{"type":"prompt"},"origin":null,"function_schema":null}]}' headers: Access-Control-Allow-Credentials: - "true" Access-Control-Expose-Headers: - - x-bt-cursor,x-bt-found-existing,x-bt-query-plan + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms Content-Type: - application/json; charset=utf-8 Date: - - Wed, 12 Nov 2025 15:00:27 GMT + - Wed, 04 Mar 2026 23:21:52 GMT Etag: - - W/"33a-5OmlqaB1nSGvD+Kl+SBxtCIA+PI" + - W/"33a-wcYHtnEK7jjqqUDbTMEaqim3OVU" Vary: - Origin, Accept-Encoding Via: - - 1.1 8a9cdb228e33f8d52a4b42c56ca26590.cloudfront.net (CloudFront), 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 7f51caabae8141bdcde4283a42be2a56.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - T74Z2FYeoAMEp7Q= + - ZuK2jHh4IAMEmsg= X-Amz-Cf-Id: - - RxOZRJE8PdEN4BGUY1SbmOPZe7F3HXX0nwKynn2HNtKJfdzTrnWg0A== + - kCtNzh1u-zQQUd85o8cxrtitwd31_l-04R7qKx_qcbvzMAUTsMzW3A== X-Amz-Cf-Pop: - - JFK50-P5 - - JFK50-P2 + - HIO52-P2 + - HIO52-P4 X-Amzn-Requestid: - - 2e590e88-06e4-4ccd-9979-1b7f88d769e6 + - 040f1e98-c179-4cbf-9eae-90ab73e8979c X-Amzn-Trace-Id: - - Root=1-6914a10b-31ac38366013571e2face76b;Parent=243c6f347d1cef8a;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8be8f-4d8bbfe516dbae1d72cbbacc;Parent=7b07aa7b52a3bf33;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 6914a10b000000003ad3068347d3e5ce + - 69a8be8f000000004cdebb19a9953ee8 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 503.660916ms + duration: 509.136834ms - id: 8 request: proto: HTTP/1.1 @@ -487,6 +503,8 @@ interactions: body: '{"name":"go-sdk-tests"}' form: {} headers: + Accept: + - application/json Content-Type: - application/json url: https://api.braintrust.dev/v1/project @@ -499,42 +517,42 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"go-sdk-tests","created":"2025-11-04T13:38:56.532Z","deleted_at":null,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","settings":null}' + body: '{"id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"go-sdk-tests","description":null,"created":"2025-11-04T13:38:56.532Z","deleted_at":null,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","settings":null}' headers: Access-Control-Allow-Credentials: - "true" Access-Control-Expose-Headers: - - x-bt-cursor,x-bt-found-existing,x-bt-query-plan + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms Content-Type: - application/json; charset=utf-8 Date: - - Wed, 12 Nov 2025 15:00:28 GMT + - Wed, 04 Mar 2026 23:21:52 GMT Etag: - - W/"eb-poL8yUftdUFahhL/Peyi6gN8N/E" + - W/"fe-rfCZvNlcUhLHZTl4/HHBhOEJFv4" Vary: - Origin, Accept-Encoding Via: - - 1.1 98bc8180e0431e8f05afc9802305f1d2.cloudfront.net (CloudFront), 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 26c9d43b9089eee93b9e4ad4293d02c0.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - T74Z7EH5IAMEbjQ= + - ZuK2oHJ7IAMES0Q= X-Amz-Cf-Id: - - 1fiCjYEQxXwVKKgmJsnE_vuRpmqWc8CvN9okoPxHPb_pZorSKpvc9w== + - 7L7ybREqakzXlS3hXhcvQZRM_l3geyu5aHIJiO55jhTUqIArDuVYjg== X-Amz-Cf-Pop: - - JFK50-P5 - - JFK50-P2 + - HIO52-P2 + - HIO52-P4 X-Amzn-Requestid: - - f27c857a-5f1d-40c7-b587-cfd807827e3e + - 8b1f87d6-ac18-4b1f-bf25-8c7c3450353b X-Amzn-Trace-Id: - - Root=1-6914a10b-4eb6ea5f3c632efd109ca197;Parent=0b8bae4768799a79;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8be90-4041adc21bb30fe3386bb63a;Parent=0d63d3a7dfd2f620;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Found-Existing: - "true" X-Bt-Internal-Trace-Id: - - 6914a10b00000000577c5d0dc7cfe5ab + - 69a8be900000000017695c353fc4e50e X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 422.061917ms + duration: 277.811125ms - id: 9 request: proto: HTTP/1.1 @@ -549,6 +567,8 @@ interactions: body: '{"project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","name":"test-e2e-exp","ensure_new":true}' form: {} headers: + Accept: + - application/json Content-Type: - application/json url: https://api.braintrust.dev/v1/experiment @@ -561,40 +581,40 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"id":"fb3f7727-c60f-48e4-ad30-2a2befaf85f8","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","name":"test-e2e-exp-20b5b9c1","description":null,"created":"2025-11-12T15:00:28.439Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"public":false,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","metadata":null,"tags":null}' + body: '{"id":"18d8ffff-1a3b-4ba4-b714-b9fd7e86b508","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","name":"test-e2e-exp-83e080a6","description":null,"created":"2026-03-04T23:21:52.805Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"public":false,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","metadata":null,"tags":null}' headers: Access-Control-Allow-Credentials: - "true" Access-Control-Expose-Headers: - - x-bt-cursor,x-bt-found-existing,x-bt-query-plan + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms Content-Type: - application/json; charset=utf-8 Date: - - Wed, 12 Nov 2025 15:00:28 GMT + - Wed, 04 Mar 2026 23:21:52 GMT Etag: - - W/"17f-L7Es4ehBG+vrDKjUmJB1DsxDWq4" + - W/"17f-N3C0LL/w6WWLkMx7P4tk/BG8aes" Vary: - Origin, Accept-Encoding Via: - - 1.1 68f2eed06d7ecb02b863cacb0da2fc28.cloudfront.net (CloudFront), 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 b7b9fc5331efc8b070db0bf27b36820e.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - T74Z_Ez-oAMET0w= + - ZuK2rFFjoAMEXHw= X-Amz-Cf-Id: - - UGhQcSE7IuKz2_tj0tf_rcYpZ0QDw3lvdiQ1N0RPkDxdWNaztSC20w== + - Ulb4MEaqIEnEk1wCNcMJf3OrWhDy17HC14lVzDFaD4Ini2r2sHdpPQ== X-Amz-Cf-Pop: - - JFK50-P5 - - JFK50-P2 + - HIO52-P2 + - HIO52-P4 X-Amzn-Requestid: - - 16de2b29-27de-476c-a409-4e06087ba8a7 + - 7f1c978b-acad-4b96-8924-d485a1016180 X-Amzn-Trace-Id: - - Root=1-6914a10c-5ed59e9b120cb3ac32c7627d;Parent=762376db910c8dc6;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8be90-576e008041d837662508e60d;Parent=30ac92dc3364863d;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 6914a10c000000006f99ae55efce4c51 + - 69a8be900000000010d5c5728824f6ba X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 191.883375ms + duration: 224.047208ms - id: 10 request: proto: HTTP/1.1 @@ -609,9 +629,11 @@ interactions: body: '{"input":{"question":"What is the capital of France?"}}' form: {} headers: + Accept: + - application/json Content-Type: - application/json - url: https://api.braintrust.dev/v1/function/29072339-6591-46f1-a865-3fcce6c44f40/invoke + url: https://api.braintrust.dev/v1/function/5cee0811-48f0-453a-9103-365d3a3344a8/invoke method: POST response: proto: HTTP/2.0 @@ -630,37 +652,37 @@ interactions: Cf-Cache-Status: - DYNAMIC Cf-Ray: - - 99d6e6329a156905-IAD + - 9d749eaf7f87f282-IAD Content-Type: - application/json Date: - - Wed, 12 Nov 2025 15:00:32 GMT + - Wed, 04 Mar 2026 23:21:54 GMT Openai-Organization: - braintrust-data Openai-Processing-Ms: - - "2381" + - "110" Openai-Project: - proj_wMRY6YpEiASXMxPIIcI9nQRi Openai-Version: - "2020-10-01" Set-Cookie: - - _cfuvid=NE7TD4ro6b4V.0U1ROSVW.E3d2j30m6rR3dBRFbIKko-1762959631814-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + - __cf_bm=LO0KPhzriFQNW2.yAhWD7HrQMpwnbPtZtv2JqJG1VF0-1772666513.8365242-1.0.1.1-EFUlSlC6kIDUiVxO8vtWoegTf.KpSML0.lD_2Nh_7PdO_C43HS767rcggvHtsuzPl09rQgT2YmA_3HpCAyeymuwnwudvUGkPamfEy3wXleyuBineGjVLkM7TrPpmzZgW; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 04 Mar 2026 23:51:54 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Via: - - 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Cf-Id: - - E67Jp_C0n039UteDcOg19wp3zDtbgeMBYC3e-6FyVvyv1ziwymE2fg== + - RWaHncyuNRmWj45UhmZpYHMeFWkbRgpWQzZ8Sr175K0GDGw0Fn1MLQ== X-Amz-Cf-Pop: - - JFK50-P2 + - HIO52-P4 X-Amzn-Requestid: - - 71841cc7-fce1-4e39-b685-8634737e6ecb + - 5d9d5311-6ea0-429c-b180-16ddacd5f96f X-Amzn-Trace-Id: - - Root=1-6914a10c-06a5a96a7bb5516b6a74c8fe;Parent=767671b96874afb8;Sampled=0;Lineage=1:8be8f50d:0 + - Root=1-69a8be91-3499dbfb5ceabb7f763b1f30;Parent=1b79cc62450e043c;Sampled=0;Lineage=1:8be8f50d:0 X-Bt-Cached: - MISS X-Bt-Function-Creds-Cached: - - HIT + - MISS X-Bt-Function-Meta-Cached: - MISS X-Bt-Used-Endpoint: @@ -669,8 +691,6 @@ interactions: - Miss from cloudfront X-Content-Type-Options: - nosniff - X-Envoy-Upstream-Service-Time: - - "2448" X-Openai-Proxy-Wasm: - v0.1 X-Ratelimit-Limit-Requests: @@ -686,27 +706,29 @@ interactions: X-Ratelimit-Reset-Tokens: - 0s X-Request-Id: - - req_e27adf5c9f95417f9db6e20812f8fe97 + - req_e36d9b8d401c453d9cd56fe5b477ca26 status: 200 OK code: 200 - duration: 4.09908225s + duration: 1.602242666s - id: 11 request: proto: HTTP/1.1 proto_major: 1 proto_minor: 1 - content_length: 156 + content_length: 193 transfer_encoding: [] trailer: {} host: api.braintrust.dev remote_addr: "" request_uri: "" - body: '{"input":{"expected":{"answer":"Paris","confidence":1},"input":{"question":"What is the capital of France?"},"output":{"answer":"Paris","confidence":0.95}}}' + body: '{"input":{"expected":{"answer":"Paris","confidence":1},"input":{"question":"What is the capital of France?"},"output":{"answer":"Paris","confidence":0.95},"trace":{"spans":null,"thread":null}}}' form: {} headers: + Accept: + - application/json Content-Type: - application/json - url: https://api.braintrust.dev/v1/function/063f91ca-beb0-42dd-8f39-d34bebea6d98/invoke + url: https://api.braintrust.dev/v1/function/3d00adc8-eb0f-4372-813b-6c945725b4e9/invoke method: POST response: proto: HTTP/2.0 @@ -725,33 +747,33 @@ interactions: Cf-Cache-Status: - DYNAMIC Cf-Ray: - - 99d6e64aea71ca3a-IAD + - 9d749ebca8d2d6f4-IAD Content-Type: - application/json Date: - - Wed, 12 Nov 2025 15:00:33 GMT + - Wed, 04 Mar 2026 23:21:56 GMT Openai-Organization: - braintrust-data Openai-Processing-Ms: - - "454" + - "123" Openai-Project: - proj_wMRY6YpEiASXMxPIIcI9nQRi Openai-Version: - "2020-10-01" Set-Cookie: - - _cfuvid=zyE3rObDpfQwUEIP.8RY1X_2uU.vlZAnGI6rvvJAzMU-1762959633692-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + - __cf_bm=nwKAkxilJMNyMbsGSeHHvgExBVDhmXpHmOQtFuxbSX4-1772666515.9499893-1.0.1.1-Bm2sPQHXASrKshvzzuhQF9VZ8DqHVGoiRG5Uwsba3soQHTrFTmeID0Df3SXejLkCx9X2kHZbkZ5hW5Ob_UpAb39cUYzvmm_8gKWM3PgE02q5UXEI1rj5cFBv_Xtvh.uU; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 04 Mar 2026 23:51:56 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Via: - - 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Cf-Id: - - hDZ0MULEIZgOhWsrhJXQvId1nQvhVDfjNNQtsvsv6FzVrLY_bxyVDQ== + - ZoY-vQI297qhQaE7LXu3u1jfB9KZOJ3E0IwtRqgJefxioJJYMtn2jg== X-Amz-Cf-Pop: - - JFK50-P2 + - HIO52-P4 X-Amzn-Requestid: - - 44068fc3-d8a6-4c5a-b017-fdcdeffcbf9e + - 5c13920b-dd1c-4dbd-b4d9-aac889fd4785 X-Amzn-Trace-Id: - - Root=1-6914a110-362efa2713f9ddaa5b185c18;Parent=0f8f33e95e5897f9;Sampled=0;Lineage=1:8be8f50d:0 + - Root=1-69a8be93-609c95050c1ce1b870f9c683;Parent=5f54dcba4942d245;Sampled=0;Lineage=1:8be8f50d:0 X-Bt-Cached: - MISS X-Bt-Function-Creds-Cached: @@ -764,8 +786,6 @@ interactions: - Miss from cloudfront X-Content-Type-Options: - nosniff - X-Envoy-Upstream-Service-Time: - - "492" X-Openai-Proxy-Wasm: - v0.1 X-Ratelimit-Limit-Requests: @@ -781,10 +801,10 @@ interactions: X-Ratelimit-Reset-Tokens: - 0s X-Request-Id: - - req_ad28be45002847c28e945e4ee86af84e + - req_ca7960f0b73541b6a1199f7a214bd13d status: 200 OK code: 200 - duration: 1.349148834s + duration: 1.048096458s - id: 12 request: proto: HTTP/1.1 @@ -799,9 +819,11 @@ interactions: body: '{"input":{"question":"What is 2+2?"}}' form: {} headers: + Accept: + - application/json Content-Type: - application/json - url: https://api.braintrust.dev/v1/function/29072339-6591-46f1-a865-3fcce6c44f40/invoke + url: https://api.braintrust.dev/v1/function/5cee0811-48f0-453a-9103-365d3a3344a8/invoke method: POST response: proto: HTTP/2.0 @@ -820,33 +842,33 @@ interactions: Cf-Cache-Status: - DYNAMIC Cf-Ray: - - 99d6e650aa80ca3a-IAD + - 9d749ec0282f81f7-IAD Content-Type: - application/json Date: - - Wed, 12 Nov 2025 15:00:35 GMT + - Wed, 04 Mar 2026 23:21:57 GMT Openai-Organization: - braintrust-data Openai-Processing-Ms: - - "344" + - "179" Openai-Project: - proj_wMRY6YpEiASXMxPIIcI9nQRi Openai-Version: - "2020-10-01" Set-Cookie: - - _cfuvid=JxkU8ffxPlbTo.Y1dlirjKsZNWdrgMMaXLvJdQDT8dA-1762959634507-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + - __cf_bm=F8eKRNSUyQ6oplk5F8bLOEeuei2OnG84BP4rPx95kEQ-1772666516.5062973-1.0.1.1-Y4FcjyIVXYTzAI.otuQDnN5diTj__wU4PVl9BAV5fRhkC2buoh7iaHxSkfPsH2B5ie7GA.2LQpxAlu4_pZbCzXTgdKC.x3gSccgt8Lq1aY5gKlzWCWkheNKwkEtECk3r; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 04 Mar 2026 23:51:56 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Via: - - 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Cf-Id: - - iD530VSXpcaD5KeIWZi43EMVLosCgVwFvA7fR0l4joV5Z_og0pVqoQ== + - m4CiDSSPMI8aL9m6JHz82XPPMdrQDMMszFqKcEvlGESemf6nDrbbig== X-Amz-Cf-Pop: - - JFK50-P2 + - HIO52-P4 X-Amzn-Requestid: - - ff69756f-81db-43aa-a27a-d59ba9af4ed2 + - eb232cba-57d6-4268-8e0b-54521e1285c3 X-Amzn-Trace-Id: - - Root=1-6914a111-4884681227fd631c76e27993;Parent=4ecb9c56aa491a66;Sampled=0;Lineage=1:8be8f50d:0 + - Root=1-69a8be94-7fbc34524c79007824074629;Parent=75ebbb6dbdbc8e3b;Sampled=0;Lineage=1:8be8f50d:0 X-Bt-Cached: - MISS X-Bt-Function-Creds-Cached: @@ -859,8 +881,6 @@ interactions: - Miss from cloudfront X-Content-Type-Options: - nosniff - X-Envoy-Upstream-Service-Time: - - "410" X-Openai-Proxy-Wasm: - v0.1 X-Ratelimit-Limit-Requests: @@ -876,27 +896,29 @@ interactions: X-Ratelimit-Reset-Tokens: - 0s X-Request-Id: - - req_10a6daacecde47339f732766d7c53f51 + - req_5185fd4c69894c918aa1b5cbae7f3aba status: 200 OK code: 200 - duration: 1.036700792s + duration: 792.825583ms - id: 13 request: proto: HTTP/1.1 proto_major: 1 proto_minor: 1 - content_length: 129 + content_length: 166 transfer_encoding: [] trailer: {} host: api.braintrust.dev remote_addr: "" request_uri: "" - body: '{"input":{"expected":{"answer":"4","confidence":1},"input":{"question":"What is 2+2?"},"output":{"answer":"4","confidence":0.9}}}' + body: '{"input":{"expected":{"answer":"4","confidence":1},"input":{"question":"What is 2+2?"},"output":{"answer":"4","confidence":0.9},"trace":{"spans":null,"thread":null}}}' form: {} headers: + Accept: + - application/json Content-Type: - application/json - url: https://api.braintrust.dev/v1/function/063f91ca-beb0-42dd-8f39-d34bebea6d98/invoke + url: https://api.braintrust.dev/v1/function/3d00adc8-eb0f-4372-813b-6c945725b4e9/invoke method: POST response: proto: HTTP/2.0 @@ -915,33 +937,33 @@ interactions: Cf-Cache-Status: - DYNAMIC Cf-Ray: - - 99d6e64aea71ca3a-IAD + - 9d749ebca8d2d6f4-IAD Content-Type: - application/json Date: - - Wed, 12 Nov 2025 15:00:35 GMT + - Wed, 04 Mar 2026 23:21:57 GMT Openai-Organization: - braintrust-data Openai-Processing-Ms: - - "454" + - "123" Openai-Project: - proj_wMRY6YpEiASXMxPIIcI9nQRi Openai-Version: - "2020-10-01" Set-Cookie: - - _cfuvid=zyE3rObDpfQwUEIP.8RY1X_2uU.vlZAnGI6rvvJAzMU-1762959633692-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + - __cf_bm=nwKAkxilJMNyMbsGSeHHvgExBVDhmXpHmOQtFuxbSX4-1772666515.9499893-1.0.1.1-Bm2sPQHXASrKshvzzuhQF9VZ8DqHVGoiRG5Uwsba3soQHTrFTmeID0Df3SXejLkCx9X2kHZbkZ5hW5Ob_UpAb39cUYzvmm_8gKWM3PgE02q5UXEI1rj5cFBv_Xtvh.uU; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 04 Mar 2026 23:51:56 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Via: - - 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Cf-Id: - - yPg3IbXp-0FmfY1gkHSnxO3A8E-IFTUpNe2gCE_L92yNuOAHeU-lpg== + - fo-cSb9v46Gv5OBWZxUhQFjeWpvZYjNL7kXJmw8qHYThSjfyDLk6UA== X-Amz-Cf-Pop: - - JFK50-P2 + - HIO52-P4 X-Amzn-Requestid: - - cc91aca2-62bd-4001-a951-7682b978f1c3 + - 3e2ef1a4-7aa8-468e-b1d6-72e64baad31b X-Amzn-Trace-Id: - - Root=1-6914a113-364b3c292d0830400435f7a7;Parent=3694160fa7c7879b;Sampled=0;Lineage=1:8be8f50d:0 + - Root=1-69a8be95-47030e95164a33e54d201933;Parent=2b922a1e88f3d143;Sampled=0;Lineage=1:8be8f50d:0 X-Bt-Cached: - HIT X-Bt-Function-Creds-Cached: @@ -954,8 +976,6 @@ interactions: - Miss from cloudfront X-Content-Type-Options: - nosniff - X-Envoy-Upstream-Service-Time: - - "492" X-Openai-Proxy-Wasm: - v0.1 X-Ratelimit-Limit-Requests: @@ -971,10 +991,10 @@ interactions: X-Ratelimit-Reset-Tokens: - 0s X-Request-Id: - - req_ad28be45002847c28e945e4ee86af84e + - req_ca7960f0b73541b6a1199f7a214bd13d status: 200 OK code: 200 - duration: 78.633375ms + duration: 287.115334ms - id: 14 request: proto: HTTP/1.1 @@ -988,8 +1008,10 @@ interactions: request_uri: "" body: "" form: {} - headers: {} - url: https://api.braintrust.dev/v1/function/063f91ca-beb0-42dd-8f39-d34bebea6d98 + headers: + Accept: + - application/json + url: https://api.braintrust.dev/v1/function/3d00adc8-eb0f-4372-813b-6c945725b4e9 method: DELETE response: proto: HTTP/2.0 @@ -999,40 +1021,40 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-scorer","id":"063f91ca-beb0-42dd-8f39-d34bebea6d98","created":"2025-11-12T15:00:35.359Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_object_delete":true,"_xact_id":"1000196129575620471"}' + body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-scorer","id":"3d00adc8-eb0f-4372-813b-6c945725b4e9","created":"2026-03-04T23:21:58.096Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_object_delete":true,"_xact_id":"1000196765725936784"}' headers: Access-Control-Allow-Credentials: - "true" Access-Control-Expose-Headers: - - x-bt-cursor,x-bt-found-existing,x-bt-query-plan + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms Content-Type: - application/json; charset=utf-8 Date: - - Wed, 12 Nov 2025 15:00:35 GMT + - Wed, 04 Mar 2026 23:21:58 GMT Etag: - - W/"12f-GRQ22KEM0pPNmGtvOoGOw13UPS0" + - W/"12f-62LNmt00v79FTwwQkGZZHscWEUY" Vary: - Origin, Accept-Encoding Via: - - 1.1 241db89625f6ef70a00b0e19e0cfc332.cloudfront.net (CloudFront), 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 a235f5b7b15b9c8025af96fb0081dd58.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - T74bDFEvIAMEOmw= + - ZuK3fEw7oAMEK8w= X-Amz-Cf-Id: - - jNxtKHFW2FjfMdYjHCocTRn60nBxn8tcePbnrrNr3wZQhaIwJHyeSg== + - oBRJX3_ixcRTUd09ROH_0vbf3zHntR80qfWi8mHuh2Pjic3Y-F5Szw== X-Amz-Cf-Pop: - - JFK50-P5 - - JFK50-P2 + - HIO52-P2 + - HIO52-P4 X-Amzn-Requestid: - - be21dd98-89a8-43b0-aa04-5d559e20c2af + - bd730343-0524-416d-9fbd-aff3324f50ae X-Amzn-Trace-Id: - - Root=1-6914a113-3325bc4f5643cc641ab3f0f7;Parent=0d732b51a32cb479;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8be95-5b2ce8473e88bc2d13746c0d;Parent=48f58108cf7adaae;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 6914a113000000004489d85ebd9e608b + - 69a8be96000000004a3d019d789a1ee5 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 444.713459ms + duration: 407.340458ms - id: 15 request: proto: HTTP/1.1 @@ -1046,8 +1068,10 @@ interactions: request_uri: "" body: "" form: {} - headers: {} - url: https://api.braintrust.dev/v1/function/29072339-6591-46f1-a865-3fcce6c44f40 + headers: + Accept: + - application/json + url: https://api.braintrust.dev/v1/function/5cee0811-48f0-453a-9103-365d3a3344a8 method: DELETE response: proto: HTTP/2.0 @@ -1057,37 +1081,37 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-task","id":"29072339-6591-46f1-a865-3fcce6c44f40","created":"2025-11-12T15:00:35.677Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_object_delete":true,"_xact_id":"1000196129575620769"}' + body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-task","id":"5cee0811-48f0-453a-9103-365d3a3344a8","created":"2026-03-04T23:21:58.518Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_object_delete":true,"_xact_id":"1000196765725937232"}' headers: Access-Control-Allow-Credentials: - "true" Access-Control-Expose-Headers: - - x-bt-cursor,x-bt-found-existing,x-bt-query-plan + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms Content-Type: - application/json; charset=utf-8 Date: - - Wed, 12 Nov 2025 15:00:35 GMT + - Wed, 04 Mar 2026 23:21:58 GMT Etag: - - W/"12d-2FirTT1wW4vV/jHPRbLESlnaqbk" + - W/"12d-TuqS0Cq/1Ac5KXhG/HAE290GWVg" Vary: - Origin, Accept-Encoding Via: - - 1.1 95708ab75ec6181aa75086df530332d6.cloudfront.net (CloudFront), 1.1 f458ab1245bb4f257969c1da8e708f88.cloudfront.net (CloudFront) + - 1.1 4e7012bff211fc1604763d0935533d32.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - T74bHEb8oAMEAXQ= + - ZuK3kEuKIAMEikA= X-Amz-Cf-Id: - - Q0lt3Z8AyXxzsTadS3pWziJIyiM-64121IeiC67dXQILQbwbC3Es6g== + - eoDLcs5b3_s9QheqGINw1_IOiN-rB1KBpRhmhMVTAWlfNqPVkjF4gg== X-Amz-Cf-Pop: - - JFK50-P5 - - JFK50-P2 + - HIO52-P2 + - HIO52-P4 X-Amzn-Requestid: - - d55ab3a0-ceca-40c4-b4b5-2acc03f8f424 + - fe176c1b-8a42-4559-9ef4-6f036ad9330f X-Amzn-Trace-Id: - - Root=1-6914a113-4fa9083f7568fc8b6d9d309e;Parent=70453b5d4c7330f1;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8be96-05bf6d0c2ac2df910d310753;Parent=77dff1cb3a95ec19;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 6914a1130000000050092a3a6c82a87b + - 69a8be9600000000584f4df6018f99fd X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 374.398208ms + duration: 481.053292ms diff --git a/eval/trace.go b/eval/trace.go deleted file mode 100644 index 0775429..0000000 --- a/eval/trace.go +++ /dev/null @@ -1,266 +0,0 @@ -package eval - -import ( - "context" - "sync" - - "github.com/braintrustdata/braintrust-sdk-go/api" - functionsapi "github.com/braintrustdata/braintrust-sdk-go/api/functions" - "github.com/braintrustdata/braintrust-sdk-go/api/objects" -) - -// JSONObject represents a JSON object for trace payloads. -type JSONObject = map[string]any - -// Trace provides access to trace data for scorers. -type Trace interface { - // GetSpans returns spans for the provided span types. - GetSpans(spanTypes []string) []JSONObject - // GetThread returns thread entries associated with the case. - GetThread() []JSONObject -} - -type noopTrace struct{} - -func newTrace() Trace { - return noopTrace{} -} - -func (t noopTrace) GetSpans(spanTypes []string) []JSONObject { - return []JSONObject{} -} - -func (t noopTrace) GetThread() []JSONObject { - return []JSONObject{} -} - -type traceImpl struct { - objectType string - objectID string - rootSpanID string - - apiClient *api.API - ensureSpansFlushed func() error - - flushOnce sync.Once - flushErr error -} - -func newEvalTrace( - apiClient *api.API, - objectType string, - objectID string, - rootSpanID string, - ensureSpansFlushed func() error, -) Trace { - return &traceImpl{ - objectType: objectType, - objectID: objectID, - rootSpanID: rootSpanID, - apiClient: apiClient, - ensureSpansFlushed: ensureSpansFlushed, - } -} - -func (t *traceImpl) GetSpans(spanTypes []string) []JSONObject { - if t.objectType == "" || t.objectID == "" || t.rootSpanID == "" || t.apiClient == nil { - return []JSONObject{} - } - - if err := t.ensureSpansReady(); err != nil { - return []JSONObject{} - } - - spans, err := t.fetchSpans(spanTypes) - if err != nil { - return []JSONObject{} - } - return spans -} - -func (t *traceImpl) GetThread() []JSONObject { - if t.objectType == "" || t.objectID == "" || t.rootSpanID == "" || t.apiClient == nil { - return []JSONObject{} - } - - if err := t.ensureSpansReady(); err != nil { - return []JSONObject{} - } - - thread, err := t.fetchThread() - if err != nil { - return []JSONObject{} - } - return thread -} - -func (t *traceImpl) ensureSpansReady() error { - t.flushOnce.Do(func() { - if t.ensureSpansFlushed == nil { - return - } - t.flushErr = t.ensureSpansFlushed() - }) - return t.flushErr -} - -func (t *traceImpl) fetchSpans(spanTypes []string) ([]JSONObject, error) { - var all []JSONObject - cursor := "" - - for { - req := objects.FetchParams{ - Limit: 1000, - Filter: buildSpanFilter(t.rootSpanID, spanTypes), - } - if cursor != "" { - req.Cursor = cursor - } - - payload, err := t.apiClient.Objects().Fetch(context.Background(), t.objectType, t.objectID, req) - if err != nil { - return nil, err - } - - rows := payload.Events - if len(rows) == 0 { - rows = payload.Rows - } - if len(rows) == 0 { - rows = payload.Objects - } - - for _, row := range rows { - if isScorerPurpose(row) { - continue - } - all = append(all, projectSpanRow(row)) - } - - if payload.Cursor == "" { - break - } - cursor = payload.Cursor - } - - return all, nil -} - -func (t *traceImpl) fetchThread() ([]JSONObject, error) { - payload, err := t.apiClient.Functions().InvokeGlobal(context.Background(), functionsapi.InvokeGlobalParams{ - GlobalFunction: "project_default", - FunctionType: "preprocessor", - Mode: "json", - Input: map[string]any{ - "trace_ref": map[string]any{ - "object_type": t.objectType, - "object_id": t.objectID, - "root_span_id": t.rootSpanID, - }, - }, - }) - if err != nil { - return nil, err - } - - values, ok := payload.([]any) - if !ok { - return []JSONObject{}, nil - } - - thread := make([]JSONObject, 0, len(values)) - for _, value := range values { - if item, ok := value.(map[string]any); ok { - thread = append(thread, item) - } - } - return thread, nil -} - -func buildSpanFilter(rootSpanID string, spanTypeFilter []string) JSONObject { - children := []JSONObject{ - { - "op": "eq", - "left": JSONObject{ - "op": "ident", - "name": []string{"root_span_id"}, - }, - "right": JSONObject{ - "op": "literal", - "value": rootSpanID, - }, - }, - { - "op": "or", - "children": []JSONObject{ - { - "op": "isnull", - "expr": JSONObject{ - "op": "ident", - "name": []string{"span_attributes", "purpose"}, - }, - }, - { - "op": "ne", - "left": JSONObject{ - "op": "ident", - "name": []string{"span_attributes", "purpose"}, - }, - "right": JSONObject{ - "op": "literal", - "value": "scorer", - }, - }, - }, - }, - } - - if len(spanTypeFilter) > 0 { - children = append(children, JSONObject{ - "op": "in", - "left": JSONObject{ - "op": "ident", - "name": []string{"span_attributes", "type"}, - }, - "right": JSONObject{ - "op": "literal", - "value": spanTypeFilter, - }, - }) - } - - return JSONObject{ - "op": "and", - "children": children, - } -} - -func isScorerPurpose(row JSONObject) bool { - attrs, ok := row["span_attributes"].(map[string]any) - if !ok || attrs == nil { - return false - } - purpose, ok := attrs["purpose"].(string) - return ok && purpose == "scorer" -} - -func projectSpanRow(row JSONObject) JSONObject { - out := JSONObject{} - for _, key := range []string{ - "input", - "output", - "metadata", - "span_id", - "span_parents", - "span_attributes", - "id", - "_xact_id", - "_pagination_key", - "root_span_id", - } { - if value, ok := row[key]; ok { - out[key] = value - } - } - return out -} diff --git a/examples/internal/trace-scorer/main.go b/examples/internal/trace-scorer/main.go index 9e06ce7..62304ad 100644 --- a/examples/internal/trace-scorer/main.go +++ b/examples/internal/trace-scorer/main.go @@ -41,19 +41,18 @@ func main() { }) traceAwareScorer := eval.NewScorer("trace_aware", func(ctx context.Context, tr eval.TaskResult[string, string]) (eval.Scores, error) { - if tr.Trace == nil { - return eval.Scores{{ - Name: "trace_aware", - Score: 0, - Metadata: map[string]any{ - "error": "trace is nil", - }, - }}, nil + allSpans, err := tr.Spans(ctx) + if err != nil { + return nil, err + } + customSpans, err := tr.Spans(ctx, eval.WithSpanTypes("custom")) + if err != nil { + return nil, err + } + thread, err := tr.Thread(ctx) + if err != nil { + return nil, err } - - allSpans := tr.Trace.GetSpans(nil) - customSpans := tr.Trace.GetSpans([]string{"custom"}) - thread := tr.Trace.GetThread() log.Printf("trace info: spans=%d custom_spans=%d thread=%d", len(allSpans), len(customSpans), len(thread)) From 60d4f16c88cb1e9bc89c494acccbae416b1eec88 Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Wed, 4 Mar 2026 16:21:02 -0800 Subject: [PATCH 6/8] more --- api/functions/functions.go | 12 +- api/functions/functions_test.go | 84 ---- api/objects/objects_test.go | 72 ++-- .../TestObjects_Fetch_Integration.yaml | 381 ++++++++++++++++++ eval/spans_test.go | 156 +++---- .../cassettes/TestSpans_Integration.yaml | 253 ++++++++++++ 6 files changed, 732 insertions(+), 226 deletions(-) create mode 100644 api/objects/testdata/cassettes/TestObjects_Fetch_Integration.yaml create mode 100644 eval/testdata/cassettes/TestSpans_Integration.yaml diff --git a/api/functions/functions.go b/api/functions/functions.go index 2ded42e..9a171c2 100644 --- a/api/functions/functions.go +++ b/api/functions/functions.go @@ -3,7 +3,6 @@ package functions import ( "context" "encoding/json" - "errors" "fmt" "io" @@ -112,16 +111,7 @@ func (a *API) InvokeGlobal(ctx context.Context, req InvokeGlobalParams) (any, er return nil, fmt.Errorf("global function is required") } - out, err := a.invokePath(ctx, "/function/invoke", req) - if err == nil { - return out, nil - } - - var httpErr *https.HTTPError - if errors.As(err, &httpErr) && httpErr.StatusCode == 404 { - return a.invokePath(ctx, "/v1/function/invoke", req) - } - return nil, err + return a.invokePath(ctx, "/function/invoke", req) } // Delete deletes a function by ID. diff --git a/api/functions/functions_test.go b/api/functions/functions_test.go index 636f9dd..867a04c 100644 --- a/api/functions/functions_test.go +++ b/api/functions/functions_test.go @@ -2,9 +2,6 @@ package functions import ( "context" - "encoding/json" - "net/http" - "net/http/httptest" "testing" "github.com/stretchr/testify/assert" @@ -323,87 +320,6 @@ func TestFunctions_Invoke_Validation(t *testing.T) { assert.Contains(t, err.Error(), "required") } -func TestFunctions_InvokeGlobal_PostsExpectedPayload(t *testing.T) { - t.Parallel() - - ctx := context.Background() - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - require.Equal(t, http.MethodPost, r.Method) - require.Equal(t, "/function/invoke", r.URL.Path) - assert.Equal(t, "application/json", r.Header.Get("Accept")) - - var body map[string]any - require.NoError(t, json.NewDecoder(r.Body).Decode(&body)) - assert.Equal(t, "project_default", body["global_function"]) - assert.Equal(t, "preprocessor", body["function_type"]) - assert.Equal(t, "json", body["mode"]) - - input, ok := body["input"].(map[string]any) - require.True(t, ok) - assert.Equal(t, "abc", input["x"]) - - require.NoError(t, json.NewEncoder(w).Encode(map[string]any{ - "output": []map[string]any{ - {"role": "system", "content": "hello"}, - }, - })) - })) - defer server.Close() - - client := New(https.NewClient("test-key", server.URL, logger.Discard())) - - output, err := client.InvokeGlobal(ctx, InvokeGlobalParams{ - GlobalFunction: "project_default", - FunctionType: "preprocessor", - Mode: "json", - Input: map[string]any{"x": "abc"}, - }) - require.NoError(t, err) - - values, ok := output.([]any) - require.True(t, ok) - require.Len(t, values, 1) - - first, ok := values[0].(map[string]any) - require.True(t, ok) - assert.Equal(t, "system", first["role"]) -} - -func TestFunctions_InvokeGlobal_FallbackToV1On404(t *testing.T) { - t.Parallel() - - ctx := context.Background() - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - switch r.URL.Path { - case "/function/invoke": - http.NotFound(w, r) - case "/v1/function/invoke": - require.NoError(t, json.NewEncoder(w).Encode(map[string]any{ - "output": []map[string]any{ - {"role": "user", "content": "fallback"}, - }, - })) - default: - t.Fatalf("unexpected path: %s", r.URL.Path) - } - })) - defer server.Close() - - client := New(https.NewClient("test-key", server.URL, logger.Discard())) - - output, err := client.InvokeGlobal(ctx, InvokeGlobalParams{ - GlobalFunction: "project_default", - FunctionType: "preprocessor", - Mode: "json", - Input: map[string]any{"x": "abc"}, - }) - require.NoError(t, err) - - values, ok := output.([]any) - require.True(t, ok) - require.Len(t, values, 1) -} - func TestFunctions_InvokeGlobal_Validation(t *testing.T) { t.Parallel() diff --git a/api/objects/objects_test.go b/api/objects/objects_test.go index b31a52f..0c0c327 100644 --- a/api/objects/objects_test.go +++ b/api/objects/objects_test.go @@ -2,52 +2,64 @@ package objects import ( "context" - "encoding/json" - "net/http" - "net/http/httptest" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/braintrustdata/braintrust-sdk-go/api/datasets" + "github.com/braintrustdata/braintrust-sdk-go/api/projects" "github.com/braintrustdata/braintrust-sdk-go/internal/https" + "github.com/braintrustdata/braintrust-sdk-go/internal/vcr" "github.com/braintrustdata/braintrust-sdk-go/logger" ) -func TestObjects_Fetch_PostsExpectedRequest(t *testing.T) { - t.Parallel() +const integrationTestProject = "go-sdk-tests" - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - require.Equal(t, http.MethodPost, r.Method) - require.Equal(t, "/v1/experiment/exp-123/fetch", r.URL.Path) +func TestObjects_Fetch_Integration(t *testing.T) { + t.Parallel() - var body map[string]any - require.NoError(t, json.NewDecoder(r.Body).Decode(&body)) - assert.Equal(t, float64(1000), body["limit"]) + ctx := context.Background() + client := vcr.GetHTTPSClient(t) + api := New(client) - filter, ok := body["filter"].(map[string]any) - require.True(t, ok) - assert.Equal(t, "and", filter["op"]) + // Create a project and dataset with events + projectsAPI := projects.New(client) + project, err := projectsAPI.Create(ctx, projects.CreateParams{Name: integrationTestProject}) + require.NoError(t, err) - require.NoError(t, json.NewEncoder(w).Encode(FetchResponse{ - Events: []map[string]any{{"id": "row-1"}}, - Cursor: "next", - })) - })) - defer server.Close() + datasetsAPI := datasets.New(client) + dataset, err := datasetsAPI.Create(ctx, datasets.CreateParams{ + ProjectID: project.ID, + Name: "test-objects-fetch", + }) + require.NoError(t, err) + defer func() { _ = datasetsAPI.Delete(ctx, dataset.ID) }() - api := New(https.NewClient("test-key", server.URL, logger.Discard())) - resp, err := api.Fetch(context.Background(), "experiment", "exp-123", FetchParams{ - Limit: 1000, - Filter: map[string]any{ - "op": "and", - }, + err = datasetsAPI.InsertEvents(ctx, dataset.ID, []datasets.Event{ + {Input: map[string]any{"q": "1"}, Expected: map[string]any{"a": "1"}}, + {Input: map[string]any{"q": "2"}, Expected: map[string]any{"a": "2"}}, }) require.NoError(t, err) - require.NotNil(t, resp) - require.Len(t, resp.Events, 1) - assert.Equal(t, "row-1", resp.Events[0]["id"]) - assert.Equal(t, "next", resp.Cursor) + + // Fetch via the generic objects API (retry for eventual consistency) + var rows []map[string]any + for i := 0; i < 3; i++ { + resp, err := api.Fetch(ctx, "dataset", dataset.ID, FetchParams{Limit: 10}) + require.NoError(t, err) + require.NotNil(t, resp) + + rows = resp.Events + if len(rows) == 0 { + rows = resp.Rows + } + if len(rows) >= 2 { + break + } + time.Sleep(500 * time.Millisecond) + } + assert.GreaterOrEqual(t, len(rows), 2) } func TestObjects_Fetch_Validation(t *testing.T) { diff --git a/api/objects/testdata/cassettes/TestObjects_Fetch_Integration.yaml b/api/objects/testdata/cassettes/TestObjects_Fetch_Integration.yaml new file mode 100644 index 0000000..55977bd --- /dev/null +++ b/api/objects/testdata/cassettes/TestObjects_Fetch_Integration.yaml @@ -0,0 +1,381 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 23 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"name":"go-sdk-tests"}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/v1/project + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"go-sdk-tests","description":null,"created":"2025-11-04T13:38:56.532Z","deleted_at":null,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","settings":null}' + headers: + Access-Control-Allow-Credentials: + - "true" + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + Content-Type: + - application/json; charset=utf-8 + Date: + - Thu, 05 Mar 2026 00:05:43 GMT + Etag: + - W/"fe-rfCZvNlcUhLHZTl4/HHBhOEJFv4" + Vary: + - Origin, Accept-Encoding + Via: + - 1.1 4894bef31db1c311602a51393339af0a.cloudfront.net (CloudFront), 1.1 9b38ff4b39c6c0a269c601916dab060e.cloudfront.net (CloudFront) + X-Amz-Apigw-Id: + - ZuRRxGLyIAMEQQg= + X-Amz-Cf-Id: + - NtBwyeB08Y-ItpW1HDvPMWbMOmv_FBRYMy9xHjSReqVWlDXpRyIUNA== + X-Amz-Cf-Pop: + - HIO52-P2 + - HIO52-P4 + X-Amzn-Requestid: + - 159f6093-569b-4354-b9dc-73ef6bb9f39f + X-Amzn-Trace-Id: + - Root=1-69a8c8d7-5d22036c0ec1651c690bcc27;Parent=2fa26f4153aa1552;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Found-Existing: + - "true" + X-Bt-Internal-Trace-Id: + - 69a8c8d70000000068ada928481af99a + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 309.069292ms + - id: 1 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 81 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","name":"test-objects-fetch"}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/v1/dataset + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"id":"42c8e29a-cb66-48bb-b096-231ef67ee693","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","name":"test-objects-fetch","description":null,"created":"2026-03-05T00:05:44.048Z","deleted_at":null,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","metadata":null,"url_slug":"test-objects-fetch"}' + headers: + Access-Control-Allow-Credentials: + - "true" + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + Content-Type: + - application/json; charset=utf-8 + Date: + - Thu, 05 Mar 2026 00:05:44 GMT + Etag: + - W/"128-9dZLrIF+s7Enz6YOFwnL3vpsTXc" + Vary: + - Origin, Accept-Encoding + Via: + - 1.1 ac695892d6ed07904483819bdb88134e.cloudfront.net (CloudFront), 1.1 9b38ff4b39c6c0a269c601916dab060e.cloudfront.net (CloudFront) + X-Amz-Apigw-Id: + - ZuRRzHd4IAMEacw= + X-Amz-Cf-Id: + - Yrzhrg7VhMkScb55u7DCB1oLDIS-xb1wik8zT-oZUACQn9uvHDg8Qw== + X-Amz-Cf-Pop: + - HIO52-P2 + - HIO52-P4 + X-Amzn-Requestid: + - a4f176f0-a12a-4c08-b0f2-731d6e00b343 + X-Amzn-Trace-Id: + - Root=1-69a8c8d7-2c52d7f4730e93202c195117;Parent=6becba8bd3da90cf;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Internal-Trace-Id: + - 69a8c8d700000000199c4daf38897fc2 + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 207.960541ms + - id: 2 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 94 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"events":[{"input":{"q":"1"},"expected":{"a":"1"}},{"input":{"q":"2"},"expected":{"a":"2"}}]}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/v1/dataset/42c8e29a-cb66-48bb-b096-231ef67ee693/insert + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"row_ids":["12cbf61f-e2ca-4230-89c6-2160b78849c1","575f3273-9fdd-4f49-800f-a547cb47b56f"]}' + headers: + Access-Control-Allow-Credentials: + - "true" + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + Content-Type: + - application/json; charset=utf-8 + Date: + - Thu, 05 Mar 2026 00:05:44 GMT + Etag: + - W/"5b-gNVplGTUVPDxEk/AfzY8tvQvPOI" + Vary: + - Origin, Accept-Encoding + Via: + - 1.1 1f941fcf288b6d0259a0f708c955afae.cloudfront.net (CloudFront), 1.1 9b38ff4b39c6c0a269c601916dab060e.cloudfront.net (CloudFront) + X-Amz-Apigw-Id: + - ZuRR1Ha5oAMEB0w= + X-Amz-Cf-Id: + - yeLfCHQO4arThyvdAJEQrDHVijcnfh_QaxhDlMqG_0HDrgKrAMCvdg== + X-Amz-Cf-Pop: + - HIO52-P2 + - HIO52-P4 + X-Amzn-Requestid: + - 30c48abb-b9ca-4a2b-bc52-9e7c5880a6a7 + X-Amzn-Trace-Id: + - Root=1-69a8c8d8-76d732a45576b7ef455759c6;Parent=7b066d8767d21069;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Internal-Trace-Id: + - 69a8c8d8000000001d21567b603fc11f + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 487.507666ms + - id: 3 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 12 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"limit":10}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/v1/dataset/42c8e29a-cb66-48bb-b096-231ef67ee693/fetch + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"events":[]}' + headers: + Access-Control-Allow-Credentials: + - "true" + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + Content-Type: + - application/json + Date: + - Thu, 05 Mar 2026 00:05:54 GMT + Vary: + - Origin + Via: + - 1.1 f9cbfbc3568832d017c09dbd4649932c.cloudfront.net (CloudFront), 1.1 9b38ff4b39c6c0a269c601916dab060e.cloudfront.net (CloudFront) + X-Amz-Apigw-Id: + - ZuRR6E2ZoAMEB3A= + X-Amz-Cf-Id: + - 7kZYUDCQBe6m-b8nSMUDwsmt57_U-ic9Jjk4VzM69j0RtP6YcbXwQA== + X-Amz-Cf-Pop: + - HIO52-P2 + - HIO52-P4 + X-Amzn-Requestid: + - 6c5b3f9f-f095-4f4d-b21d-92880ca539e2 + X-Amzn-Trace-Id: + - Root=1-69a8c8d8-13d31e21678d56ae2609f81b;Parent=5995c5e98fce254b;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Api-Duration-Ms: + - "10021" + X-Bt-Brainstore-Duration-Ms: + - "10014" + X-Bt-Internal-Trace-Id: + - 69a8c8d80000000071fdc7c63e3f47c0 + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 10.149685667s + - id: 4 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 12 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"limit":10}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/v1/dataset/42c8e29a-cb66-48bb-b096-231ef67ee693/fetch + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"events":[{"_pagination_key":"p07613556004171874305","_xact_id":"1000196765898043957","audit_data":[{"_xact_id":"1000196765898043957","audit_data":{"action":"upsert"},"metadata":{},"source":"api"}],"classifications":null,"comments":null,"created":"2026-03-05T00:05:44.185Z","dataset_id":"42c8e29a-cb66-48bb-b096-231ef67ee693","expected":{"a":"2"},"facets":null,"id":"575f3273-9fdd-4f49-800f-a547cb47b56f","input":{"q":"2"},"is_root":true,"metadata":null,"origin":null,"project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","root_span_id":"1f320e46-2283-4b4e-9cad-cc67913655fb","span_id":"1f320e46-2283-4b4e-9cad-cc67913655fb","tags":null},{"_pagination_key":"p07613556004171874304","_xact_id":"1000196765898043957","audit_data":[{"_xact_id":"1000196765898043957","audit_data":{"action":"upsert"},"metadata":{},"source":"api"}],"classifications":null,"comments":null,"created":"2026-03-05T00:05:44.185Z","dataset_id":"42c8e29a-cb66-48bb-b096-231ef67ee693","expected":{"a":"1"},"facets":null,"id":"12cbf61f-e2ca-4230-89c6-2160b78849c1","input":{"q":"1"},"is_root":true,"metadata":null,"origin":null,"project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","root_span_id":"413c27c2-037c-4d73-8683-a141624db7ce","span_id":"413c27c2-037c-4d73-8683-a141624db7ce","tags":null}],"cursor":"aajI2PI1AAA"}' + headers: + Access-Control-Allow-Credentials: + - "true" + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + Content-Type: + - application/json + Date: + - Thu, 05 Mar 2026 00:05:55 GMT + Vary: + - Origin + Via: + - 1.1 f9cbfbc3568832d017c09dbd4649932c.cloudfront.net (CloudFront), 1.1 9b38ff4b39c6c0a269c601916dab060e.cloudfront.net (CloudFront) + X-Amz-Apigw-Id: + - ZuRTlGI9oAMEVLw= + X-Amz-Cf-Id: + - 3Yq-Do-eXArDGVLEJTgMjkZ7C4ENHC7ZndH91GAKFx9Rz9Ccxjk_aA== + X-Amz-Cf-Pop: + - HIO52-P2 + - HIO52-P4 + X-Amzn-Requestid: + - 57a8aa34-5e33-4702-894e-89d84cf58b5d + X-Amzn-Trace-Id: + - Root=1-69a8c8e3-5acc56e165bdccc2735a345b;Parent=23befe53b999f942;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Api-Duration-Ms: + - "182" + X-Bt-Brainstore-Duration-Ms: + - "176" + X-Bt-Cursor: + - aajI2PI1AAA + X-Bt-Internal-Trace-Id: + - 69a8c8e300000000180e18ff0d85357f + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 322.868542ms + - id: 5 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + Accept: + - application/json + url: https://api.braintrust.dev/v1/dataset/42c8e29a-cb66-48bb-b096-231ef67ee693 + method: DELETE + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"id":"42c8e29a-cb66-48bb-b096-231ef67ee693","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","name":"test-objects-fetch","description":null,"created":"2026-03-05T00:05:44.048Z","deleted_at":"2026-03-05T00:05:55.772Z","user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","metadata":null,"url_slug":"test-objects-fetch"}' + headers: + Access-Control-Allow-Credentials: + - "true" + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + Content-Type: + - application/json; charset=utf-8 + Date: + - Thu, 05 Mar 2026 00:05:55 GMT + Etag: + - W/"13e-pm9nW/A6zwttzgo7ca8oGurjkb8" + Vary: + - Origin, Accept-Encoding + Via: + - 1.1 e2ad8d56b8dbdb69144113ad1c008e02.cloudfront.net (CloudFront), 1.1 9b38ff4b39c6c0a269c601916dab060e.cloudfront.net (CloudFront) + X-Amz-Apigw-Id: + - ZuRToEcPoAMEvDg= + X-Amz-Cf-Id: + - jBBVVlYMA19Tav5xEVdiVUcpnX_lq4G05piS3jyflZGAW3pX5B9vOw== + X-Amz-Cf-Pop: + - HIO52-P2 + - HIO52-P4 + X-Amzn-Requestid: + - a50493f3-8dca-4e87-b882-823fe0023540 + X-Amzn-Trace-Id: + - Root=1-69a8c8e3-565cf3126a0d7e2e07594ff7;Parent=6f9cc76e0c721e0a;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Internal-Trace-Id: + - 69a8c8e3000000003a48e11d1467a996 + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 289.345792ms diff --git a/eval/spans_test.go b/eval/spans_test.go index 22d7610..57105ed 100644 --- a/eval/spans_test.go +++ b/eval/spans_test.go @@ -2,125 +2,79 @@ package eval import ( "context" - "encoding/json" - "net/http" - "net/http/httptest" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/sdk/trace" - "github.com/braintrustdata/braintrust-sdk-go/internal/auth" - "github.com/braintrustdata/braintrust-sdk-go/logger" + "github.com/braintrustdata/braintrust-sdk-go/api/projects" ) -func TestSpanFetcher_Thread_ReturnsThreadFromPreprocessorInvoke(t *testing.T) { +func TestSpans_Integration(t *testing.T) { + session, apiClient := setupIntegrationTest(t) t.Parallel() - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - require.Equal(t, http.MethodPost, r.Method) - require.Equal(t, "/function/invoke", r.URL.Path) - assert.Equal(t, "application/json", r.Header.Get("Accept")) - - var body map[string]any - require.NoError(t, json.NewDecoder(r.Body).Decode(&body)) - - assert.Equal(t, "project_default", body["global_function"]) - assert.Equal(t, "preprocessor", body["function_type"]) - assert.Equal(t, "json", body["mode"]) - - input, ok := body["input"].(map[string]any) - require.True(t, ok) - - traceRef, ok := input["trace_ref"].(map[string]any) - require.True(t, ok) - assert.Equal(t, "experiment", traceRef["object_type"]) - assert.Equal(t, "obj-123", traceRef["object_id"]) - assert.Equal(t, "root-456", traceRef["root_span_id"]) - - require.NoError(t, json.NewEncoder(w).Encode(map[string]any{ - "output": []map[string]any{ - {"role": "system", "content": "hello"}, - {"role": "user", "content": "hi"}, - }, - })) - })) - defer server.Close() - - session := auth.NewTestSession( - "test-key", - "org-id", - "org-name", - server.URL, - server.URL, - server.URL, - logger.Discard(), - ) - apiClient := session.API() - - fetcher := newSpanFetcher(apiClient, "experiment", "obj-123", "root-456", func() error { return nil }) - ctx := context.Background() - thread, err := fetcher.Thread(ctx) - require.NoError(t, err) - require.Len(t, thread, 2) - assert.Equal(t, "system", thread[0]["role"]) - assert.Equal(t, "user", thread[1]["role"]) -} -func TestSpanFetcher_Thread_ReturnsNilForNonArrayOutput(t *testing.T) { - t.Parallel() + _, err := apiClient.Projects().Create(ctx, projects.CreateParams{Name: integrationTestProject}) + require.NoError(t, err) - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - require.NoError(t, json.NewEncoder(w).Encode(map[string]any{ - "output": map[string]any{"not": "an array"}, - })) - })) - defer server.Close() - - session := auth.NewTestSession( - "test-key", - "org-id", - "org-name", - server.URL, - server.URL, - server.URL, - logger.Discard(), - ) - apiClient := session.API() - - fetcher := newSpanFetcher(apiClient, "experiment", "obj-123", "root-456", func() error { return nil }) + tp := trace.NewTracerProvider() + defer func() { _ = tp.Shutdown(ctx) }() + + evaluator := NewEvaluator[string, string](session, tp, apiClient, integrationTestProject) + result, err := evaluator.Run(ctx, Opts[string, string]{ + Experiment: "test-spans", + Dataset: NewDataset([]Case[string, string]{ + {Input: "hello", Expected: "hello"}, + }), + Task: T(func(ctx context.Context, input string) (string, error) { + return input, nil + }), + Scorers: []Scorer[string, string]{ + NewScorer("spans-test", func(ctx context.Context, tr TaskResult[string, string]) (Scores, error) { + // Fetch all spans + spans, err := tr.Spans(ctx) + require.NoError(t, err) + + // Fetch thread (will be nil for simple task, but should not error) + thread, err := tr.Thread(ctx) + require.NoError(t, err) + + t.Logf("spans=%d thread=%d", len(spans), len(thread)) + + // Filter by nonexistent type — should return empty + filtered, err := tr.Spans(ctx, WithSpanTypes("nonexistent")) + require.NoError(t, err) + assert.Empty(t, filtered) + + return S(1.0), nil + }), + }, + Quiet: true, + }) - ctx := context.Background() - thread, err := fetcher.Thread(ctx) require.NoError(t, err) - assert.Nil(t, thread) + require.NotNil(t, result) } -func TestSpanFetcher_Thread_ReturnsNilForNullOutput(t *testing.T) { +func TestSpans_NilFetcher(t *testing.T) { t.Parallel() - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - _, err := w.Write([]byte("null")) - require.NoError(t, err) - })) - defer server.Close() - - session := auth.NewTestSession( - "test-key", - "org-id", - "org-name", - server.URL, - server.URL, - server.URL, - logger.Discard(), - ) - apiClient := session.API() - - fetcher := newSpanFetcher(apiClient, "experiment", "obj-123", "root-456", func() error { return nil }) - ctx := context.Background() - thread, err := fetcher.Thread(ctx) - require.NoError(t, err) + + // TaskResult with no fetcher should return nil, nil + tr := TaskResult[string, string]{ + Input: "hello", + Output: "hello", + } + + spans, err := tr.Spans(ctx) + assert.NoError(t, err) + assert.Nil(t, spans) + + thread, err := tr.Thread(ctx) + assert.NoError(t, err) assert.Nil(t, thread) } diff --git a/eval/testdata/cassettes/TestSpans_Integration.yaml b/eval/testdata/cassettes/TestSpans_Integration.yaml new file mode 100644 index 0000000..d2e58e6 --- /dev/null +++ b/eval/testdata/cassettes/TestSpans_Integration.yaml @@ -0,0 +1,253 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 23 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"name":"go-sdk-tests"}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/v1/project + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"go-sdk-tests","description":null,"created":"2025-11-04T13:38:56.532Z","deleted_at":null,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","settings":null}' + headers: + Access-Control-Allow-Credentials: + - "true" + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + Content-Type: + - application/json; charset=utf-8 + Date: + - Thu, 05 Mar 2026 00:10:01 GMT + Etag: + - W/"fe-rfCZvNlcUhLHZTl4/HHBhOEJFv4" + Vary: + - Origin, Accept-Encoding + Via: + - 1.1 4e7012bff211fc1604763d0935533d32.cloudfront.net (CloudFront), 1.1 3caf9df4ca497afd40efb87f8957a7fa.cloudfront.net (CloudFront) + X-Amz-Apigw-Id: + - ZuR6FH-7IAMEMKQ= + X-Amz-Cf-Id: + - g07TENfJU2EYn1m69IOORXaRQbMrYoDBF3eeL76e33l_OTdg-6TuvQ== + X-Amz-Cf-Pop: + - HIO52-P2 + - HIO52-P4 + X-Amzn-Requestid: + - 028ea526-fd6c-4e3c-9a74-abc85da09560 + X-Amzn-Trace-Id: + - Root=1-69a8c9d9-30b886d50dade00f511cba07;Parent=411bf1e494ceef4e;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Found-Existing: + - "true" + X-Bt-Internal-Trace-Id: + - 69a8c9d90000000071cafbe183512841 + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 356.208959ms + - id: 1 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: "" + form: {} + headers: + Accept: + - application/json + url: https://api.braintrust.dev/api/apikey/login + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"org_info":[{"id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"matt-test-org","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}' + headers: + Access-Control-Allow-Credentials: + - "true" + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + Content-Type: + - application/json; charset=utf-8 + Date: + - Thu, 05 Mar 2026 00:10:01 GMT + Etag: + - W/"101-EqXzt+vlRFUt5HCzQY7qxp9rZU0" + Vary: + - Origin, Accept-Encoding + Via: + - 1.1 c6aabec83f5c081149a8843767dacc52.cloudfront.net (CloudFront), 1.1 3caf9df4ca497afd40efb87f8957a7fa.cloudfront.net (CloudFront) + X-Amz-Apigw-Id: + - ZuR6FE_soAMEF0w= + X-Amz-Cf-Id: + - VEFgHkf2_HoKU1tVnkwK0vGt1NQrakZCCA0yCYWMa0qU0H-CvKnZtQ== + X-Amz-Cf-Pop: + - HIO52-P2 + - HIO52-P4 + X-Amzn-Requestid: + - 267d9a25-2507-4505-a0f9-7fb063adf47d + X-Amzn-Trace-Id: + - Root=1-69a8c9d9-1e3ca805294cae2011c85b13;Parent=6477d7efce5ea41c;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Internal-Trace-Id: + - 69a8c9d900000000128f46ab7b71df04 + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 405.929209ms + - id: 2 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 23 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"name":"go-sdk-tests"}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/v1/project + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"go-sdk-tests","description":null,"created":"2025-11-04T13:38:56.532Z","deleted_at":null,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","settings":null}' + headers: + Access-Control-Allow-Credentials: + - "true" + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + Content-Type: + - application/json; charset=utf-8 + Date: + - Thu, 05 Mar 2026 00:10:02 GMT + Etag: + - W/"fe-rfCZvNlcUhLHZTl4/HHBhOEJFv4" + Vary: + - Origin, Accept-Encoding + Via: + - 1.1 eb6e5a827e45274130b33c12b0d48aaa.cloudfront.net (CloudFront), 1.1 3caf9df4ca497afd40efb87f8957a7fa.cloudfront.net (CloudFront) + X-Amz-Apigw-Id: + - ZuR6JEejIAMEacw= + X-Amz-Cf-Id: + - KE7N82w4HOuOeCQ5P2AwfOa0atQ881eJDFp61p5rb6tjJmFJyVSmjg== + X-Amz-Cf-Pop: + - HIO52-P2 + - HIO52-P4 + X-Amzn-Requestid: + - e6d13ede-981d-446c-aee8-b0872c5bc1e9 + X-Amzn-Trace-Id: + - Root=1-69a8c9da-3984a653291e44866d249af8;Parent=4a6ed661a29837c4;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Found-Existing: + - "true" + X-Bt-Internal-Trace-Id: + - 69a8c9da0000000019146f2c246507f0 + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 353.668041ms + - id: 3 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 91 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","name":"test-spans","ensure_new":true}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/v1/experiment + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"id":"bcd37563-913d-4192-8df4-ad21f7789cc1","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","name":"test-spans","description":null,"created":"2026-03-05T00:10:02.544Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"public":false,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","metadata":null,"tags":null}' + headers: + Access-Control-Allow-Credentials: + - "true" + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + Content-Type: + - application/json; charset=utf-8 + Date: + - Thu, 05 Mar 2026 00:10:02 GMT + Etag: + - W/"174-TF3v5bwhAKfv+utVChBaVQS1WcM" + Vary: + - Origin, Accept-Encoding + Via: + - 1.1 d220e3f3d93439a8c69225156c6ae800.cloudfront.net (CloudFront), 1.1 3caf9df4ca497afd40efb87f8957a7fa.cloudfront.net (CloudFront) + X-Amz-Apigw-Id: + - ZuR6MEnaIAMERow= + X-Amz-Cf-Id: + - AF-BhtVgA2KfHNhg62eKvo0iscKUINaY2wwfvVs0UxF7J48r0MVURg== + X-Amz-Cf-Pop: + - HIO52-P2 + - HIO52-P4 + X-Amzn-Requestid: + - e4809472-0925-442d-9e5b-4e42db5eafeb + X-Amzn-Trace-Id: + - Root=1-69a8c9da-1d9d3d407c1400fa4a320d9a;Parent=0aacc90c97a54f6b;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Internal-Trace-Id: + - 69a8c9da0000000028661bb250455f79 + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 383.825417ms From ab108a5dc2e239dc32e05958e5ff76b1b9d22191 Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Wed, 4 Mar 2026 16:53:19 -0800 Subject: [PATCH 7/8] fix test api client --- eval/eval.go | 12 +- eval/eval_test.go | 5 + .../TestFunctionsAPI_EndToEnd_MixedTypes.yaml | 598 ++++++++++++------ .../cassettes/TestSpans_Integration.yaml | 249 +++++++- 4 files changed, 642 insertions(+), 222 deletions(-) diff --git a/eval/eval.go b/eval/eval.go index 38c1a5b..1c3b6c2 100644 --- a/eval/eval.go +++ b/eval/eval.go @@ -246,9 +246,11 @@ type nextCase[I, R any] struct { // newEval creates a new eval executor from concrete parameters (low-level constructor). // This is the shared code path used by both newEvalOpts (production) and testNewEval (tests). +// FIXME: we shouldn't pass session and API() — collapse into a single dependency. func newEval[I, R any]( s *auth.Session, tracer oteltrace.Tracer, + apiClient *api.API, experimentID string, experimentName string, projectID string, @@ -260,11 +262,6 @@ func newEval[I, R any]( parallelism int, quiet bool, ) *eval[I, R] { - var traceAPI *api.API - if s != nil { - traceAPI = s.API() - } - // Build parent span option parent := bttrace.NewParent(bttrace.ParentTypeExperimentID, experimentID) startSpanOpt := oteltrace.WithAttributes(parent.Attr()) @@ -289,7 +286,7 @@ func newEval[I, R any]( datasetID: datasetID, task: task, scorers: scorers, - apiClient: traceAPI, + apiClient: apiClient, tracer: tracer, startSpanOpt: startSpanOpt, ensureFlush: ensureFlush, @@ -323,6 +320,7 @@ func newEvalOpts[I, R any](ctx context.Context, s *auth.Session, tp *trace.Trace return newEval( s, tracer, + apiClient, exp.ID, exp.Name, projectID, @@ -739,6 +737,7 @@ func minInt(a, b int) int { func testNewEval[I, R any]( s *auth.Session, tracer oteltrace.Tracer, + apiClient *api.API, experimentID string, experimentName string, projectID string, @@ -752,6 +751,7 @@ func testNewEval[I, R any]( return newEval( s, tracer, + apiClient, experimentID, experimentName, projectID, diff --git a/eval/eval_test.go b/eval/eval_test.go index 109942d..fe20c13 100644 --- a/eval/eval_test.go +++ b/eval/eval_test.go @@ -14,6 +14,7 @@ import ( "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" + "github.com/braintrustdata/braintrust-sdk-go/api" "github.com/braintrustdata/braintrust-sdk-go/internal/auth" "github.com/braintrustdata/braintrust-sdk-go/internal/oteltest" "github.com/braintrustdata/braintrust-sdk-go/internal/tests" @@ -52,6 +53,7 @@ func newUnitTestEval[I, R any](t *testing.T, dataset Dataset[I, R], task TaskFun e := testNewEval( session, tracer, + nil, // no apiClient for unit tests "exp-12345678", // fake experiment ID "test-experiment", // fake experiment name "proj-87654321", // fake project ID @@ -311,9 +313,12 @@ func TestEval_Run_TraceRefUsesRootTraceID(t *testing.T) { return S(1), nil }) + apiClient := api.NewClient("test-key", api.WithAPIURL(server.URL)) + e := testNewEval( session, tracer, + apiClient, "exp-123", "test-exp", "proj-123", diff --git a/eval/testdata/cassettes/TestFunctionsAPI_EndToEnd_MixedTypes.yaml b/eval/testdata/cassettes/TestFunctionsAPI_EndToEnd_MixedTypes.yaml index 6aed0ed..09455df 100644 --- a/eval/testdata/cassettes/TestFunctionsAPI_EndToEnd_MixedTypes.yaml +++ b/eval/testdata/cassettes/TestFunctionsAPI_EndToEnd_MixedTypes.yaml @@ -6,20 +6,18 @@ interactions: proto: HTTP/1.1 proto_major: 1 proto_minor: 1 - content_length: 23 + content_length: 0 transfer_encoding: [] trailer: {} host: api.braintrust.dev remote_addr: "" request_uri: "" - body: '{"name":"go-sdk-tests"}' + body: "" form: {} headers: Accept: - application/json - Content-Type: - - application/json - url: https://api.braintrust.dev/v1/project + url: https://api.braintrust.dev/api/apikey/login method: POST response: proto: HTTP/2.0 @@ -29,7 +27,7 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"go-sdk-tests","description":null,"created":"2025-11-04T13:38:56.532Z","deleted_at":null,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","settings":null}' + body: '{"org_info":[{"id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"matt-test-org","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}' headers: Access-Control-Allow-Credentials: - "true" @@ -38,50 +36,50 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Wed, 04 Mar 2026 23:21:49 GMT + - Thu, 05 Mar 2026 00:49:35 GMT Etag: - - W/"fe-rfCZvNlcUhLHZTl4/HHBhOEJFv4" + - W/"101-EqXzt+vlRFUt5HCzQY7qxp9rZU0" Vary: - Origin, Accept-Encoding Via: - - 1.1 182d3a3dbb6658c964ee75cd45a42242.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 2e87eef03ab555daefa684d946e111b4.cloudfront.net (CloudFront), 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuK2KHm8oAMEEoA= + - ZuXs7GIdIAMEchQ= X-Amz-Cf-Id: - - jWZsokv3ZxS4HPG3cLU9hgdurbSgjs58jJ8FBQUuJT_YiE2TzGeFHg== + - V8BFdU20tZ-OtNCT8IR0Cjh0HOAHxZ6DixO_t_xx05s4zyyuhZoUBw== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - 03a6d1c4-4797-499a-b5ab-21e9c397874f + - ce478b10-34cf-436c-a070-693a814104c0 X-Amzn-Trace-Id: - - Root=1-69a8be8d-352c02c97cdbbc93522c381d;Parent=75e5bd0fc8466f5e;Sampled=0;Lineage=1:24be3d11:0 - X-Bt-Found-Existing: - - "true" + - Root=1-69a8d31f-4acc0edc52f76cbb3b5712fe;Parent=2e4f989f8fd73461;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 69a8be8d00000000360707728f6cc5fb + - 69a8d31f000000000a7548177a572e2c X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 391.960958ms + duration: 380.176208ms - id: 1 request: proto: HTTP/1.1 proto_major: 1 proto_minor: 1 - content_length: 0 + content_length: 23 transfer_encoding: [] trailer: {} host: api.braintrust.dev remote_addr: "" request_uri: "" - body: "" + body: '{"name":"go-sdk-tests"}' form: {} headers: Accept: - application/json - url: https://api.braintrust.dev/api/apikey/login + Content-Type: + - application/json + url: https://api.braintrust.dev/v1/project method: POST response: proto: HTTP/2.0 @@ -91,7 +89,7 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"org_info":[{"id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"matt-test-org","api_url":"https://api.braintrust.dev","git_metadata":null,"is_universal_api":null,"proxy_url":"https://api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}' + body: '{"id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"go-sdk-tests","description":null,"created":"2025-11-04T13:38:56.532Z","deleted_at":null,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","settings":null}' headers: Access-Control-Allow-Credentials: - "true" @@ -100,31 +98,33 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Wed, 04 Mar 2026 23:21:49 GMT + - Thu, 05 Mar 2026 00:49:35 GMT Etag: - - W/"101-EqXzt+vlRFUt5HCzQY7qxp9rZU0" + - W/"fe-rfCZvNlcUhLHZTl4/HHBhOEJFv4" Vary: - Origin, Accept-Encoding Via: - - 1.1 f36cc119cb86b2f70c315ca53fd1b4ee.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 7f51caabae8141bdcde4283a42be2a56.cloudfront.net (CloudFront), 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuK2KEeYoAMEU9Q= + - ZuXs7HlSIAMEVig= X-Amz-Cf-Id: - - ZfUiPDuoPPBIMv1lh8whWl5dwQpKWmCT9q4U9KxJ1nm8AybK4qQ8Wg== + - 4yapvkcSyfJEzfYb8LVqsbRsixdYeI8jRbJXQo7UN8dFvnMb47iGWg== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - 8a72e6e6-f503-4acd-80ba-5ac9cfbef0a1 + - d06d5ac0-e676-4987-a21f-fccc52a4af3a X-Amzn-Trace-Id: - - Root=1-69a8be8d-7bf27e082959357234a0565f;Parent=12a03d9b082b8c0b;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d31f-34e52b9112481fec2f1595c1;Parent=1105d6a35631322e;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Found-Existing: + - "true" X-Bt-Internal-Trace-Id: - - 69a8be8d000000000b7e01fdb7cb5f1e + - 69a8d31f000000003c1b407edee5ddc4 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 480.561958ms + duration: 400.695125ms - id: 2 request: proto: HTTP/1.1 @@ -160,31 +160,31 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Wed, 04 Mar 2026 23:21:49 GMT + - Thu, 05 Mar 2026 00:49:35 GMT Etag: - W/"e-xZKibKAiOxxBbzTm2byfFNRkvtA" Vary: - Origin, Accept-Encoding Via: - - 1.1 89b24af8db05335e68292856e0a53668.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 fd441d5d42c4e243bf0b88902034e302.cloudfront.net (CloudFront), 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuK2NF1goAMEE5w= + - ZuXs9FUboAMEGgA= X-Amz-Cf-Id: - - yPc5E6dJ4LYQ8JinP5gqIYri9KygZPM88m50ip0cZEXb25Cc8KPSSw== + - GUVQ6ds4Ahj4OccHimN5-a9m0qYiYrpDdmHV4oIykOYUs-n9arMx2w== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - 97bfee2c-f896-4e53-b186-bd91972b299e + - 76ec94a1-b2ed-451b-94d5-457c1f33d631 X-Amzn-Trace-Id: - - Root=1-69a8be8d-538c7ed55fe4df4d4206b759;Parent=5840344d74348928;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d31f-770acd6e4d3ce64f09aca409;Parent=49940aabbe447108;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 69a8be8d0000000066474d8596ffcad6 + - 69a8d31f0000000055a5e274c9362d08 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 232.55375ms + duration: 255.454416ms - id: 3 request: proto: HTTP/1.1 @@ -213,7 +213,7 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-task","name":"E2E Task","function_data":{"type":"prompt"},"prompt_data":{"options":{"model":"gpt-4o-mini","params":{"max_tokens":100,"response_format":{"type":"json_object"},"temperature":0}},"prompt":{"messages":[{"content":"You answer questions with JSON. Return ONLY the JSON object, no markdown, no code blocks, no backticks.","role":"system"},{"content":"Question: {{input.question}}. Return ONLY JSON like {\"answer\": \"your answer\", \"confidence\": 0.9}. No markdown formatting.","role":"user"}],"type":"chat"}},"id":"5cee0811-48f0-453a-9103-365d3a3344a8","created":"2026-03-04T23:21:50.043Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_xact_id":"1000196765725398940"}' + body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-task","name":"E2E Task","function_data":{"type":"prompt"},"prompt_data":{"options":{"model":"gpt-4o-mini","params":{"max_tokens":100,"response_format":{"type":"json_object"},"temperature":0}},"prompt":{"messages":[{"content":"You answer questions with JSON. Return ONLY the JSON object, no markdown, no code blocks, no backticks.","role":"system"},{"content":"Question: {{input.question}}. Return ONLY JSON like {\"answer\": \"your answer\", \"confidence\": 0.9}. No markdown formatting.","role":"user"}],"type":"chat"}},"id":"aeb4d39b-f38b-4a20-98d3-6e5adfe64af6","created":"2026-03-05T00:49:35.784Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_xact_id":"1000196766070421934"}' headers: Access-Control-Allow-Credentials: - "true" @@ -222,31 +222,31 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Wed, 04 Mar 2026 23:21:50 GMT + - Thu, 05 Mar 2026 00:49:36 GMT Etag: - - W/"31a-em6V5YuFILXI9rS58D+nzORFiA0" + - W/"31a-u63TMcnl+2zDildgbw2PNcNhP0U" Vary: - Origin, Accept-Encoding Via: - - 1.1 1f941fcf288b6d0259a0f708c955afae.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 89b24af8db05335e68292856e0a53668.cloudfront.net (CloudFront), 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuK2PHhSIAMEVLw= + - ZuXtAE2cIAMEmWQ= X-Amz-Cf-Id: - - ZDDsmz6LjAbzKjQitjfYbfoz3xWwmzTOWevStw2PX_Ruvz-O8YzMgw== + - 6UUWmKmzXQe3OsZV81oR13W8Hncj4tWJss-3tD2sifgy3sc3bmlDOw== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - ae1cdbfa-9533-491b-a210-655f70100808 + - 3b42dc12-0c4d-42f8-bc3a-8e290491fdb7 X-Amzn-Trace-Id: - - Root=1-69a8be8d-5e7ab4084a98fc7e11979895;Parent=68c3a4687841f2ce;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d31f-06784b973e1189dc09902857;Parent=3173e56c2fd25c97;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 69a8be8d00000000139b12966908e47c + - 69a8d31f00000000028bd8cf775380e5 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 651.189959ms + duration: 464.592875ms - id: 4 request: proto: HTTP/1.1 @@ -282,31 +282,31 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Wed, 04 Mar 2026 23:21:50 GMT + - Thu, 05 Mar 2026 00:49:36 GMT Etag: - W/"e-xZKibKAiOxxBbzTm2byfFNRkvtA" Vary: - Origin, Accept-Encoding Via: - - 1.1 250b49a977a2df6676d3fbf2508fc16e.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 250b49a977a2df6676d3fbf2508fc16e.cloudfront.net (CloudFront), 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuK2VEELoAMEikA= + - ZuXtGGvFIAMES0Q= X-Amz-Cf-Id: - - fmWVBVgyFZvv7kImOOMPmVxrNgP0drC8ezMcv4_WOf_8l-hsoR5qMQ== + - qVmNtkX4-RJXRI9KlxOJN5gHwqWTqP-v0dn-kPrlLEsRwwI8gEpp7A== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - 42802556-4bbb-42ab-884c-b0d43e040527 + - 653ec07c-d886-4f53-bb0e-43fc9812c710 X-Amzn-Trace-Id: - - Root=1-69a8be8e-72e30f915d29afdc00e1ed5d;Parent=24a6846642e872de;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d320-2723f5b1518c7f98388eacc4;Parent=7684b9b0f688ee60;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 69a8be8e000000006707a464c766dd54 + - 69a8d320000000000fa0909979f17aac X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 306.8885ms + duration: 385.217042ms - id: 5 request: proto: HTTP/1.1 @@ -335,7 +335,7 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-scorer","name":"E2E Scorer","function_type":"scorer","function_data":{"type":"prompt"},"prompt_data":{"options":{"model":"gpt-4o-mini","params":{"temperature":0}},"parser":{"choice_scores":{"correct":1,"incorrect":0},"type":"llm_classifier","use_cot":false},"prompt":{"messages":[{"content":"You are a scorer.","role":"system"},{"content":"Is the output.answer field non-empty? Choose ''correct'' if yes, ''incorrect'' if no.","role":"user"}],"type":"chat"}},"id":"3d00adc8-eb0f-4372-813b-6c945725b4e9","created":"2026-03-04T23:21:51.032Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_xact_id":"1000196765725465873"}' + body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-scorer","name":"E2E Scorer","function_type":"scorer","function_data":{"type":"prompt"},"prompt_data":{"options":{"model":"gpt-4o-mini","params":{"temperature":0}},"parser":{"choice_scores":{"correct":1,"incorrect":0},"type":"llm_classifier","use_cot":false},"prompt":{"messages":[{"content":"You are a scorer.","role":"system"},{"content":"Is the output.answer field non-empty? Choose ''correct'' if yes, ''incorrect'' if no.","role":"user"}],"type":"chat"}},"id":"c81984ec-a11b-4de4-86bd-6256985f82ab","created":"2026-03-05T00:49:36.599Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_xact_id":"1000196766070488487"}' headers: Access-Control-Allow-Credentials: - "true" @@ -344,31 +344,31 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Wed, 04 Mar 2026 23:21:51 GMT + - Thu, 05 Mar 2026 00:49:36 GMT Etag: - - W/"2d8-awAllBkTnpQ96D7hwSR6xXa0aPQ" + - W/"2d8-B2EIoXqQeewNJn70u+y9hMRF4tQ" Vary: - Origin, Accept-Encoding Via: - - 1.1 1f941fcf288b6d0259a0f708c955afae.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 8502ceae0080b3523f89d1a518a99726.cloudfront.net (CloudFront), 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuK2YFBqIAMECUQ= + - ZuXtIE6woAMEmWQ= X-Amz-Cf-Id: - - HvgjPdZvS4v0qMcdgmgwnNtEUCKrMrxCpkg5qYrasgOKaw3cH2ngfQ== + - cE-fY0vCxOVVFNZHAN-FTwc6qQfOoWTaVzxjCft7i_ITnUaxcyxxjQ== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - 5473fc69-f832-40a3-8a3f-0a28618881a0 + - a0d74814-bab1-44f4-b04f-dd596c8f2573 X-Amzn-Trace-Id: - - Root=1-69a8be8e-2898f58a067be1bb75ae4722;Parent=477e6824dc537dd7;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d320-60e184231085f5a16ec41b17;Parent=36d8413b312ae1fb;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 69a8be8e000000006e58465ac0f2e835 + - 69a8d320000000001c1afabe800f4d16 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 511.484625ms + duration: 490.491708ms - id: 6 request: proto: HTTP/1.1 @@ -395,7 +395,7 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"objects":[{"id":"5cee0811-48f0-453a-9103-365d3a3344a8","_xact_id":"1000196765725398940","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","log_id":"p","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"E2E Task","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-task","description":null,"created":"2026-03-04T23:21:50.043Z","prompt_data":{"prompt":{"type":"chat","messages":[{"role":"system","content":"You answer questions with JSON. Return ONLY the JSON object, no markdown, no code blocks, no backticks."},{"role":"user","content":"Question: {{input.question}}. Return ONLY JSON like {\"answer\": \"your answer\", \"confidence\": 0.9}. No markdown formatting."}]},"options":{"model":"gpt-4o-mini","params":{"max_tokens":100,"temperature":0,"response_format":{"type":"json_object"}}}},"tags":null,"metadata":null,"function_type":null,"function_data":{"type":"prompt"},"origin":null,"function_schema":null}]}' + body: '{"objects":[{"id":"aeb4d39b-f38b-4a20-98d3-6e5adfe64af6","_xact_id":"1000196766070421934","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","log_id":"p","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"E2E Task","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-task","description":null,"created":"2026-03-05T00:49:35.784Z","prompt_data":{"prompt":{"type":"chat","messages":[{"role":"system","content":"You answer questions with JSON. Return ONLY the JSON object, no markdown, no code blocks, no backticks."},{"role":"user","content":"Question: {{input.question}}. Return ONLY JSON like {\"answer\": \"your answer\", \"confidence\": 0.9}. No markdown formatting."}]},"options":{"model":"gpt-4o-mini","params":{"max_tokens":100,"temperature":0,"response_format":{"type":"json_object"}}}},"tags":null,"metadata":null,"function_type":null,"function_data":{"type":"prompt"},"origin":null,"function_schema":null}]}' headers: Access-Control-Allow-Credentials: - "true" @@ -404,31 +404,31 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Wed, 04 Mar 2026 23:21:51 GMT + - Thu, 05 Mar 2026 00:49:37 GMT Etag: - - W/"391-RyipEwunzCjsJA5GBlrnPSNhTww" + - W/"391-h4a/5l/DbhX6bRvfOtLQO3uXUOw" Vary: - Origin, Accept-Encoding Via: - - 1.1 c6aabec83f5c081149a8843767dacc52.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 5b69cd230a06f482da15abd9c53bb694.cloudfront.net (CloudFront), 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuK2eGs0IAMERLA= + - ZuXtNGy0IAMElHQ= X-Amz-Cf-Id: - - ocvFx_HyALBROUSPocHR5JzBGcwJ8X2zH8NTWF-bfENi7rQNPuzpCA== + - qGJipj6FaKThoF9UtKBkahQ7YlUTrisaI4-nYB8rcBTruXt1oOg9Dg== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - b660676e-584c-4318-88df-5b348791814c + - 2c0107a5-0670-45f6-bc5d-aa30061a05df X-Amzn-Trace-Id: - - Root=1-69a8be8f-1b1c6e3d65fa34933d9a81b3;Parent=7687df9a6a5a0bf5;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d320-51a17bba7a615bcd511aec1b;Parent=635b6fc9fd247e2e;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 69a8be8f0000000024155419a75408d6 + - 69a8d32100000000403013d9aeddd3b5 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 514.074125ms + duration: 761.63ms - id: 7 request: proto: HTTP/1.1 @@ -455,7 +455,7 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"objects":[{"id":"3d00adc8-eb0f-4372-813b-6c945725b4e9","_xact_id":"1000196765725465873","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","log_id":"p","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"E2E Scorer","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-scorer","description":null,"created":"2026-03-04T23:21:51.032Z","prompt_data":{"parser":{"type":"llm_classifier","use_cot":false,"choice_scores":{"correct":1,"incorrect":0}},"prompt":{"type":"chat","messages":[{"role":"system","content":"You are a scorer."},{"role":"user","content":"Is the output.answer field non-empty? Choose ''correct'' if yes, ''incorrect'' if no."}]},"options":{"model":"gpt-4o-mini","params":{"temperature":0}}},"tags":null,"metadata":null,"function_type":"scorer","function_data":{"type":"prompt"},"origin":null,"function_schema":null}]}' + body: '{"objects":[{"id":"c81984ec-a11b-4de4-86bd-6256985f82ab","_xact_id":"1000196766070488487","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","log_id":"p","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","name":"E2E Scorer","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-scorer","description":null,"created":"2026-03-05T00:49:36.599Z","prompt_data":{"parser":{"type":"llm_classifier","use_cot":false,"choice_scores":{"correct":1,"incorrect":0}},"prompt":{"type":"chat","messages":[{"role":"system","content":"You are a scorer."},{"role":"user","content":"Is the output.answer field non-empty? Choose ''correct'' if yes, ''incorrect'' if no."}]},"options":{"model":"gpt-4o-mini","params":{"temperature":0}}},"tags":null,"metadata":null,"function_type":"scorer","function_data":{"type":"prompt"},"origin":null,"function_schema":null}]}' headers: Access-Control-Allow-Credentials: - "true" @@ -464,31 +464,31 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Wed, 04 Mar 2026 23:21:52 GMT + - Thu, 05 Mar 2026 00:49:38 GMT Etag: - - W/"33a-wcYHtnEK7jjqqUDbTMEaqim3OVU" + - W/"33a-H0msJQDiJn46t+m73Iy7uiLCpsY" Vary: - Origin, Accept-Encoding Via: - - 1.1 7f51caabae8141bdcde4283a42be2a56.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 49798ef4b8dd64fece36e067d09f69ec.cloudfront.net (CloudFront), 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuK2jHh4IAMEmsg= + - ZuXtVEIZIAMEVLw= X-Amz-Cf-Id: - - kCtNzh1u-zQQUd85o8cxrtitwd31_l-04R7qKx_qcbvzMAUTsMzW3A== + - A0mRWWyNZ7WYTt3okSZdWDDUG5MCR7nb9ajX9NZMS8vk1Qw0wkzWIw== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - 040f1e98-c179-4cbf-9eae-90ab73e8979c + - 35e30c44-dc20-4d93-9565-37d20c99577c X-Amzn-Trace-Id: - - Root=1-69a8be8f-4d8bbfe516dbae1d72cbbacc;Parent=7b07aa7b52a3bf33;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d321-4c374a286eff9c654d9969cf;Parent=30855eddda5adf37;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 69a8be8f000000004cdebb19a9953ee8 + - 69a8d3210000000007fdcd5bf0c22773 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 509.136834ms + duration: 560.324541ms - id: 8 request: proto: HTTP/1.1 @@ -526,33 +526,33 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Wed, 04 Mar 2026 23:21:52 GMT + - Thu, 05 Mar 2026 00:49:38 GMT Etag: - W/"fe-rfCZvNlcUhLHZTl4/HHBhOEJFv4" Vary: - Origin, Accept-Encoding Via: - - 1.1 26c9d43b9089eee93b9e4ad4293d02c0.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 049ca50de603d43d8c9d0f7716efb414.cloudfront.net (CloudFront), 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuK2oHJ7IAMES0Q= + - ZuXtbHzIoAMEvDg= X-Amz-Cf-Id: - - 7L7ybREqakzXlS3hXhcvQZRM_l3geyu5aHIJiO55jhTUqIArDuVYjg== + - klgxSRri84hGF9kapjaqIg6i1Ri2EEp_SWMpaLSpNRpjXCJcebPLHQ== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - 8b1f87d6-ac18-4b1f-bf25-8c7c3450353b + - 2f235ae0-c0e7-4b2a-8495-7c30d63f5711 X-Amzn-Trace-Id: - - Root=1-69a8be90-4041adc21bb30fe3386bb63a;Parent=0d63d3a7dfd2f620;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d322-396cd87b4272cc4d4f168dee;Parent=79819f16d5be5f3c;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Found-Existing: - "true" X-Bt-Internal-Trace-Id: - - 69a8be900000000017695c353fc4e50e + - 69a8d322000000003291e2aa28d7234d X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 277.811125ms + duration: 251.787541ms - id: 9 request: proto: HTTP/1.1 @@ -581,7 +581,7 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"id":"18d8ffff-1a3b-4ba4-b714-b9fd7e86b508","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","name":"test-e2e-exp-83e080a6","description":null,"created":"2026-03-04T23:21:52.805Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"public":false,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","metadata":null,"tags":null}' + body: '{"id":"ff367ddf-d12b-46c9-a6f2-8f5d82b29a37","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","name":"test-e2e-exp-340b67ab","description":null,"created":"2026-03-05T00:49:38.648Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"public":false,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","metadata":null,"tags":null}' headers: Access-Control-Allow-Credentials: - "true" @@ -590,31 +590,31 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Wed, 04 Mar 2026 23:21:52 GMT + - Thu, 05 Mar 2026 00:49:38 GMT Etag: - - W/"17f-N3C0LL/w6WWLkMx7P4tk/BG8aes" + - W/"17f-wrYluRYq+KO7HXrtgUiusBTRuZA" Vary: - Origin, Accept-Encoding Via: - - 1.1 b7b9fc5331efc8b070db0bf27b36820e.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 8bf233dd8a97bd754666b427b6d19d34.cloudfront.net (CloudFront), 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuK2rFFjoAMEXHw= + - ZuXtdEchIAMEbdQ= X-Amz-Cf-Id: - - Ulb4MEaqIEnEk1wCNcMJf3OrWhDy17HC14lVzDFaD4Ini2r2sHdpPQ== + - D1JdbYpTvzAQF5X16NKGW3LTkTXuCvWXYsvgUFOjIDMckiLNUxm1YA== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - 7f1c978b-acad-4b96-8924-d485a1016180 + - 2a9e8d0c-a1b9-402c-9aa9-86f51bed9984 X-Amzn-Trace-Id: - - Root=1-69a8be90-576e008041d837662508e60d;Parent=30ac92dc3364863d;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d322-041b88d171c632c83d37680f;Parent=5f3803e9f29308fb;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 69a8be900000000010d5c5728824f6ba + - 69a8d3220000000058ae9162354e6eb9 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 224.047208ms + duration: 213.041709ms - id: 10 request: proto: HTTP/1.1 @@ -633,7 +633,7 @@ interactions: - application/json Content-Type: - application/json - url: https://api.braintrust.dev/v1/function/5cee0811-48f0-453a-9103-365d3a3344a8/invoke + url: https://api.braintrust.dev/v1/function/aeb4d39b-f38b-4a20-98d3-6e5adfe64af6/invoke method: POST response: proto: HTTP/2.0 @@ -646,43 +646,43 @@ interactions: body: '{"answer":"Paris","confidence":0.95}' headers: Age: - - "0" + - "801" Cache-Control: - max-age=604800 Cf-Cache-Status: - DYNAMIC Cf-Ray: - - 9d749eaf7f87f282-IAD + - 9d750b66cc02916d-IAD Content-Type: - application/json Date: - - Wed, 04 Mar 2026 23:21:54 GMT + - Thu, 05 Mar 2026 00:49:39 GMT Openai-Organization: - braintrust-data Openai-Processing-Ms: - - "110" + - "10236" Openai-Project: - proj_wMRY6YpEiASXMxPIIcI9nQRi Openai-Version: - "2020-10-01" Set-Cookie: - - __cf_bm=LO0KPhzriFQNW2.yAhWD7HrQMpwnbPtZtv2JqJG1VF0-1772666513.8365242-1.0.1.1-EFUlSlC6kIDUiVxO8vtWoegTf.KpSML0.lD_2Nh_7PdO_C43HS767rcggvHtsuzPl09rQgT2YmA_3HpCAyeymuwnwudvUGkPamfEy3wXleyuBineGjVLkM7TrPpmzZgW; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 04 Mar 2026 23:51:54 GMT + - __cf_bm=i5UVzxSetLUyp4DcqK6LjSXC_RQJLn2SO5snbn5oV3c-1772670966.8465185-1.0.1.1-Z._8XEIIfPy94NKxdDbozAp6NSONRaR_lK.7WfdrdosGJRXJEI6tESeTpkUBUEh2HjCkKYoB8RZJJjv4luMByyxubZ7GKEJ4jyvxnFOj8xVh7dhFi7.C8eujQCsobL7d; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Thu, 05 Mar 2026 01:06:17 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Via: - - 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Cf-Id: - - RWaHncyuNRmWj45UhmZpYHMeFWkbRgpWQzZ8Sr175K0GDGw0Fn1MLQ== + - bnlp3bUPgrMngQB4dSBn7NfuYvL741IoyTks0qzrWgk_lVD5uWVITQ== X-Amz-Cf-Pop: - HIO52-P4 X-Amzn-Requestid: - - 5d9d5311-6ea0-429c-b180-16ddacd5f96f + - 9b4138e7-b0b8-420e-bdf8-e4c2b8c6bdcd X-Amzn-Trace-Id: - - Root=1-69a8be91-3499dbfb5ceabb7f763b1f30;Parent=1b79cc62450e043c;Sampled=0;Lineage=1:8be8f50d:0 + - Root=1-69a8d322-50a0b91752d55b4b5623109b;Parent=183deb43e1a08726;Sampled=0;Lineage=1:8be8f50d:0 X-Bt-Cached: - - MISS + - HIT X-Bt-Function-Creds-Cached: - - MISS + - HIT X-Bt-Function-Meta-Cached: - MISS X-Bt-Used-Endpoint: @@ -706,11 +706,128 @@ interactions: X-Ratelimit-Reset-Tokens: - 0s X-Request-Id: - - req_e36d9b8d401c453d9cd56fe5b477ca26 + - req_31e7ebac94ef49d084548a5880b2cfcc status: 200 OK code: 200 - duration: 1.602242666s + duration: 864.584166ms - id: 11 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 388 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"limit":1000,"filter":{"children":[{"left":{"name":["root_span_id"],"op":"ident"},"op":"eq","right":{"op":"literal","value":"bb8d12c3758f8f0a1a46ce6864a322bb"}},{"children":[{"expr":{"name":["span_attributes","purpose"],"op":"ident"},"op":"isnull"},{"left":{"name":["span_attributes","purpose"],"op":"ident"},"op":"ne","right":{"op":"literal","value":"scorer"}}],"op":"or"}],"op":"and"}}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/v1/experiment/ff367ddf-d12b-46c9-a6f2-8f5d82b29a37/fetch + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"events":[]}' + headers: + Access-Control-Allow-Credentials: + - "true" + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + Content-Type: + - application/json + Date: + - Thu, 05 Mar 2026 00:49:39 GMT + Vary: + - Origin + Via: + - 1.1 fc36d22b58a363b02ecdd852a2e51610.cloudfront.net (CloudFront), 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) + X-Amz-Apigw-Id: + - ZuXtoGt4IAMEK3Q= + X-Amz-Cf-Id: + - IgsfCIPp3YYyw1jj-GSTd2hloqzqx8Wh9W2-Px9mzX1d3OiBmWF1IQ== + X-Amz-Cf-Pop: + - HIO52-P2 + - HIO52-P4 + X-Amzn-Requestid: + - e4ed36bf-b431-4eef-b91c-29c3e48c2193 + X-Amzn-Trace-Id: + - Root=1-69a8d323-2020b1d5475f4915696bc98e;Parent=6dd93d8eb085d7ac;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Api-Duration-Ms: + - "206" + X-Bt-Brainstore-Duration-Ms: + - "76" + X-Bt-Internal-Trace-Id: + - 69a8d3230000000069140f55d034267f + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 409.460875ms + - id: 12 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 234 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"global_function":"project_default","function_type":"preprocessor","mode":"json","input":{"trace_ref":{"object_id":"ff367ddf-d12b-46c9-a6f2-8f5d82b29a37","object_type":"experiment","root_span_id":"bb8d12c3758f8f0a1a46ce6864a322bb"}}}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/function/invoke + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: false + body: "null" + headers: + Content-Type: + - application/json + Date: + - Thu, 05 Mar 2026 00:49:40 GMT + Via: + - 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) + X-Amz-Cf-Id: + - rZmnLLqq8SPahtJKCvc7m2JWs505ZH_lG0RB8ka8CqTqyc0psMCa4w== + X-Amz-Cf-Pop: + - HIO52-P4 + X-Amzn-Requestid: + - ef2b3ee3-c9fa-412d-8004-d1ef25cdef72 + X-Amzn-Trace-Id: + - Root=1-69a8d324-79f8b3b90101b34263d073a3;Parent=6738d2e5d888a5c5;Sampled=0;Lineage=1:8be8f50d:0 + X-Bt-Function-Creds-Cached: + - HIT + X-Bt-Function-Meta-Cached: + - HIT + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 409.483083ms + - id: 13 request: proto: HTTP/1.1 proto_major: 1 @@ -728,7 +845,7 @@ interactions: - application/json Content-Type: - application/json - url: https://api.braintrust.dev/v1/function/3d00adc8-eb0f-4372-813b-6c945725b4e9/invoke + url: https://api.braintrust.dev/v1/function/c81984ec-a11b-4de4-86bd-6256985f82ab/invoke method: POST response: proto: HTTP/2.0 @@ -741,41 +858,41 @@ interactions: body: '{"name":"E2E Scorer","score":0,"metadata":{"choice":"incorrect"}}' headers: Age: - - "0" + - "838" Cache-Control: - max-age=604800 Cf-Cache-Status: - DYNAMIC Cf-Ray: - - 9d749ebca8d2d6f4-IAD + - 9d750a9bff6f9c43-IAD Content-Type: - application/json Date: - - Wed, 04 Mar 2026 23:21:56 GMT + - Thu, 05 Mar 2026 00:49:41 GMT Openai-Organization: - braintrust-data Openai-Processing-Ms: - - "123" + - "8900" Openai-Project: - proj_wMRY6YpEiASXMxPIIcI9nQRi Openai-Version: - "2020-10-01" Set-Cookie: - - __cf_bm=nwKAkxilJMNyMbsGSeHHvgExBVDhmXpHmOQtFuxbSX4-1772666515.9499893-1.0.1.1-Bm2sPQHXASrKshvzzuhQF9VZ8DqHVGoiRG5Uwsba3soQHTrFTmeID0Df3SXejLkCx9X2kHZbkZ5hW5Ob_UpAb39cUYzvmm_8gKWM3PgE02q5UXEI1rj5cFBv_Xtvh.uU; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 04 Mar 2026 23:51:56 GMT + - __cf_bm=tx7xmc6tvsqv23DbDHziKzCYiKvZp8RylWd.q7yIwmw-1772670934.3944066-1.0.1.1-zXNNcHGbsyy5UMRIHorhUwgZ6.v52SoXtP5fqMgKVrNtWXSTyuLoQFGFJCIGRFO6HEmBbmsO8yj4bGTr50mkmG8i0yy88QJgjnOj8ZQE3o1HRBHGFTTUzNYOnpFl_USJ; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Thu, 05 Mar 2026 01:05:43 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Via: - - 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Cf-Id: - - ZoY-vQI297qhQaE7LXu3u1jfB9KZOJ3E0IwtRqgJefxioJJYMtn2jg== + - 7kg0XX5XT4LG-YTmZDBhpQRn2WpvGcuHvuD4XmyXULv4EhKeej-F5w== X-Amz-Cf-Pop: - HIO52-P4 X-Amzn-Requestid: - - 5c13920b-dd1c-4dbd-b4d9-aac889fd4785 + - 80ef3e2e-4906-4940-8880-4403a524807f X-Amzn-Trace-Id: - - Root=1-69a8be93-609c95050c1ce1b870f9c683;Parent=5f54dcba4942d245;Sampled=0;Lineage=1:8be8f50d:0 + - Root=1-69a8d324-7a207d4723e63e36047c6c9b;Parent=0dafe645b757b75d;Sampled=0;Lineage=1:8be8f50d:0 X-Bt-Cached: - - MISS + - HIT X-Bt-Function-Creds-Cached: - HIT X-Bt-Function-Meta-Cached: @@ -801,11 +918,11 @@ interactions: X-Ratelimit-Reset-Tokens: - 0s X-Request-Id: - - req_ca7960f0b73541b6a1199f7a214bd13d + - req_a365cda4d32449309ae473b2f6ff3280 status: 200 OK code: 200 - duration: 1.048096458s - - id: 12 + duration: 1.368409541s + - id: 14 request: proto: HTTP/1.1 proto_major: 1 @@ -823,7 +940,7 @@ interactions: - application/json Content-Type: - application/json - url: https://api.braintrust.dev/v1/function/5cee0811-48f0-453a-9103-365d3a3344a8/invoke + url: https://api.braintrust.dev/v1/function/aeb4d39b-f38b-4a20-98d3-6e5adfe64af6/invoke method: POST response: proto: HTTP/2.0 @@ -836,41 +953,41 @@ interactions: body: '{"answer":"4","confidence":0.9}' headers: Age: - - "0" + - "849" Cache-Control: - max-age=604800 Cf-Cache-Status: - DYNAMIC Cf-Ray: - - 9d749ec0282f81f7-IAD + - 9d750a43398191fa-IAD Content-Type: - application/json Date: - - Wed, 04 Mar 2026 23:21:57 GMT + - Thu, 05 Mar 2026 00:49:41 GMT Openai-Organization: - braintrust-data Openai-Processing-Ms: - - "179" + - "12004" Openai-Project: - proj_wMRY6YpEiASXMxPIIcI9nQRi Openai-Version: - "2020-10-01" Set-Cookie: - - __cf_bm=F8eKRNSUyQ6oplk5F8bLOEeuei2OnG84BP4rPx95kEQ-1772666516.5062973-1.0.1.1-Y4FcjyIVXYTzAI.otuQDnN5diTj__wU4PVl9BAV5fRhkC2buoh7iaHxSkfPsH2B5ie7GA.2LQpxAlu4_pZbCzXTgdKC.x3gSccgt8Lq1aY5gKlzWCWkheNKwkEtECk3r; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 04 Mar 2026 23:51:56 GMT + - __cf_bm=V248iJ3jN39AeublOy_9hFDHDRHQ80jHT3v2t7UvkjE-1772670920.1976304-1.0.1.1-YQiVpaP0YcdOHfuTgV4BXam6DkhcVozpymhppntp.8mP_il_AeCQ3SM5_pNlh0D0uyGa6HZx62LZ3.XiNPve9AxJHQ_Gvl1tYIQhoaxz4lSIOUwsTe14ZDU5OXZayoc5; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Thu, 05 Mar 2026 01:05:32 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Via: - - 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Cf-Id: - - m4CiDSSPMI8aL9m6JHz82XPPMdrQDMMszFqKcEvlGESemf6nDrbbig== + - VviYFJY0KoRTETtTee3hZyAlHDKGHrVnNtRYQO6TspE_oNOLdua4aQ== X-Amz-Cf-Pop: - HIO52-P4 X-Amzn-Requestid: - - eb232cba-57d6-4268-8e0b-54521e1285c3 + - be620b07-5736-4b73-a653-5210236da28d X-Amzn-Trace-Id: - - Root=1-69a8be94-7fbc34524c79007824074629;Parent=75ebbb6dbdbc8e3b;Sampled=0;Lineage=1:8be8f50d:0 + - Root=1-69a8d325-01226901060205a92278f04b;Parent=57c0d1f3928e192d;Sampled=0;Lineage=1:8be8f50d:0 X-Bt-Cached: - - MISS + - HIT X-Bt-Function-Creds-Cached: - HIT X-Bt-Function-Meta-Cached: @@ -896,11 +1013,128 @@ interactions: X-Ratelimit-Reset-Tokens: - 0s X-Request-Id: - - req_5185fd4c69894c918aa1b5cbae7f3aba + - req_d39fc3a5188f4da48078153da7bc8ae2 status: 200 OK code: 200 - duration: 792.825583ms - - id: 13 + duration: 268.10975ms + - id: 15 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 388 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"limit":1000,"filter":{"children":[{"left":{"name":["root_span_id"],"op":"ident"},"op":"eq","right":{"op":"literal","value":"b1395ed95cb69fd003ffbf2a4ca40d8f"}},{"children":[{"expr":{"name":["span_attributes","purpose"],"op":"ident"},"op":"isnull"},{"left":{"name":["span_attributes","purpose"],"op":"ident"},"op":"ne","right":{"op":"literal","value":"scorer"}}],"op":"or"}],"op":"and"}}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/v1/experiment/ff367ddf-d12b-46c9-a6f2-8f5d82b29a37/fetch + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"events":[]}' + headers: + Access-Control-Allow-Credentials: + - "true" + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + Content-Type: + - application/json + Date: + - Thu, 05 Mar 2026 00:49:42 GMT + Vary: + - Origin + Via: + - 1.1 d220e3f3d93439a8c69225156c6ae800.cloudfront.net (CloudFront), 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) + X-Amz-Apigw-Id: + - ZuXuAHleoAMEFxw= + X-Amz-Cf-Id: + - 1Y-VICml3l2C3WxLlFmgiYI6m-AFj8l0HDBIyv4MXblpAE6UkQRGgw== + X-Amz-Cf-Pop: + - HIO52-P2 + - HIO52-P4 + X-Amzn-Requestid: + - 2cd3a378-79ee-404a-a23b-d6542b9db8e5 + X-Amzn-Trace-Id: + - Root=1-69a8d326-52cdc9832bad47f03d504789;Parent=57b4c3969bc6f512;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Api-Duration-Ms: + - "66" + X-Bt-Brainstore-Duration-Ms: + - "55" + X-Bt-Internal-Trace-Id: + - 69a8d326000000000dcaccad3c5e5d38 + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 195.153708ms + - id: 16 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 234 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"global_function":"project_default","function_type":"preprocessor","mode":"json","input":{"trace_ref":{"object_id":"ff367ddf-d12b-46c9-a6f2-8f5d82b29a37","object_type":"experiment","root_span_id":"b1395ed95cb69fd003ffbf2a4ca40d8f"}}}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/function/invoke + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: false + body: "null" + headers: + Content-Type: + - application/json + Date: + - Thu, 05 Mar 2026 00:49:42 GMT + Via: + - 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) + X-Amz-Cf-Id: + - zV2YJypge8uJzYMP5hfSqqhZAXEdZmJ0t-WL1J61qIvhMeaj-dZC3Q== + X-Amz-Cf-Pop: + - HIO52-P4 + X-Amzn-Requestid: + - d02aa017-c25e-43cd-9dd6-590484c7fb26 + X-Amzn-Trace-Id: + - Root=1-69a8d326-192c16b601fcf37f4a221e38;Parent=55c71e5158321430;Sampled=0;Lineage=1:8be8f50d:0 + X-Bt-Function-Creds-Cached: + - HIT + X-Bt-Function-Meta-Cached: + - HIT + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 336.941583ms + - id: 17 request: proto: HTTP/1.1 proto_major: 1 @@ -918,7 +1152,7 @@ interactions: - application/json Content-Type: - application/json - url: https://api.braintrust.dev/v1/function/3d00adc8-eb0f-4372-813b-6c945725b4e9/invoke + url: https://api.braintrust.dev/v1/function/c81984ec-a11b-4de4-86bd-6256985f82ab/invoke method: POST response: proto: HTTP/2.0 @@ -931,39 +1165,39 @@ interactions: body: '{"name":"E2E Scorer","score":0,"metadata":{"choice":"incorrect"}}' headers: Age: - - "1" + - "839" Cache-Control: - max-age=604800 Cf-Cache-Status: - DYNAMIC Cf-Ray: - - 9d749ebca8d2d6f4-IAD + - 9d750a9bff6f9c43-IAD Content-Type: - application/json Date: - - Wed, 04 Mar 2026 23:21:57 GMT + - Thu, 05 Mar 2026 00:49:42 GMT Openai-Organization: - braintrust-data Openai-Processing-Ms: - - "123" + - "8900" Openai-Project: - proj_wMRY6YpEiASXMxPIIcI9nQRi Openai-Version: - "2020-10-01" Set-Cookie: - - __cf_bm=nwKAkxilJMNyMbsGSeHHvgExBVDhmXpHmOQtFuxbSX4-1772666515.9499893-1.0.1.1-Bm2sPQHXASrKshvzzuhQF9VZ8DqHVGoiRG5Uwsba3soQHTrFTmeID0Df3SXejLkCx9X2kHZbkZ5hW5Ob_UpAb39cUYzvmm_8gKWM3PgE02q5UXEI1rj5cFBv_Xtvh.uU; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 04 Mar 2026 23:51:56 GMT + - __cf_bm=tx7xmc6tvsqv23DbDHziKzCYiKvZp8RylWd.q7yIwmw-1772670934.3944066-1.0.1.1-zXNNcHGbsyy5UMRIHorhUwgZ6.v52SoXtP5fqMgKVrNtWXSTyuLoQFGFJCIGRFO6HEmBbmsO8yj4bGTr50mkmG8i0yy88QJgjnOj8ZQE3o1HRBHGFTTUzNYOnpFl_USJ; HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Thu, 05 Mar 2026 01:05:43 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload Via: - - 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Cf-Id: - - fo-cSb9v46Gv5OBWZxUhQFjeWpvZYjNL7kXJmw8qHYThSjfyDLk6UA== + - rINawjlY_Qcgcq9unlilEIqT7Iza1JU0i8zcXvPDg02Ixg0JYMn80Q== X-Amz-Cf-Pop: - HIO52-P4 X-Amzn-Requestid: - - 3e2ef1a4-7aa8-468e-b1d6-72e64baad31b + - 1a79acc5-be7b-49c5-acfb-48eeb0b0f63c X-Amzn-Trace-Id: - - Root=1-69a8be95-47030e95164a33e54d201933;Parent=2b922a1e88f3d143;Sampled=0;Lineage=1:8be8f50d:0 + - Root=1-69a8d326-295306367dfb2b0815fc53e4;Parent=61ede213a067d874;Sampled=0;Lineage=1:8be8f50d:0 X-Bt-Cached: - HIT X-Bt-Function-Creds-Cached: @@ -991,11 +1225,11 @@ interactions: X-Ratelimit-Reset-Tokens: - 0s X-Request-Id: - - req_ca7960f0b73541b6a1199f7a214bd13d + - req_a365cda4d32449309ae473b2f6ff3280 status: 200 OK code: 200 - duration: 287.115334ms - - id: 14 + duration: 131.369417ms + - id: 18 request: proto: HTTP/1.1 proto_major: 1 @@ -1011,7 +1245,7 @@ interactions: headers: Accept: - application/json - url: https://api.braintrust.dev/v1/function/3d00adc8-eb0f-4372-813b-6c945725b4e9 + url: https://api.braintrust.dev/v1/function/c81984ec-a11b-4de4-86bd-6256985f82ab method: DELETE response: proto: HTTP/2.0 @@ -1021,7 +1255,7 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-scorer","id":"3d00adc8-eb0f-4372-813b-6c945725b4e9","created":"2026-03-04T23:21:58.096Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_object_delete":true,"_xact_id":"1000196765725936784"}' + body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-scorer","id":"c81984ec-a11b-4de4-86bd-6256985f82ab","created":"2026-03-05T00:49:42.865Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_object_delete":true,"_xact_id":"1000196766070890268"}' headers: Access-Control-Allow-Credentials: - "true" @@ -1030,32 +1264,32 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Wed, 04 Mar 2026 23:21:58 GMT + - Thu, 05 Mar 2026 00:49:43 GMT Etag: - - W/"12f-62LNmt00v79FTwwQkGZZHscWEUY" + - W/"12f-eyOPkORMDSf9v1c8fBMBAQ4+S4Y" Vary: - Origin, Accept-Encoding Via: - - 1.1 a235f5b7b15b9c8025af96fb0081dd58.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 4e7012bff211fc1604763d0935533d32.cloudfront.net (CloudFront), 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuK3fEw7oAMEK8w= + - ZuXuHEm_IAMEf1A= X-Amz-Cf-Id: - - oBRJX3_ixcRTUd09ROH_0vbf3zHntR80qfWi8mHuh2Pjic3Y-F5Szw== + - -IAc9zrisQ6N2qE9islsmtyxSXwsS2Pus4epZzZXH0WNlHnRTYnk3g== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - bd730343-0524-416d-9fbd-aff3324f50ae + - 9132e99a-0d5e-4ea0-9f29-fcde8e2da9c8 X-Amzn-Trace-Id: - - Root=1-69a8be95-5b2ce8473e88bc2d13746c0d;Parent=48f58108cf7adaae;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d326-2ae634ed4a4cd77b6b2bbc4c;Parent=7bf21c0633367639;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 69a8be96000000004a3d019d789a1ee5 + - 69a8d326000000004881e5345c2f29e1 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 407.340458ms - - id: 15 + duration: 461.6655ms + - id: 19 request: proto: HTTP/1.1 proto_major: 1 @@ -1071,7 +1305,7 @@ interactions: headers: Accept: - application/json - url: https://api.braintrust.dev/v1/function/5cee0811-48f0-453a-9103-365d3a3344a8 + url: https://api.braintrust.dev/v1/function/aeb4d39b-f38b-4a20-98d3-6e5adfe64af6 method: DELETE response: proto: HTTP/2.0 @@ -1081,7 +1315,7 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-task","id":"5cee0811-48f0-453a-9103-365d3a3344a8","created":"2026-03-04T23:21:58.518Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_object_delete":true,"_xact_id":"1000196765725937232"}' + body: '{"log_id":"p","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","slug":"TestFunctionsAPI_EndToEnd_MixedTypes-task","id":"aeb4d39b-f38b-4a20-98d3-6e5adfe64af6","created":"2026-03-05T00:49:43.349Z","org_id":"5ba6d482-b475-4c66-8cd2-5815694764e3","_object_delete":true,"_xact_id":"1000196766070956283"}' headers: Access-Control-Allow-Credentials: - "true" @@ -1090,28 +1324,28 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Wed, 04 Mar 2026 23:21:58 GMT + - Thu, 05 Mar 2026 00:49:43 GMT Etag: - - W/"12d-TuqS0Cq/1Ac5KXhG/HAE290GWVg" + - W/"12d-sgKO/VJ7Qubf8mQ3P5Oc8xq9IbM" Vary: - Origin, Accept-Encoding Via: - - 1.1 4e7012bff211fc1604763d0935533d32.cloudfront.net (CloudFront), 1.1 11017c4db22106ac70e16ce75190a430.cloudfront.net (CloudFront) + - 1.1 49798ef4b8dd64fece36e067d09f69ec.cloudfront.net (CloudFront), 1.1 7ad3d6571deff4c3c83d7e4476fcc6d0.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuK3kEuKIAMEikA= + - ZuXuMGLdoAMEXUA= X-Amz-Cf-Id: - - eoDLcs5b3_s9QheqGINw1_IOiN-rB1KBpRhmhMVTAWlfNqPVkjF4gg== + - cZoAR92WxJGaRhMBwtEg03uYFMHqbl-GrmRE8LxwkOawMoWp1UCSww== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - fe176c1b-8a42-4559-9ef4-6f036ad9330f + - 22085cbb-9999-4414-87c2-0fde47ec694a X-Amzn-Trace-Id: - - Root=1-69a8be96-05bf6d0c2ac2df910d310753;Parent=77dff1cb3a95ec19;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d327-6d6fb38815df43ce30dc377e;Parent=1bee60d8d66b9597;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 69a8be9600000000584f4df6018f99fd + - 69a8d32700000000210fcaf1f12ae174 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 481.053292ms + duration: 442.361291ms diff --git a/eval/testdata/cassettes/TestSpans_Integration.yaml b/eval/testdata/cassettes/TestSpans_Integration.yaml index d2e58e6..9112835 100644 --- a/eval/testdata/cassettes/TestSpans_Integration.yaml +++ b/eval/testdata/cassettes/TestSpans_Integration.yaml @@ -38,33 +38,33 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Thu, 05 Mar 2026 00:10:01 GMT + - Thu, 05 Mar 2026 00:50:09 GMT Etag: - W/"fe-rfCZvNlcUhLHZTl4/HHBhOEJFv4" Vary: - Origin, Accept-Encoding Via: - - 1.1 4e7012bff211fc1604763d0935533d32.cloudfront.net (CloudFront), 1.1 3caf9df4ca497afd40efb87f8957a7fa.cloudfront.net (CloudFront) + - 1.1 05cf67c96e96cd376921ba5b65795a56.cloudfront.net (CloudFront), 1.1 41d11417b7470cfb79cc5b299692369a.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuR6FH-7IAMEMKQ= + - ZuXyTFmwIAMEuzQ= X-Amz-Cf-Id: - - g07TENfJU2EYn1m69IOORXaRQbMrYoDBF3eeL76e33l_OTdg-6TuvQ== + - NYo2tTxvnT1Xbbi-EFd0Gj4BGbea2FJ4PPfCa1He5bplt2PReKgeuA== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - 028ea526-fd6c-4e3c-9a74-abc85da09560 + - cd462757-3e29-48a4-a2f4-7bf6cc1f55e9 X-Amzn-Trace-Id: - - Root=1-69a8c9d9-30b886d50dade00f511cba07;Parent=411bf1e494ceef4e;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d341-29c17b58763b99ce076be82c;Parent=4a4bb0124fc8d829;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Found-Existing: - "true" X-Bt-Internal-Trace-Id: - - 69a8c9d90000000071cafbe183512841 + - 69a8d3410000000031a3a70e3e47fd68 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 356.208959ms + duration: 251.65825ms - id: 1 request: proto: HTTP/1.1 @@ -100,31 +100,31 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Thu, 05 Mar 2026 00:10:01 GMT + - Thu, 05 Mar 2026 00:50:09 GMT Etag: - W/"101-EqXzt+vlRFUt5HCzQY7qxp9rZU0" Vary: - Origin, Accept-Encoding Via: - - 1.1 c6aabec83f5c081149a8843767dacc52.cloudfront.net (CloudFront), 1.1 3caf9df4ca497afd40efb87f8957a7fa.cloudfront.net (CloudFront) + - 1.1 6a52d37737133b0b8a09947e5c586ec4.cloudfront.net (CloudFront), 1.1 41d11417b7470cfb79cc5b299692369a.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuR6FE_soAMEF0w= + - ZuXyVET4oAMEYtQ= X-Amz-Cf-Id: - - VEFgHkf2_HoKU1tVnkwK0vGt1NQrakZCCA0yCYWMa0qU0H-CvKnZtQ== + - r1DO6WNeJtDZyPPCupp27LaFhDufAErAwaARkZnfShZmF7iRE30BAA== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - 267d9a25-2507-4505-a0f9-7fb063adf47d + - a406de4e-b6b2-43f0-ba75-d02b6ebd8b96 X-Amzn-Trace-Id: - - Root=1-69a8c9d9-1e3ca805294cae2011c85b13;Parent=6477d7efce5ea41c;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d341-6b51e5791b9f54f473e53a79;Parent=45ea512dbb0d7c7c;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 69a8c9d900000000128f46ab7b71df04 + - 69a8d3410000000004bd74d6d772a7c2 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 405.929209ms + duration: 396.15425ms - id: 2 request: proto: HTTP/1.1 @@ -162,33 +162,33 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Thu, 05 Mar 2026 00:10:02 GMT + - Thu, 05 Mar 2026 00:50:09 GMT Etag: - W/"fe-rfCZvNlcUhLHZTl4/HHBhOEJFv4" Vary: - Origin, Accept-Encoding Via: - - 1.1 eb6e5a827e45274130b33c12b0d48aaa.cloudfront.net (CloudFront), 1.1 3caf9df4ca497afd40efb87f8957a7fa.cloudfront.net (CloudFront) + - 1.1 f36cc119cb86b2f70c315ca53fd1b4ee.cloudfront.net (CloudFront), 1.1 41d11417b7470cfb79cc5b299692369a.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuR6JEejIAMEacw= + - ZuXyVGDroAMEezQ= X-Amz-Cf-Id: - - KE7N82w4HOuOeCQ5P2AwfOa0atQ881eJDFp61p5rb6tjJmFJyVSmjg== + - 6UIuEM9SOkx0pJ_5cykpVwX4urAT6Bp21zzX1JmpWwUZIQ-kEJlPYA== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - e6d13ede-981d-446c-aee8-b0872c5bc1e9 + - a6671dab-832b-4c09-b63c-b55bb83244d1 X-Amzn-Trace-Id: - - Root=1-69a8c9da-3984a653291e44866d249af8;Parent=4a6ed661a29837c4;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d341-3cff57025b1a423c04d2e163;Parent=5e8a7e6172aec3f9;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Found-Existing: - "true" X-Bt-Internal-Trace-Id: - - 69a8c9da0000000019146f2c246507f0 + - 69a8d3410000000017ee2b0340f0378a X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 353.668041ms + duration: 234.860959ms - id: 3 request: proto: HTTP/1.1 @@ -217,7 +217,7 @@ interactions: trailer: {} content_length: -1 uncompressed: true - body: '{"id":"bcd37563-913d-4192-8df4-ad21f7789cc1","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","name":"test-spans","description":null,"created":"2026-03-05T00:10:02.544Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"public":false,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","metadata":null,"tags":null}' + body: '{"id":"1ca73994-ead0-49be-92d4-8f8916477dcd","project_id":"842c2063-b9c7-4c40-9fb9-fa95f810c57f","name":"test-spans-5d390dc0","description":null,"created":"2026-03-05T00:50:10.051Z","repo_info":{},"commit":null,"base_exp_id":null,"deleted_at":null,"dataset_id":null,"dataset_version":null,"public":false,"user_id":"855483c6-68f0-4df4-a147-df9b4ea32e0c","metadata":null,"tags":null}' headers: Access-Control-Allow-Credentials: - "true" @@ -226,28 +226,209 @@ interactions: Content-Type: - application/json; charset=utf-8 Date: - - Thu, 05 Mar 2026 00:10:02 GMT + - Thu, 05 Mar 2026 00:50:10 GMT Etag: - - W/"174-TF3v5bwhAKfv+utVChBaVQS1WcM" + - W/"17d-cG0lelZAcNlJL1NknFljLpci0GY" Vary: - Origin, Accept-Encoding Via: - - 1.1 d220e3f3d93439a8c69225156c6ae800.cloudfront.net (CloudFront), 1.1 3caf9df4ca497afd40efb87f8957a7fa.cloudfront.net (CloudFront) + - 1.1 f36cc119cb86b2f70c315ca53fd1b4ee.cloudfront.net (CloudFront), 1.1 41d11417b7470cfb79cc5b299692369a.cloudfront.net (CloudFront) X-Amz-Apigw-Id: - - ZuR6MEnaIAMERow= + - ZuXyXFguoAMEA6A= X-Amz-Cf-Id: - - AF-BhtVgA2KfHNhg62eKvo0iscKUINaY2wwfvVs0UxF7J48r0MVURg== + - __z4NnnsWyqf6H5FKVIbLt66D2-JdsklpRvUx4lq4xSTfXnDHlrCTg== X-Amz-Cf-Pop: - HIO52-P2 - HIO52-P4 X-Amzn-Requestid: - - e4809472-0925-442d-9e5b-4e42db5eafeb + - e32ed372-3fa8-4432-96c1-fdeb5d934a54 X-Amzn-Trace-Id: - - Root=1-69a8c9da-1d9d3d407c1400fa4a320d9a;Parent=0aacc90c97a54f6b;Sampled=0;Lineage=1:24be3d11:0 + - Root=1-69a8d341-0f93b9c13fddcc1b36ac2295;Parent=139d89c07bf7b4a4;Sampled=0;Lineage=1:24be3d11:0 X-Bt-Internal-Trace-Id: - - 69a8c9da0000000028661bb250455f79 + - 69a8d3410000000053849cd8d26108c9 X-Cache: - Miss from cloudfront status: 200 OK code: 200 - duration: 383.825417ms + duration: 214.578166ms + - id: 4 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 388 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"limit":1000,"filter":{"children":[{"left":{"name":["root_span_id"],"op":"ident"},"op":"eq","right":{"op":"literal","value":"1faf54c2b37cad5df7ba1ece3757b934"}},{"children":[{"expr":{"name":["span_attributes","purpose"],"op":"ident"},"op":"isnull"},{"left":{"name":["span_attributes","purpose"],"op":"ident"},"op":"ne","right":{"op":"literal","value":"scorer"}}],"op":"or"}],"op":"and"}}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/v1/experiment/1ca73994-ead0-49be-92d4-8f8916477dcd/fetch + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"events":[]}' + headers: + Access-Control-Allow-Credentials: + - "true" + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + Content-Type: + - application/json + Date: + - Thu, 05 Mar 2026 00:50:10 GMT + Vary: + - Origin + Via: + - 1.1 44e3ef26e727fc044d711ef45aefcd72.cloudfront.net (CloudFront), 1.1 41d11417b7470cfb79cc5b299692369a.cloudfront.net (CloudFront) + X-Amz-Apigw-Id: + - ZuXyZE4GoAMEumw= + X-Amz-Cf-Id: + - eNAmkvqN5X3OGxr5eH3HxYEEcLEWWrBiHdDOqhiiKlUjzDyO6jOb8g== + X-Amz-Cf-Pop: + - HIO52-P2 + - HIO52-P4 + X-Amzn-Requestid: + - 5498c09d-c1a6-4595-aaba-0f76f4ffb01a + X-Amzn-Trace-Id: + - Root=1-69a8d342-0c6fdc4b635f440171233a03;Parent=755a00c73a89c003;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Api-Duration-Ms: + - "159" + X-Bt-Brainstore-Duration-Ms: + - "54" + X-Bt-Internal-Trace-Id: + - 69a8d3420000000006f38e1bc16804fe + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 287.293459ms + - id: 5 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 234 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"global_function":"project_default","function_type":"preprocessor","mode":"json","input":{"trace_ref":{"object_id":"1ca73994-ead0-49be-92d4-8f8916477dcd","object_type":"experiment","root_span_id":"1faf54c2b37cad5df7ba1ece3757b934"}}}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/function/invoke + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: false + body: "null" + headers: + Content-Type: + - application/json + Date: + - Thu, 05 Mar 2026 00:50:11 GMT + Via: + - 1.1 41d11417b7470cfb79cc5b299692369a.cloudfront.net (CloudFront) + X-Amz-Cf-Id: + - 1LvfwvIBefmZIy5-99ct3WL1oCdyR4NUFuaWJ-UMH7u4x9dTU9aZzA== + X-Amz-Cf-Pop: + - HIO52-P4 + X-Amzn-Requestid: + - 9749feee-9c37-4c42-8976-77daa0f5e043 + X-Amzn-Trace-Id: + - Root=1-69a8d342-7e4961f7070b84e71c8dbad1;Parent=45afc57b330e1497;Sampled=0;Lineage=1:8be8f50d:0 + X-Bt-Function-Creds-Cached: + - MISS + X-Bt-Function-Meta-Cached: + - MISS + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 706.176375ms + - id: 6 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 505 + transfer_encoding: [] + trailer: {} + host: api.braintrust.dev + remote_addr: "" + request_uri: "" + body: '{"limit":1000,"filter":{"children":[{"left":{"name":["root_span_id"],"op":"ident"},"op":"eq","right":{"op":"literal","value":"1faf54c2b37cad5df7ba1ece3757b934"}},{"children":[{"expr":{"name":["span_attributes","purpose"],"op":"ident"},"op":"isnull"},{"left":{"name":["span_attributes","purpose"],"op":"ident"},"op":"ne","right":{"op":"literal","value":"scorer"}}],"op":"or"},{"left":{"name":["span_attributes","type"],"op":"ident"},"op":"in","right":{"op":"literal","value":["nonexistent"]}}],"op":"and"}}' + form: {} + headers: + Accept: + - application/json + Content-Type: + - application/json + url: https://api.braintrust.dev/v1/experiment/1ca73994-ead0-49be-92d4-8f8916477dcd/fetch + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + transfer_encoding: [] + trailer: {} + content_length: -1 + uncompressed: true + body: '{"events":[]}' + headers: + Access-Control-Allow-Credentials: + - "true" + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + Content-Type: + - application/json + Date: + - Thu, 05 Mar 2026 00:50:11 GMT + Vary: + - Origin + Via: + - 1.1 f36cc119cb86b2f70c315ca53fd1b4ee.cloudfront.net (CloudFront), 1.1 41d11417b7470cfb79cc5b299692369a.cloudfront.net (CloudFront) + X-Amz-Apigw-Id: + - ZuXyjFyvoAMEK3g= + X-Amz-Cf-Id: + - OgFeXtXEvKJd0XUFKY6G3UPWY3i8fZk4S9cbdHpWBtpgW-4_lYJh9w== + X-Amz-Cf-Pop: + - HIO52-P2 + - HIO52-P4 + X-Amzn-Requestid: + - f3febb2b-e915-40ab-9085-0606c22d687e + X-Amzn-Trace-Id: + - Root=1-69a8d343-2aa8f6773604a56854850deb;Parent=36e358b31427137e;Sampled=0;Lineage=1:24be3d11:0 + X-Bt-Api-Duration-Ms: + - "66" + X-Bt-Brainstore-Duration-Ms: + - "54" + X-Bt-Internal-Trace-Id: + - 69a8d3430000000039aee098bfdc19aa + X-Cache: + - Miss from cloudfront + status: 200 OK + code: 200 + duration: 196.673416ms From beafc20ce0f8ad1bd4234afc704971160e2b732f Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Wed, 4 Mar 2026 17:07:39 -0800 Subject: [PATCH 8/8] more chnages --- api/functions/functions.go | 32 ++++++++-------- eval/eval.go | 7 ++-- eval/eval_test.go | 77 -------------------------------------- internal/auth/auth_test.go | 19 ---------- internal/auth/session.go | 13 ------- 5 files changed, 19 insertions(+), 129 deletions(-) diff --git a/api/functions/functions.go b/api/functions/functions.go index 9a171c2..cfc689f 100644 --- a/api/functions/functions.go +++ b/api/functions/functions.go @@ -114,22 +114,6 @@ func (a *API) InvokeGlobal(ctx context.Context, req InvokeGlobalParams) (any, er return a.invokePath(ctx, "/function/invoke", req) } -// Delete deletes a function by ID. -func (a *API) Delete(ctx context.Context, functionID string) error { - if functionID == "" { - return fmt.Errorf("function ID is required") - } - - path := fmt.Sprintf("/v1/function/%s", functionID) - resp, err := a.client.DELETE(ctx, path) - if err != nil { - return err - } - defer func() { _ = resp.Body.Close() }() - - return nil -} - func (a *API) invokePath(ctx context.Context, path string, req any) (any, error) { resp, err := a.client.POST(ctx, path, req) if err != nil { @@ -160,3 +144,19 @@ func decodeInvokeResponse(body []byte) (any, error) { } return output, nil } + +// Delete deletes a function by ID. +func (a *API) Delete(ctx context.Context, functionID string) error { + if functionID == "" { + return fmt.Errorf("function ID is required") + } + + path := fmt.Sprintf("/v1/function/%s", functionID) + resp, err := a.client.DELETE(ctx, path) + if err != nil { + return err + } + defer func() { _ = resp.Body.Close() }() + + return nil +} diff --git a/eval/eval.go b/eval/eval.go index 1c3b6c2..e10c6fc 100644 --- a/eval/eval.go +++ b/eval/eval.go @@ -480,11 +480,10 @@ func (e *eval[I, R]) runCase(ctx context.Context, span oteltrace.Span, c Case[I, } func rootSpanIDFromSpan(span oteltrace.Span) string { - sc := span.SpanContext() - if sc.TraceID().IsValid() { - return sc.TraceID().String() + if span == nil { + return "" } - return sc.SpanID().String() + return span.SpanContext().TraceID().String() } // runTask executes the task function and creates a task span. diff --git a/eval/eval_test.go b/eval/eval_test.go index fe20c13..f6173d3 100644 --- a/eval/eval_test.go +++ b/eval/eval_test.go @@ -2,11 +2,8 @@ package eval import ( "context" - "encoding/json" "errors" "io" - "net/http" - "net/http/httptest" "testing" "github.com/stretchr/testify/assert" @@ -14,11 +11,8 @@ import ( "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" - "github.com/braintrustdata/braintrust-sdk-go/api" - "github.com/braintrustdata/braintrust-sdk-go/internal/auth" "github.com/braintrustdata/braintrust-sdk-go/internal/oteltest" "github.com/braintrustdata/braintrust-sdk-go/internal/tests" - "github.com/braintrustdata/braintrust-sdk-go/logger" "github.com/braintrustdata/braintrust-sdk-go/trace" ) @@ -264,77 +258,6 @@ func TestNewEval_DefaultParallelism(t *testing.T) { assert.Equal(t, 1, ute2.eval.goroutines) } -func TestEval_Run_TraceRefUsesRootTraceID(t *testing.T) { - t.Parallel() - - var gotRootSpanID string - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - require.Equal(t, http.MethodPost, r.Method) - require.Equal(t, "/function/invoke", r.URL.Path) - - var body map[string]any - require.NoError(t, json.NewDecoder(r.Body).Decode(&body)) - - input, ok := body["input"].(map[string]any) - require.True(t, ok) - traceRef, ok := input["trace_ref"].(map[string]any) - require.True(t, ok) - gotRootSpanID, _ = traceRef["root_span_id"].(string) - - require.NoError(t, json.NewEncoder(w).Encode([]map[string]any{ - {"role": "user", "content": "hello"}, - })) - })) - defer server.Close() - - tp, _ := oteltest.Setup(t) - tracer := tp.Tracer(t.Name()) - session := auth.NewTestSession( - "test-key", - "org-id", - "org-name", - server.URL, - server.URL, - server.URL, - logger.Discard(), - ) - - cases := NewDataset([]Case[testInput, testOutput]{ - { - Input: testInput{Value: "abc"}, - Expected: testOutput{Result: "output-abc"}, - }, - }) - task := T(func(ctx context.Context, input testInput) (testOutput, error) { - return testOutput{Result: "output-" + input.Value}, nil - }) - scorer := NewScorer("thread", func(ctx context.Context, result TaskResult[testInput, testOutput]) (Scores, error) { - _, _ = result.Thread(ctx) - return S(1), nil - }) - - apiClient := api.NewClient("test-key", api.WithAPIURL(server.URL)) - - e := testNewEval( - session, - tracer, - apiClient, - "exp-123", - "test-exp", - "proj-123", - "test-proj", - cases, - task, - []Scorer[testInput, testOutput]{scorer}, - 1, - ) - - _, err := e.run(context.Background()) - require.NoError(t, err) - require.NotEmpty(t, gotRootSpanID) - assert.Regexp(t, "^[0-9a-f]{32}$", gotRootSpanID, "root_span_id should use trace/root ID format") -} - func TestEval_Run_TaskError(t *testing.T) { t.Parallel() diff --git a/internal/auth/auth_test.go b/internal/auth/auth_test.go index dc36415..0d3cca9 100644 --- a/internal/auth/auth_test.go +++ b/internal/auth/auth_test.go @@ -353,25 +353,6 @@ func TestSession_APIInfo(t *testing.T) { }) } -func TestSession_API(t *testing.T) { - t.Parallel() - - session := NewTestSession( - "test-key-123", - "org-id", - "org-name", - "https://api.braintrust.dev", - "https://www.braintrust.dev", - "https://www.braintrust.dev", - logger.Discard(), - ) - - apiClient := session.API() - require.NotNil(t, apiClient) - require.NotNil(t, apiClient.Functions()) - require.NotNil(t, apiClient.Objects()) -} - // TestSession_OrgName tests that OrgName() returns org name after login func TestSession_OrgName(t *testing.T) { t.Parallel() diff --git a/internal/auth/session.go b/internal/auth/session.go index 7cfbe67..5f7f1f6 100644 --- a/internal/auth/session.go +++ b/internal/auth/session.go @@ -5,7 +5,6 @@ import ( "fmt" "sync" - "github.com/braintrustdata/braintrust-sdk-go/api" "github.com/braintrustdata/braintrust-sdk-go/internal/https" "github.com/braintrustdata/braintrust-sdk-go/logger" ) @@ -126,18 +125,6 @@ func (s *Session) APIInfo() APIInfo { } } -// API returns an API client configured from the current session. -// It uses APIInfo() so it works before login completes and automatically -// picks up server-provided APIURL once available. -func (s *Session) API() *api.API { - apiInfo := s.APIInfo() - return api.NewClient( - apiInfo.APIKey, - api.WithAPIURL(apiInfo.APIURL), - api.WithLogger(s.logger), - ) -} - func (s *Session) getLoginResult() (bool, *loginResult) { s.mu.RLock() defer s.mu.RUnlock()