@@ -186,9 +186,9 @@ describe("runEvaluator", () => {
186186 undefined ,
187187 ) ;
188188
189- expect ( out . results . every ( ( r ) => Object . keys ( r . scores ) . length === 0 ) ) . toBe (
190- true ,
191- ) ;
189+ expect (
190+ out . results . every ( ( r ) => Object . keys ( r . scores ?? { } ) . length === 0 ) ,
191+ ) . toBe ( true ) ;
192192 } ) ;
193193
194194 describe ( "errorScoreHandler" , ( ) => {
@@ -216,8 +216,8 @@ describe("runEvaluator", () => {
216216 expect (
217217 out . results . every (
218218 ( r ) =>
219- Object . keys ( r . scores ) . length === 3 &&
220- Object . values ( r . scores ) . every ( ( v ) => v === 0 ) ,
219+ Object . keys ( r . scores ?? { } ) . length === 3 &&
220+ Object . values ( r . scores ?? { } ) . every ( ( v ) => v === 0 ) ,
221221 ) ,
222222 ) . toBe ( true ) ;
223223 } ) ;
@@ -245,10 +245,10 @@ describe("runEvaluator", () => {
245245 expect (
246246 out . results . every (
247247 ( r ) =>
248- Object . keys ( r . scores ) . length === 3 &&
249- r . scores . scorer_0 === 0 &&
250- r . scores . scorer_1 === 1 &&
251- r . scores . scorer_2 === 1 ,
248+ Object . keys ( r . scores ?? { } ) . length === 3 &&
249+ r . scores ? .scorer_0 === 0 &&
250+ r . scores ? .scorer_1 === 1 &&
251+ r . scores ? .scorer_2 === 1 ,
252252 ) ,
253253 ) . toBe ( true ) ;
254254 } ) ;
@@ -276,7 +276,7 @@ describe("runEvaluator", () => {
276276 ) ;
277277
278278 expect (
279- out . results . every ( ( r ) => Object . keys ( r . scores ) . length === 0 ) ,
279+ out . results . every ( ( r ) => Object . keys ( r . scores ?? { } ) . length === 0 ) ,
280280 ) . toBe ( true ) ;
281281 } ) ;
282282
@@ -303,8 +303,8 @@ describe("runEvaluator", () => {
303303 expect (
304304 out . results . every (
305305 ( r ) =>
306- Object . keys ( r . scores ) . length === 1 &&
307- r . scores . error_score === 1 ,
306+ Object . keys ( r . scores ?? { } ) . length === 1 &&
307+ r . scores ? .error_score === 1 ,
308308 ) ,
309309 ) . toBe ( true ) ;
310310 } ) ;
@@ -579,13 +579,13 @@ test("Eval with noSendLogs: true runs locally without creating experiment", asyn
579579 expect ( result . results ) . toHaveLength ( 2 ) ;
580580 expect ( result . results [ 0 ] . input ) . toBe ( "hello" ) ;
581581 expect ( result . results [ 0 ] . output ) . toBe ( "hello world" ) ;
582- expect ( result . results [ 0 ] . scores . exact_match ) . toBe ( 1 ) ;
583- expect ( result . results [ 0 ] . scores . simple_scorer ) . toBe ( 0.8 ) ;
582+ expect ( result . results [ 0 ] . scores ? .exact_match ) . toBe ( 1 ) ;
583+ expect ( result . results [ 0 ] . scores ? .simple_scorer ) . toBe ( 0.8 ) ;
584584
585585 expect ( result . results [ 1 ] . input ) . toBe ( "test" ) ;
586586 expect ( result . results [ 1 ] . output ) . toBe ( "test world" ) ;
587- expect ( result . results [ 1 ] . scores . exact_match ) . toBe ( 1 ) ;
588- expect ( result . results [ 1 ] . scores . simple_scorer ) . toBe ( 0.8 ) ;
587+ expect ( result . results [ 1 ] . scores ? .exact_match ) . toBe ( 1 ) ;
588+ expect ( result . results [ 1 ] . scores ? .simple_scorer ) . toBe ( 0.8 ) ;
589589
590590 // Verify it builds a local summary (no experimentUrl means local run)
591591 expect ( result . summary . projectName ) . toBe ( "test-no-logs" ) ;
@@ -660,10 +660,10 @@ test("Eval with returnResults: true collects all results", async () => {
660660 expect ( result . results ) . toHaveLength ( 2 ) ;
661661 expect ( result . results [ 0 ] . input ) . toBe ( "hello" ) ;
662662 expect ( result . results [ 0 ] . output ) . toBe ( "hello world" ) ;
663- expect ( result . results [ 0 ] . scores . exact_match ) . toBe ( 1 ) ;
663+ expect ( result . results [ 0 ] . scores ? .exact_match ) . toBe ( 1 ) ;
664664 expect ( result . results [ 1 ] . input ) . toBe ( "test" ) ;
665665 expect ( result . results [ 1 ] . output ) . toBe ( "test world" ) ;
666- expect ( result . results [ 1 ] . scores . exact_match ) . toBe ( 1 ) ;
666+ expect ( result . results [ 1 ] . scores ? .exact_match ) . toBe ( 1 ) ;
667667
668668 // Summary should also be correct
669669 expect ( result . summary . scores . exact_match . score ) . toBe ( 1 ) ;
@@ -862,7 +862,7 @@ test("scorer spans have purpose='scorer' attribute", async () => {
862862 ) ;
863863
864864 expect ( result . results ) . toHaveLength ( 1 ) ;
865- expect ( result . results [ 0 ] . scores . simple_scorer ) . toBe ( 1 ) ;
865+ expect ( result . results [ 0 ] . scores ? .simple_scorer ) . toBe ( 1 ) ;
866866
867867 await memoryLogger . flush ( ) ;
868868 const logs = await memoryLogger . drain ( ) ;
@@ -1635,6 +1635,7 @@ test("classifier-only evaluator populates classifications field", async () => {
16351635
16361636 expect ( result . results ) . toHaveLength ( 1 ) ;
16371637 const r = result . results [ 0 ] ;
1638+ expect ( r . scores ) . toBeUndefined ( ) ;
16381639 expect ( r . classifications ?. category ) . toEqual ( [
16391640 {
16401641 id : "greeting" ,
@@ -1661,7 +1662,7 @@ test("scorer-only evaluator populates scores field", async () => {
16611662 ) ;
16621663
16631664 expect ( result . results ) . toHaveLength ( 1 ) ;
1664- expect ( result . results [ 0 ] . scores . exact_match ) . toBe ( 1 ) ;
1665+ expect ( result . results [ 0 ] . scores ? .exact_match ) . toBe ( 1 ) ;
16651666 expect ( result . results [ 0 ] . classifications ) . toBeUndefined ( ) ;
16661667} ) ;
16671668
@@ -1713,27 +1714,8 @@ test("mixed evaluator populates both scores and classifications", async () => {
17131714 ) ;
17141715
17151716 expect ( result . results ) . toHaveLength ( 1 ) ;
1716- expect ( result . results [ 0 ] . scores . exact_match ) . toBe ( 1 ) ;
1717+ expect ( result . results [ 0 ] . scores ? .exact_match ) . toBe ( 1 ) ;
17171718 expect ( result . results [ 0 ] . classifications ?. category ) . toEqual ( [
17181719 { id : "greeting" , label : "Greeting" } ,
17191720 ] ) ;
17201721} ) ;
1721-
1722- test ( "malformed classifier output fails clearly" , async ( ) => {
1723- const result = await Eval (
1724- "test-invalid-classifier-output" ,
1725- {
1726- data : [ { input : "hello" } ] ,
1727- task : ( input ) => input ,
1728- classifiers : [ ( ) => ( { } ) as never ] ,
1729- } ,
1730- { noSendLogs : true , returnResults : true } ,
1731- ) ;
1732-
1733- expect ( result . results ) . toHaveLength ( 1 ) ;
1734- expect ( ( result . results [ 0 ] as any ) . metadata ?. classifier_errors ) . toMatchObject ( {
1735- classifier_0 : expect . stringMatching (
1736- / m u s t r e t u r n c l a s s i f i c a t i o n s w i t h a n o n - e m p t y s t r i n g n a m e / ,
1737- ) ,
1738- } ) ;
1739- } ) ;
0 commit comments