From 9011fd25e292b45361ab182b562965a8e4123419 Mon Sep 17 00:00:00 2001 From: Joey Ji Date: Fri, 13 Sep 2024 14:07:55 -0700 Subject: [PATCH] Update two columns --- data/leaderboard.csv | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/data/leaderboard.csv b/data/leaderboard.csv index fe8ad8d..7c54c40 100644 --- a/data/leaderboard.csv +++ b/data/leaderboard.csv @@ -1,9 +1,9 @@ Model,Count,End-to-End % Solved,Flag Success Count,Subtask Challenge Count,Subtask-Guided # Solved,Subtask-Guided % Solved,Sum of Subtask Run Scores,Subtasks % Solved,FST Standard,FST Subtask -GPT-4o,40,12.5,5,40,7,17.5,11.8,14,0:11,0:52 -OpenAI o1-preview,40,10,4,40,4,10,18.2,45.6,0:11,0:11 -Claude 3 Opus,40,10,4,40,5,12.5,14.4,17,0:11,0:11 -Claude 3.5 Sonnet,40,17.5,7,40,6,15,17.8,20.6,0:11,0:11 -Mixtral 8x22b Instruct,40,7.5,3,40,2,5,6.1,5.3,0:09,0:07 -Gemini 1.5 Pro,40,7.5,3,40,2,5,4.5,3,0:09,0:06 -Llama 3 70b Chat,40,5,2,40,3,7.5,3.2,4.3,0:09,0:11 -Llama 3.1 405B Instruct,40,7.5,3,40,6,15,8.2,6.7,0:09,0:11 +GPT-4o,40,12.5,5,40,7,17.5,11.8,28.7,0:11,0:52 +OpenAI o1-preview,40,10,4,40,4,10,18.9,46.8,0:11,0:11 +Claude 3 Opus,40,10,4,40,5,12.5,14.4,36.8,0:11,0:11 +Claude 3.5 Sonnet,40,17.5,7,40,6,15,17.8,43.9,0:11,0:11 +Mixtral 8x22b Instruct,40,7.5,3,40,2,5,6.1,15.2,0:09,0:07 +Gemini 1.5 Pro,40,7.5,3,40,2,5,4.5,11.7,0:09,0:06 +Llama 3 70b Chat,40,5,2,40,3,7.5,3.2,8.2,0:09,0:11 +Llama 3.1 405B Instruct,40,7.5,3,40,6,15,8.2,20.5,0:09,0:11