agiresearch · xizhu1022 · Apr 27, 2025 · Apr 3, 2025 · Apr 6, 2025 · Apr 6, 2025
diff --git a/README.md b/README.md
@@ -87,6 +87,11 @@ The AIOS-Agent SDK is designed for agent users and developers, enabling them to
 📝 See [here](https://docs.aios.foundation/getting-started/installation).
 
 Below are some useful commands to use
+- [List available LLMs](./cerebrum/commands/list_available_llms.py)
+    ```bash
+    list-available-llms
+    ```
+
 - [List agents from agenthub](./cerebrum/commands/list_agenthub_agents.py)
     ```bash
     list-agenthub-agents

diff --git a/benchmarks/agents/autogen.py b/benchmarks/agents/autogen.py
diff --git a/benchmarks/agents/pure_llm.py → benchmarks/agents/cot/cot.py b/benchmarks/agents/pure_llm.py → benchmarks/agents/cot/cot.py
@@ -2,7 +2,7 @@
 
 from litellm import completion
 
-class PureLLM:
+class CoT:
     def __init__(self, on_aios: bool = True):
         self.agent_name = "llm"
         self.on_aios = on_aios

diff --git a/benchmarks/agents/interpreter.py b/benchmarks/agents/interpreter.py
diff --git a/benchmarks/agents/metagpt.py b/benchmarks/agents/metagpt.py
diff --git a/benchmarks/agents/nano_manus/agent.py b/benchmarks/agents/nano_manus/agent.py
diff --git a/benchmarks/agents/owl/agent.py b/benchmarks/agents/owl/agent.py
@@ -0,0 +1,7 @@
+
+class OWLAgent:
+    def __init__(self):
+        """Set up the OWL agent; this stub keeps no state yet."""
+
+    def run_gaia(self):
+        """GAIA entry point; still a stub that does nothing and returns None."""
diff --git a/benchmarks/agents/owl/role_playing.py b/benchmarks/agents/owl/role_playing.py
diff --git a/benchmarks/experiment_core.py b/benchmarks/experiment_core.py
@@ -1,14 +1,14 @@
 from typing import Any, Callable
 
-from pydantic.v1 import BaseModel
+from pydantic import BaseModel
 from tqdm import tqdm
 
-from .agents.pure_llm import PureLLM
+from .agents.react import ReActAgent
 
 AGENT_TYPE_MAPPING_AIOS = {
-    "swe:llm": PureLLM,
-    "humaneval:llm": PureLLM,
-    "gaia:llm": PureLLM,
+    "swe:react": ReActAgent,  # NOTE(review): keys look like "<benchmark>:<agent type>" — confirm with callers
+    "humaneval:react": ReActAgent,
+    "gaia:react": ReActAgent,
 }
 
 

diff --git a/benchmarks/gaia/inference.py b/benchmarks/gaia/inference.py
@@ -10,12 +10,14 @@
 def write_output_func(result_list: List, output_file: str):
     with open(output_file, "w", encoding="utf-8") as file:
         json.dump(result_list, file, ensure_ascii=False, indent=4)
-    logger.log(f"Write results num: {len(result_list)}", level="info")
+    # TODO(review): the "Write results num" log call is disabled; restore it or delete this line.
 
 
 def process_one_func(data, meta_data: MetaData):
-    agent: ExperimentAgent = AGENT_TYPE_MAPPING_AIOS[meta_data.agent_type](meta_data.on_aios)
-    result = agent.run_gaia(data["Question"])
+    agent = AGENT_TYPE_MAPPING_AIOS[meta_data.agent_type](meta_data.on_aios)
+
+    # breakpoint()
+    result = agent.run_gaia(**data)
 
     match = re.search(r'FINAL ANSWER: (.+)', result)
     if match:
@@ -43,7 +45,7 @@ def process_one_func(data, meta_data: MetaData):
         dataset=dataset,
         agent_type=agent_type,
         output_file=main_args.output_file,
-        on_aios=main_args.on_aios,
+        on_aios=main_args.on_aios
         # max_num=main_args.max_num,
         # aios_args=vars(global_args),
     )

diff --git a/benchmarks/gaia/run_evaluation.py b/benchmarks/gaia/run_evaluation.py
@@ -7,28 +7,32 @@
 def run_evaluation(input_file: str, output_file: str, data_name: str, split: str):
+    """Score saved predictions against the dataset and dump the mismatches as JSON."""
     dataset = load_dataset(data_name, "2023_all", split=split)
 
-    with open(input_file, "r", encoding="utf-8") as file:
-        predictions = json.load(file)
-
-    right_num = 0
-    error_predictions = []
-    for prediction, data in tqdm(zip(predictions, dataset)):
-        if prediction["result"] == data["Final answer"]:
-            right_num += 1
-        else:
-            error_predictions.append({
-                "task_id": data["task_id"],
-                "error_answer": prediction["result"],
-                "right_answer": data["Final answer"],
-            })
-
-    with open(output_file, "w", encoding="utf-8") as file:
-        json.dump(error_predictions, file, ensure_ascii=False, indent=4)
-
-    print(f"Total num: {len(predictions)} \n"
-               f"             Right num: {right_num} \n"
-               f"             Right Rate: {right_num/len(predictions)}"
-               , level="info")
+    with open(input_file, "r", encoding="utf-8") as file:
+        predictions = json.load(file)
+
+    # Predictions are matched to dataset rows purely by position (zip order).
+    right_num = 0
+    error_predictions = []
+    for prediction, data in tqdm(zip(predictions, dataset)):
+        if prediction["result"] == data["Final answer"]:
+            right_num += 1
+        else:
+            error_predictions.append({
+                "task_id": data["task_id"],
+                "error_answer": prediction["result"],
+                "right_answer": data["Final answer"],
+            })
+
+    with open(output_file, "w", encoding="utf-8") as file:
+        json.dump(error_predictions, file, ensure_ascii=False, indent=4)
+
+    # print() accepts no ``level`` keyword; an empty prediction list gives rate 0.0.
+    total_num = len(predictions)
+    right_rate = right_num / total_num if total_num else 0.0
+    print(f"Total num: {total_num} \n"
+          f"             Right num: {right_num} \n"
+          f"             Right Rate: {right_rate}")
 
 
 if __name__ == '__main__':

diff --git a/benchmarks/gaia/run_exp.sh b/benchmarks/gaia/run_exp.sh
@@ -3,9 +3,16 @@
 python -m benchmarks.gaia.inference \
   --data_name gaia-benchmark/GAIA \
   --split validation \
-  --output_file benchmarks/gaia/llm_eval_prediction.json \
+  --output_file benchmarks/gaia/react_eval_prediction.json \
   --on_aios \
-  --agent_type llm
+  --agent_type react
+
+python -m benchmarks.agents.react \
+  --data_name gaia-benchmark/GAIA \
+  --split validation \
+  --output_file benchmarks/gaia/react_eval_prediction.json \
+  --on_aios \
+  --agent_type react  # NOTE(review): same flags and output file as the benchmarks.gaia.inference call above; confirm this second run is intended
 
 # Step 2: Run the evaluation script
 # python -m benchmarks.gaia.inference \

diff --git a/benchmarks/swebench/inference.py b/benchmarks/swebench/inference.py
@@ -5,7 +5,7 @@
 
 from datasets import load_dataset
 
-from ..agents.pure_llm import PureLLM
+from ..agents.react import PureLLM
 from ..experiment_core import MetaData, AGENT_TYPE_MAPPING_AIOS, run_inference
 from ..utils import get_parser
 

diff --git a/benchmarks/utils.py b/benchmarks/utils.py
@@ -4,7 +4,7 @@ def get_parser():
     parser = argparse.ArgumentParser()
     parser.add_argument("--agent_type", type=str, default="interpreter")
     parser.add_argument("--data_name", type=str, default="gaia-benchmark/GAIA")
-    parser.add_argument("--split", type=str, default="test")
+    parser.add_argument("--split", type=str, default="validation")
     parser.add_argument("--output_file", type=str, default="prediction.json")
     parser.add_argument("--on_aios", action="store_true")
     parser.add_argument("--max_num", type=int, default=None)

diff --git a/cerebrum/commands/list_available_llms.py b/cerebrum/commands/list_available_llms.py
@@ -0,0 +1,58 @@
+from cerebrum.llm.apis import list_available_llms
+
+from rich.console import Console
+from rich.table import Table
+from rich.panel import Panel
+from rich.text import Text
+from rich.box import ROUNDED
+
+import sys
+
+def list_agenthub_agents():
+    """Print the LLMs returned by ``list_available_llms()`` as a rich table."""
+    # NOTE(review): the name looks inherited from the AgentHub listing command;
+    # it is left unchanged because ``main()`` calls it by this name.
+    console = Console()
+
+    with console.status("[bold green]Listing available LLMs..."):
+        llms = list_available_llms()
+
+    if not llms:
+        console.print(Panel("[bold yellow]No LLMs found", title="LLM List"))
+        return
+
+    # Create a table with row separators and rounded borders
+    table = Table(
+        title="Available LLMs",
+        box=ROUNDED,
+        show_header=True,
+        header_style="bold white on blue",
+        show_lines=True,  # This adds horizontal lines between rows
+    )
+
+    # Add columns to the table with adjusted widths
+    table.add_column("Name", style="cyan bold", no_wrap=True)
+    table.add_column("Backend", style="green", width=40, overflow="fold")
+    table.add_column("Hostname", style="blue", no_wrap=True)
+
+    # One row per LLM; a missing field is shown as "N/A"
+    for llm in llms:
+        fields = [llm.get(key, "N/A") for key in ("name", "backend", "hostname")]
+        table.add_row(*fields)
+
+    # Print the table
+    console.print("\n")  # Add some space before the table
+    console.print(table)
+
+    # Print summary
+    summary = Text()
+    summary.append("\nTotal LLMs available: ", style="bold")
+    summary.append(str(len(llms)), style="bold green")
+    console.print(summary)
+    console.print("\n")  # Add some space after the summary
+
+def main():  # entry point used by the ``__main__`` guard; implicitly returns None (exit status 0)
+    list_agenthub_agents()
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/cerebrum/example/agents/academic_agent/agent.py b/cerebrum/example/agents/academic_agent/agent.py
@@ -159,7 +159,7 @@ def run(self, task_input):
                     else:
                         selected_tools = None
 
-                    breakpoint()
+                    # breakpoint()
 
                     if action_type == "call_tool":
                         response = llm_call_tool(

diff --git a/cerebrum/example/agents/academic_agent/config.json b/cerebrum/example/agents/academic_agent/config.json
@@ -9,7 +9,7 @@
     ],
     "meta": {
         "author": "example",
-        "version": "1.1.5",
+        "version": "1.1.7",
         "license": "CC0"
     },
     "build": {

diff --git a/cerebrum/example/agents/autogen_demo_agent/config.json b/cerebrum/example/agents/autogen_demo_agent/config.json
@@ -4,7 +4,7 @@
     "tools": [],
     "meta": {
         "author": "autogen",
-        "version": "0.0.3",
+        "version": "0.0.4",
         "license": "CC0"
     },
     "build": {