Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ The AIOS-Agent SDK is designed for agent users and developers, enabling them to
📝 See [here](https://docs.aios.foundation/getting-started/installation).

Below are some useful commands:
- [List available LLMs](./cerebrum/commands/list_available_llms.py)
```bash
list-available-llms
```

- [List agents from agenthub](./cerebrum/commands/list_agenthub_agents.py)
```bash
list-agenthub-agents
Expand Down
Empty file added benchmarks/agents/autogen.py
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from litellm import completion

class PureLLM:
class CoT:
def __init__(self, on_aios: bool = True):
self.agent_name = "llm"
self.on_aios = on_aios
Expand Down
Empty file.
Empty file added benchmarks/agents/metagpt.py
Empty file.
Empty file.
7 changes: 7 additions & 0 deletions benchmarks/agents/owl/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@

class OWLAgent:
    """Placeholder benchmark agent for OWL; no behavior is implemented yet.

    The signatures mirror how the GAIA inference code in this change drives
    agents: it constructs them with a single ``on_aios`` argument and calls
    ``run_gaia(**data)`` with the fields of one dataset row.
    """

    def __init__(self, on_aios: bool = True):
        """Store the execution-mode flag.

        Args:
            on_aios: Flag passed by the benchmark runner. It is only stored
                here; nothing in this stub reads it yet.
        """
        self.on_aios = on_aios

    def run_gaia(self, **kwargs):
        """Stub for running one GAIA task.

        Args:
            **kwargs: Fields of one dataset row. Accepted so that
                ``run_gaia(**data)`` does not raise ``TypeError``; ignored.

        Returns:
            None. NOTE(review): the GAIA caller runs ``re.search`` on the
            return value, so a real implementation must return a string.
        """
        return None
1,079 changes: 1,079 additions & 0 deletions benchmarks/agents/owl/role_playing.py

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions benchmarks/experiment_core.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from typing import Any, Callable

from pydantic.v1 import BaseModel
from pydantic import BaseModel
from tqdm import tqdm

from .agents.pure_llm import PureLLM
from .agents.react import ReActAgent

AGENT_TYPE_MAPPING_AIOS = {
"swe:llm": PureLLM,
"humaneval:llm": PureLLM,
"gaia:llm": PureLLM,
"swe:react": ReActAgent,
"humaneval:react": ReActAgent,
"gaia:react": ReActAgent,
}


Expand Down
10 changes: 6 additions & 4 deletions benchmarks/gaia/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@
def write_output_func(result_list: List, output_file: str):
with open(output_file, "w", encoding="utf-8") as file:
json.dump(result_list, file, ensure_ascii=False, indent=4)
logger.log(f"Write results num: {len(result_list)}", level="info")
# logger.log(f"Write results num: {len(result_list)}", level="info")


def process_one_func(data, meta_data: MetaData):
agent: ExperimentAgent = AGENT_TYPE_MAPPING_AIOS[meta_data.agent_type](meta_data.on_aios)
result = agent.run_gaia(data["Question"])
agent = AGENT_TYPE_MAPPING_AIOS[meta_data.agent_type](meta_data.on_aios)

# breakpoint()
result = agent.run_gaia(**data)

match = re.search(r'FINAL ANSWER: (.+)', result)
if match:
Expand Down Expand Up @@ -43,7 +45,7 @@ def process_one_func(data, meta_data: MetaData):
dataset=dataset,
agent_type=agent_type,
output_file=main_args.output_file,
on_aios=main_args.on_aios,
on_aios=main_args.on_aios
# max_num=main_args.max_num,
# aios_args=vars(global_args),
)
Expand Down
48 changes: 26 additions & 22 deletions benchmarks/gaia/run_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,32 @@
def run_evaluation(input_file: str, output_file: str, data_name: str, split: str):
    """Score saved predictions against the dataset's reference answers.

    Args:
        input_file: Path to a JSON file holding a list of predictions; each
            entry is read through its ``"result"`` key.
        output_file: Path where the mismatching predictions are written as JSON.
        data_name: Dataset identifier handed to ``load_dataset``.
        split: Dataset split handed to ``load_dataset``.
    """
    dataset = load_dataset(data_name, "2023_all", split=split)

    with open(input_file, "r", encoding="utf-8") as file:
        predictions = json.load(file)

    right_num = 0
    error_predictions = []
    # Predictions are paired with dataset rows purely by position.
    for prediction, data in tqdm(zip(predictions, dataset)):
        if prediction["result"] == data["Final answer"]:
            right_num += 1
        else:
            error_predictions.append({
                "task_id": data["task_id"],
                "error_answer": prediction["result"],
                "right_answer": data["Final answer"],
            })

    with open(output_file, "w", encoding="utf-8") as file:
        json.dump(error_predictions, file, ensure_ascii=False, indent=4)

    total_num = len(predictions)
    # An empty prediction file must not crash the summary with ZeroDivisionError.
    right_rate = right_num / total_num if total_num else 0.0

    # print() has no ``level`` keyword; passing one raised TypeError.
    print(f"Total num: {total_num} \n"
          f" Right num: {right_num} \n"
          f" Right Rate: {right_rate}")


if __name__ == '__main__':
Expand Down
11 changes: 9 additions & 2 deletions benchmarks/gaia/run_exp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,16 @@
python -m benchmarks.gaia.inference \
--data_name gaia-benchmark/GAIA \
--split validation \
--output_file benchmarks/gaia/llm_eval_prediction.json \
--output_file benchmarks/gaia/react_eval_prediction.json \
--on_aios \
--agent_type llm
--agent_type react

python -m benchmarks.agents.react \
--data_name gaia-benchmark/GAIA \
--split validation \
--output_file benchmarks/gaia/react_eval_prediction.json \
--on_aios \
--agent_type react

# Step 2: Run the evaluation script
# python -m benchmarks.gaia.inference \
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/swebench/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from datasets import load_dataset

from ..agents.pure_llm import PureLLM
from ..agents.react import PureLLM
from ..experiment_core import MetaData, AGENT_TYPE_MAPPING_AIOS, run_inference
from ..utils import get_parser

Expand Down
2 changes: 1 addition & 1 deletion benchmarks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ def get_parser():
parser = argparse.ArgumentParser()
parser.add_argument("--agent_type", type=str, default="interpreter")
parser.add_argument("--data_name", type=str, default="gaia-benchmark/GAIA")
parser.add_argument("--split", type=str, default="test")
parser.add_argument("--split", type=str, default="validation")
parser.add_argument("--output_file", type=str, default="prediction.json")
parser.add_argument("--on_aios", action="store_true")
parser.add_argument("--max_num", type=int, default=None)
Expand Down
58 changes: 58 additions & 0 deletions cerebrum/commands/list_available_llms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from cerebrum.llm.apis import list_available_llms

from rich.console import Console
from rich.table import Table
from rich.panel import Panel
from rich.text import Text
from rich.box import ROUNDED

import sys

def list_agenthub_agents():
    """Print a table of the LLMs returned by ``list_available_llms``.

    Shows a status spinner while the list is fetched. If nothing is returned,
    prints a "No LLMs found" panel and exits early; otherwise renders one row
    per LLM (name, backend, hostname) followed by a total count.

    NOTE(review): the function name refers to agents although it lists LLMs;
    it is kept unchanged because ``main()`` calls it by this name.
    """
    console = Console()

    with console.status("[bold green]Listing available LLMs..."):
        llms = list_available_llms()

    if not llms:
        console.print(Panel("[bold yellow]No LLMs found", title="LLM List"))
        return

    # Create a table with row separators and rounded borders
    table = Table(
        title="Available LLMs",  # previously mislabeled as an AgentHub agent list
        box=ROUNDED,
        show_header=True,
        header_style="bold white on blue",
        show_lines=True,  # This adds horizontal lines between rows
    )

    # Add columns to the table with adjusted widths
    table.add_column("Name", style="cyan bold", no_wrap=True)
    table.add_column("Backend", style="green", width=40, overflow="fold")
    table.add_column("Hostname", style="blue", no_wrap=True)

    # Add rows to the table; missing keys fall back to "N/A"
    for llm in llms:
        name = llm.get("name", "N/A")
        backend = llm.get("backend", "N/A")
        hostname = llm.get("hostname", "N/A")

        table.add_row(name, backend, hostname)

    # Print the table
    console.print("\n")  # Add some space before the table
    console.print(table)

    # Print summary
    summary = Text()
    summary.append("\nTotal LLMs available: ", style="bold")
    summary.append(f"{len(llms)}", style="bold green")
    console.print(summary)
    console.print("\n")  # Add some space after the summary

def main():
    """Command entry point: render the LLM listing and return ``None``."""
    list_agenthub_agents()
    return None

if __name__ == "__main__":
sys.exit(main())
2 changes: 1 addition & 1 deletion cerebrum/example/agents/academic_agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def run(self, task_input):
else:
selected_tools = None

breakpoint()
# breakpoint()

if action_type == "call_tool":
response = llm_call_tool(
Expand Down
2 changes: 1 addition & 1 deletion cerebrum/example/agents/academic_agent/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
],
"meta": {
"author": "example",
"version": "1.1.5",
"version": "1.1.7",
"license": "CC0"
},
"build": {
Expand Down
2 changes: 1 addition & 1 deletion cerebrum/example/agents/autogen_demo_agent/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"tools": [],
"meta": {
"author": "autogen",
"version": "0.0.3",
"version": "0.0.4",
"license": "CC0"
},
"build": {
Expand Down
Loading