Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# Benchmark runs directories
results
data
bin
*.zip

# Tests
Expand Down
47 changes: 47 additions & 0 deletions assets/ar/QA/ae/NativQAGlobal_Allam_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import json
import re

from llmebench.datasets import NativQAGlobalDataset
from llmebench.models import AzureModel
from llmebench.tasks import MultiNativQATask


def metadata():
    """Describe this benchmark asset.

    Returns:
        dict: The asset's "author" and "model" names, together with an
        empty "description" string and an empty "scores" mapping.
    """
    asset_info = dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="ALLaM-7B-Instruct",
        description="",
        scores={},
    )
    return asset_info


def config():
    """Assemble the run configuration for this asset.

    Returns:
        dict: The "dataset", "task" and "model" classes to use, plus
        "general_args" selecting the "uae" test split.
    """
    general_args = {"test_split": "uae"}
    return dict(
        dataset=NativQAGlobalDataset,
        task=MultiNativQATask,
        model=AzureModel,
        general_args=general_args,
    )


def prompt(input_sample):
    """Build the zero-shot chat messages for one sample.

    Args:
        input_sample: Mapping read for its "length" and "question" entries.

    Returns:
        list: Two messages, the user message carrying the instructions and
        the question, followed by an assistant-role message carrying the
        persona text.
    """
    word_limit = input_sample["length"]
    question = input_sample["question"]

    # Instructions, the word budget, then the question itself.
    user_text = (
        "\n"
        "Please use your expertise to answer the following Arabic question. "
        "Answer in Arabic. Please provide Answer only. No additional text. "
        f"Answer should be limited to less or equal to {word_limit} words.\n"
        "\n"
        f"Question: {question}\n"
        "\n"
    )

    # NOTE(review): the persona is sent with role "assistant" after the user
    # turn; presumably it is meant as a system-style instruction -- confirm.
    persona_text = (
        "\n"
        "You are an Arabic AI assistant specialized in providing detailed and "
        "accurate answers across various fields. Your task is to deliver "
        "clear, concise, and relevant information.\n"
    )

    return [
        dict(role="user", content=user_text),
        dict(role="assistant", content=persona_text),
    ]

def post_process(response):
    """Return the generated text with surrounding whitespace removed.

    Args:
        response: Mapping whose "output" entry holds the generated text.

    Returns:
        str: The "output" text after ``str.strip()``.
    """
    raw_output = response["output"]
    return raw_output.strip()
48 changes: 48 additions & 0 deletions assets/ar/QA/ae/NativQAGlobal_Fanar_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import json
import re

from llmebench.datasets import NativQAGlobalDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import MultiNativQATask


def metadata():
    """Describe this benchmark asset.

    Returns:
        dict: The asset's "author" and "model" names, together with an
        empty "description" string and an empty "scores" mapping.
    """
    asset_info = dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="fanar-llama-3-8b-instruct",
        description="",
        scores={},
    )
    return asset_info


def config():
    """Assemble the run configuration for this asset.

    Returns:
        dict: The "dataset", "task" and "model" classes to use, plus
        "general_args" selecting the "uae" test split.
    """
    general_args = {"test_split": "uae"}
    return dict(
        dataset=NativQAGlobalDataset,
        task=MultiNativQATask,
        model=OpenAIModel,
        general_args=general_args,
    )


def prompt(input_sample):
    """Build the zero-shot chat messages for one sample.

    Args:
        input_sample: Mapping read for its "question" entry.

    Returns:
        list: Two messages, the user message carrying the instructions and
        the question, followed by an assistant-role message carrying the
        persona text.
    """
    question = input_sample["question"]

    # Instructions followed by the question; no word budget in this variant.
    user_text = (
        "\n"
        "Please use your expertise to answer the following Arabic question. "
        "Answer in Arabic. Please provide Answer only. No additional text.\n"
        "\n"
        f"Question: {question}\n"
        "\n"
    )

    # NOTE(review): the persona is sent with role "assistant" after the user
    # turn; presumably it is meant as a system-style instruction -- confirm.
    persona_text = (
        "\n"
        "You are an Arabic AI assistant specialized in providing detailed and "
        "accurate answers across various fields. Your task is to deliver "
        "clear, concise, and relevant information.\n"
    )

    return [
        dict(role="user", content=user_text),
        dict(role="assistant", content=persona_text),
    ]


def post_process(response):
    """Return the first choice's message text, stripped of outer whitespace.

    Args:
        response: Mapping with a "choices" sequence; the first element's
            "message" -> "content" entry holds the generated text.

    Returns:
        str: That text after ``str.strip()``.
    """
    first_choice = response["choices"][0]
    message = first_choice["message"]
    return message["content"].strip()
65 changes: 65 additions & 0 deletions assets/ar/QA/ae/NativQAGlobal_Gemini_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import json
import re

from llmebench.datasets import NativQAGlobalDataset
from llmebench.models import GeminiModel
from llmebench.tasks import MultiNativQATask


def metadata():
    """Describe this benchmark asset.

    Returns:
        dict: The asset's "author" and "model" names, together with an
        empty "description" string and an empty "scores" mapping.
    """
    asset_info = dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="Gemini",
        description="",
        scores={},
    )
    return asset_info


def config():
    """Assemble the run configuration for this asset.

    Returns:
        dict: The "dataset", "task" and "model" classes to use, plus
        "general_args" selecting the "uae" test split.
    """
    general_args = {"test_split": "uae"}
    return dict(
        dataset=NativQAGlobalDataset,
        task=MultiNativQATask,
        model=GeminiModel,
        general_args=general_args,
    )



def prompt(input_sample):
    """Build the zero-shot chat messages for one sample.

    Args:
        input_sample: Mapping read for its "length" and "question" entries.

    Returns:
        list: Two messages, an assistant-role message carrying the persona
        text, followed by the user message carrying the instructions and
        the question.
    """
    word_limit = input_sample["length"]
    question = input_sample["question"]

    # Instructions, the word budget, then the question itself.
    user_text = (
        "\n"
        "Please use your expertise to answer the following Arabic question. "
        "Answer in Arabic. Please provide Answer only. No additional text. "
        f"Answer should be limited to less or equal to {word_limit} words.\n"
        "\n"
        f"Question: {question}\n"
        "\n"
    )

    persona_text = (
        "\n"
        "You are an Arabic AI assistant specialized in providing detailed and "
        "accurate answers across various fields. Your task is to deliver "
        "clear, concise, and relevant information.\n"
    )

    # The persona message goes first, ahead of the user turn.
    persona_message = dict(role="assistant", content=persona_text)
    user_message = dict(role="user", content=user_text)
    return [persona_message, user_message]

def post_process(response):
    """Extract the answer text from a Gemini response.

    Reads ``response[0]["content"]["parts"][0]["text"]``, removes every
    newline character and the surrounding whitespace, and, when the text
    holds a ```json fenced block, returns only what lies between the
    opening ```json marker and the last closing ``` marker.

    Args:
        response: Sequence whose first element exposes the generated text
            under "content" -> "parts" -> first part -> "text".

    Returns:
        str: The fenced payload when a complete ```json ... ``` fence is
        present, otherwise the cleaned text itself.
    """
    text = response[0]["content"]["parts"][0]["text"]
    text = text.replace("\n", "").strip()

    # Newlines are already gone, so "." spans the whole payload. An opening
    # fence without a closing one produces no match; fall back to the
    # cleaned text instead of failing on a None match object.
    fenced = re.search(r"```json(.*)```", text)
    if fenced is not None:
        return fenced.group(1)
    return text
38 changes: 38 additions & 0 deletions assets/ar/QA/ae/NativQAGlobal_JAIS13b_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import json
import re

from llmebench.datasets import NativQAGlobalDataset
from llmebench.models import AzureModel
from llmebench.tasks import MultiNativQATask


def metadata():
    """Describe this benchmark asset.

    Returns:
        dict: The asset's "author" and "model" names, together with an
        empty "description" string and an empty "scores" mapping.
    """
    asset_info = dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="Jais-13b-chat",
        description="",
        scores={},
    )
    return asset_info


def config():
    """Assemble the run configuration for this asset.

    Returns:
        dict: The "dataset", "task" and "model" classes to use, plus
        "general_args" selecting the "uae" test split.
    """
    general_args = {"test_split": "uae"}
    return dict(
        dataset=NativQAGlobalDataset,
        task=MultiNativQATask,
        model=AzureModel,
        general_args=general_args,
    )


def prompt(input_sample):
    """Build the single-message extractive prompt for one sample.

    Args:
        input_sample: Mapping read for its "context" and "question" entries.

    Returns:
        list: One user message whose content is the task instructions
        followed by the context, the question and an "Answer:" cue.
    """
    # NOTE(review): this reads a "context" entry -- verify that the dataset
    # loader actually supplies one for every sample.
    context = input_sample["context"]
    question = input_sample["question"]

    prompt_lines = [
        "Your task is to answer questions in Arabic based on a given context.",
        "Note: Your answers should be spans extracted from the given context without any illustrations.",
        "You don't need to provide a complete answer",
        f"Context:{context}",
        f"Question:{question}",
        "Answer:",
    ]
    return [dict(role="user", content="\n".join(prompt_lines))]

def post_process(response):
    """Return the first choice's message text unchanged.

    Args:
        response: Mapping with a "choices" sequence; the first element's
            "message" -> "content" entry holds the generated text.

    Returns:
        The "content" value exactly as found (no stripping is applied).
    """
    first_choice = response["choices"][0]
    message = first_choice["message"]
    return message["content"]
49 changes: 49 additions & 0 deletions assets/ar/QA/bh/NativQAGlobal_Allam_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import json
import re

from llmebench.datasets import NativQAGlobalDataset
from llmebench.models import AzureModel
from llmebench.tasks import MultiNativQATask


def metadata():
    """Describe this benchmark asset.

    Returns:
        dict: The asset's "author" and "model" names, together with an
        empty "description" string and an empty "scores" mapping.
    """
    asset_info = dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="ALLaM-7B-Instruct",
        description="",
        scores={},
    )
    return asset_info



def config():
    """Assemble the run configuration for this asset.

    Returns:
        dict: The "dataset", "task" and "model" classes to use, plus
        "general_args" selecting the "bahrain" test split.
    """
    general_args = {"test_split": "bahrain"}
    return dict(
        dataset=NativQAGlobalDataset,
        task=MultiNativQATask,
        model=AzureModel,
        general_args=general_args,
    )


def prompt(input_sample):
    """Build the zero-shot chat messages for one sample.

    Args:
        input_sample: Mapping read for its "length" and "question" entries.

    Returns:
        list: Two messages, the user message carrying the instructions and
        the question, followed by an assistant-role message carrying the
        persona text.
    """
    word_limit = input_sample["length"]
    question = input_sample["question"]

    # Instructions, the word budget, then the question itself.
    user_text = (
        "\n"
        "Please use your expertise to answer the following Arabic question. "
        "Answer in Arabic. Please provide Answer only. No additional text. "
        f"Answer should be limited to less or equal to {word_limit} words.\n"
        "\n"
        f"Question: {question}\n"
        "\n"
    )

    # NOTE(review): the persona is sent with role "assistant" after the user
    # turn; presumably it is meant as a system-style instruction -- confirm.
    persona_text = (
        "\n"
        "You are an Arabic AI assistant specialized in providing detailed and "
        "accurate answers across various fields. Your task is to deliver "
        "clear, concise, and relevant information.\n"
    )

    return [
        dict(role="user", content=user_text),
        dict(role="assistant", content=persona_text),
    ]


def post_process(response):
    """Return the generated text with surrounding whitespace removed.

    Args:
        response: Mapping whose "output" entry holds the generated text.

    Returns:
        str: The "output" text after ``str.strip()``.
    """
    raw_output = response["output"]
    return raw_output.strip()
48 changes: 48 additions & 0 deletions assets/ar/QA/bh/NativQAGlobal_Fanar_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import json
import re

from llmebench.datasets import NativQAGlobalDataset
from llmebench.models import OpenAIModel
from llmebench.tasks import MultiNativQATask


def metadata():
    """Describe this benchmark asset.

    Returns:
        dict: The asset's "author" and "model" names, together with an
        empty "description" string and an empty "scores" mapping.
    """
    asset_info = dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="fanar-llama-3-8b-instruct",
        description="",
        scores={},
    )
    return asset_info


def config():
    """Assemble the run configuration for this asset.

    Returns:
        dict: The "dataset", "task" and "model" classes to use, plus
        "general_args" selecting the "bahrain" test split.
    """
    general_args = {"test_split": "bahrain"}
    return dict(
        dataset=NativQAGlobalDataset,
        task=MultiNativQATask,
        model=OpenAIModel,
        general_args=general_args,
    )


def prompt(input_sample):
    """Build the zero-shot chat messages for one sample.

    Args:
        input_sample: Mapping read for its "question" entry.

    Returns:
        list: Two messages, the user message carrying the instructions and
        the question, followed by an assistant-role message carrying the
        persona text.
    """
    question = input_sample["question"]

    # Instructions followed by the question; no word budget in this variant.
    user_text = (
        "\n"
        "Please use your expertise to answer the following Arabic question. "
        "Answer in Arabic. Please provide Answer only. No additional text.\n"
        "\n"
        f"Question: {question}\n"
        "\n"
    )

    # NOTE(review): the persona is sent with role "assistant" after the user
    # turn; presumably it is meant as a system-style instruction -- confirm.
    persona_text = (
        "\n"
        "You are an Arabic AI assistant specialized in providing detailed and "
        "accurate answers across various fields. Your task is to deliver "
        "clear, concise, and relevant information.\n"
    )

    return [
        dict(role="user", content=user_text),
        dict(role="assistant", content=persona_text),
    ]


def post_process(response):
    """Return the first choice's message text, stripped of outer whitespace.

    Args:
        response: Mapping with a "choices" sequence; the first element's
            "message" -> "content" entry holds the generated text.

    Returns:
        str: That text after ``str.strip()``.
    """
    first_choice = response["choices"][0]
    message = first_choice["message"]
    return message["content"].strip()
65 changes: 65 additions & 0 deletions assets/ar/QA/bh/NativQAGlobal_Gemini_ZeroShot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import json
import re

from llmebench.datasets import NativQAGlobalDataset
from llmebench.models import GeminiModel
from llmebench.tasks import MultiNativQATask


def metadata():
    """Describe this benchmark asset.

    Returns:
        dict: The asset's "author" and "model" names, together with an
        empty "description" string and an empty "scores" mapping.
    """
    asset_info = dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="Gemini",
        description="",
        scores={},
    )
    return asset_info


def config():
    """Assemble the run configuration for this asset.

    Returns:
        dict: The "dataset", "task" and "model" classes to use, plus
        "general_args" selecting the "bahrain" test split.
    """
    general_args = {"test_split": "bahrain"}
    return dict(
        dataset=NativQAGlobalDataset,
        task=MultiNativQATask,
        model=GeminiModel,
        general_args=general_args,
    )



def prompt(input_sample):
    """Build the zero-shot chat messages for one sample.

    Args:
        input_sample: Mapping read for its "length" and "question" entries.

    Returns:
        list: Two messages, an assistant-role message carrying the persona
        text, followed by the user message carrying the instructions and
        the question.
    """
    word_limit = input_sample["length"]
    question = input_sample["question"]

    # Instructions, the word budget, then the question itself.
    user_text = (
        "\n"
        "Please use your expertise to answer the following Arabic question. "
        "Answer in Arabic. Please provide Answer only. No additional text. "
        f"Answer should be limited to less or equal to {word_limit} words.\n"
        "\n"
        f"Question: {question}\n"
        "\n"
    )

    persona_text = (
        "\n"
        "You are an Arabic AI assistant specialized in providing detailed and "
        "accurate answers across various fields. Your task is to deliver "
        "clear, concise, and relevant information.\n"
    )

    # The persona message goes first, ahead of the user turn.
    persona_message = dict(role="assistant", content=persona_text)
    user_message = dict(role="user", content=user_text)
    return [persona_message, user_message]

def post_process(response):
    """Extract the answer text from a Gemini response.

    Reads ``response[0]["content"]["parts"][0]["text"]``, removes every
    newline character and the surrounding whitespace, and, when the text
    holds a ```json fenced block, returns only what lies between the
    opening ```json marker and the last closing ``` marker.

    Args:
        response: Sequence whose first element exposes the generated text
            under "content" -> "parts" -> first part -> "text".

    Returns:
        str: The fenced payload when a complete ```json ... ``` fence is
        present, otherwise the cleaned text itself.
    """
    text = response[0]["content"]["parts"][0]["text"]
    text = text.replace("\n", "").strip()

    # Newlines are already gone, so "." spans the whole payload. An opening
    # fence without a closing one produces no match; fall back to the
    # cleaned text instead of failing on a None match object.
    fenced = re.search(r"```json(.*)```", text)
    if fenced is not None:
        return fenced.group(1)
    return text
Loading