qcri · hunzed · Oct 2, 2025
diff --git a/.gitignore b/.gitignore
@@ -10,6 +10,7 @@
 # Benchmark runs directories
 results
 data
+bin
 *.zip
 
 # Tests

diff --git a/assets/ar/QA/ae/NativQAGlobal_Allam_ZeroShot.py b/assets/ar/QA/ae/NativQAGlobal_Allam_ZeroShot.py
@@ -0,0 +1,47 @@
+import json
+import re
+
+from llmebench.datasets import NativQAGlobalDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import MultiNativQATask
+
+
+def metadata():
+    """Return a dict with this asset's author, model, description and scores."""
+    info = {}
+    info["author"] = "Arabic Language Technologies, QCRI, HBKU"
+    info["model"] = "ALLaM-7B-Instruct"
+    # Description and scores are left empty in this asset.
+    info["description"] = ""
+    info["scores"] = {}
+    return info
+
+
+def config():
+    """Return the run configuration: dataset, task, model class and general args."""
+    # The only general argument set here is the test split name.
+    general_args = {"test_split": "uae"}
+    return {
+        "dataset": NativQAGlobalDataset,
+        "task": MultiNativQATask,
+        "model": AzureModel,
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the two-message chat prompt for one sample.
+
+    Reads ``input_sample['length']`` and ``input_sample['question']`` and
+    returns a list holding a "user" message followed by an "assistant" message.
+    """
+    # Persona text, sent under the "assistant" role.
+    persona = """
+    You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+    """
+
+    # Instruction plus the question; the word limit comes from the sample.
+    user_text = f"""
+        Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words.
+
+        Question: {input_sample['question']}
+
+        """
+
+    # NOTE(review): the persona is placed after the user turn as an
+    # "assistant" message -- confirm this ordering/role is intended.
+    messages = [{"role": "user", "content": user_text}]
+    messages.append({"role": "assistant", "content": persona})
+    return messages
+
+def post_process(response):
+    """Return the response's ``output`` field with surrounding whitespace trimmed."""
+    raw_output = response["output"]
+    return raw_output.strip()
diff --git a/assets/ar/QA/ae/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/ae/NativQAGlobal_Fanar_ZeroShot.py
@@ -0,0 +1,48 @@
+import json
+import re
+
+from llmebench.datasets import NativQAGlobalDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import MultiNativQATask
+
+
+def metadata():
+    """Return a dict with this asset's author, model, description and scores."""
+    info = {}
+    info["author"] = "Arabic Language Technologies, QCRI, HBKU"
+    info["model"] = "fanar-llama-3-8b-instruct"
+    # Description and scores are left empty in this asset.
+    info["description"] = ""
+    info["scores"] = {}
+    return info
+
+
+def config():
+    """Return the run configuration: dataset, task, model class and general args."""
+    # The only general argument set here is the test split name.
+    general_args = {"test_split": "uae"}
+    return {
+        "dataset": NativQAGlobalDataset,
+        "task": MultiNativQATask,
+        "model": OpenAIModel,
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the two-message chat prompt for one sample.
+
+    Reads ``input_sample['question']`` and returns a list holding a "user"
+    message followed by an "assistant" message. No word limit is included.
+    """
+    # Persona text, sent under the "assistant" role.
+    persona = """
+    You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+    """
+
+    # Instruction plus the question.
+    user_text = f"""
+        Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. 
+
+        Question: {input_sample['question']}
+
+        """
+
+    # NOTE(review): the persona is placed after the user turn as an
+    # "assistant" message -- confirm this ordering/role is intended.
+    messages = [{"role": "user", "content": user_text}]
+    messages.append({"role": "assistant", "content": persona})
+    return messages
+
+
+def post_process(response):
+    """Return the first choice's message content with surrounding whitespace trimmed."""
+    first_choice = response["choices"][0]
+    text = first_choice["message"]["content"]
+    return text.strip()
diff --git a/assets/ar/QA/ae/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/ae/NativQAGlobal_Gemini_ZeroShot.py
@@ -0,0 +1,65 @@
+import json
+import re
+
+from llmebench.datasets import NativQAGlobalDataset
+from llmebench.models import GeminiModel
+from llmebench.tasks import MultiNativQATask
+
+
+def metadata():
+    """Return a dict with this asset's author, model, description and scores."""
+    info = {}
+    info["author"] = "Arabic Language Technologies, QCRI, HBKU"
+    info["model"] = "Gemini"
+    # Description and scores are left empty in this asset.
+    info["description"] = ""
+    info["scores"] = {}
+    return info
+
+
+def config():
+    """Return the run configuration: dataset, task, model class and general args."""
+    # The only general argument set here is the test split name.
+    general_args = {"test_split": "uae"}
+    return {
+        "dataset": NativQAGlobalDataset,
+        "task": MultiNativQATask,
+        "model": GeminiModel,
+        "general_args": general_args,
+    }
+
+
+
+def prompt(input_sample):
+    """Build the two-message chat prompt for one sample.
+
+    Reads ``input_sample['length']`` and ``input_sample['question']`` and
+    returns a list holding an "assistant" message followed by a "user" message.
+    """
+    # Persona text, sent first under the "assistant" role.
+    persona = """
+    You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+    """
+
+    # Instruction plus the question; the word limit comes from the sample.
+    user_text = f"""
+    Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words.
+
+    Question: {input_sample['question']}
+
+    """
+
+    persona_message = {"role": "assistant", "content": persona}
+    user_message = {"role": "user", "content": user_text}
+    return [persona_message, user_message]
+
+def post_process(response):
+    """Extract the answer text from a Gemini response.
+
+    Reads ``response[0]["content"]["parts"][0]["text"]``, removes every
+    newline and trims surrounding whitespace. When the text contains a
+    ```json fence, the span between the opening fence and the last closing
+    fence is returned instead, also trimmed.
+
+    Returns:
+        The cleaned answer string. A ```json fence that is never closed is
+        left in place rather than raising.
+    """
+    content = response[0]["content"]["parts"][0]["text"]
+    # Newlines are dropped first, so "." in the pattern below can span the
+    # whole reply.
+    content = content.replace("\n", "").strip()
+    if "```json" in content:
+        fenced = re.search(r"```json(.*)```", content)
+        # An unterminated fence produces no match; keep the text as-is
+        # instead of failing on ``None.group``.
+        if fenced is not None:
+            content = fenced.group(1).strip()
+    return content
diff --git a/assets/ar/QA/ae/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/ar/QA/ae/NativQAGlobal_JAIS13b_ZeroShot.py
@@ -0,0 +1,38 @@
+import json
+import re
+
+from llmebench.datasets import NativQAGlobalDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import MultiNativQATask
+
+
+def metadata():
+    """Return a dict with this asset's author, model, description and scores."""
+    info = {}
+    info["author"] = "Arabic Language Technologies, QCRI, HBKU"
+    info["model"] = "Jais-13b-chat"
+    # Description and scores are left empty in this asset.
+    info["description"] = ""
+    info["scores"] = {}
+    return info
+
+
+def config():
+    """Return the run configuration: dataset, task, model class and general args."""
+    # The only general argument set here is the test split name.
+    general_args = {"test_split": "uae"}
+    return {
+        "dataset": NativQAGlobalDataset,
+        "task": MultiNativQATask,
+        "model": AzureModel,
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample):
+    """Build a single-message, context-based QA prompt for one sample.
+
+    Reads ``input_sample['context']`` and ``input_sample['question']`` and
+    returns a one-element list holding the "user" message.
+    """
+    # NOTE(review): this prompt requires a 'context' key, while the other
+    # NativQAGlobal prompts read only 'question'/'length' -- confirm the
+    # dataset actually supplies 'context'.
+    prompt_lines = [
+        "Your task is to answer questions in Arabic based on a given context.",
+        "Note: Your answers should be spans extracted from the given context without any illustrations.",
+        "You don't need to provide a complete answer",
+        f"Context:{input_sample['context']}",
+        f"Question:{input_sample['question']}",
+        "Answer:",
+    ]
+    user_message = {"role": "user", "content": "\n".join(prompt_lines)}
+    return [user_message]
+
+def post_process(response):
+    """Return the first choice's message content exactly as received."""
+    # NOTE(review): the ALLaM asset, which also uses AzureModel, reads
+    # response["output"] -- confirm which response shape applies here.
+    first_choice = response["choices"][0]
+    message = first_choice["message"]
+    return message["content"]
diff --git a/assets/ar/QA/bh/NativQAGlobal_Allam_ZeroShot.py b/assets/ar/QA/bh/NativQAGlobal_Allam_ZeroShot.py
@@ -0,0 +1,49 @@
+import json
+import re
+
+from llmebench.datasets import NativQAGlobalDataset
+from llmebench.models import AzureModel
+from llmebench.tasks import MultiNativQATask
+
+
+def metadata():
+    """Return a dict with this asset's author, model, description and scores."""
+    info = {}
+    info["author"] = "Arabic Language Technologies, QCRI, HBKU"
+    info["model"] = "ALLaM-7B-Instruct"
+    # Description and scores are left empty in this asset.
+    info["description"] = ""
+    info["scores"] = {}
+    return info
+
+
+
+def config():
+    """Return the run configuration: dataset, task, model class and general args."""
+    # The only general argument set here is the test split name.
+    general_args = {"test_split": "bahrain"}
+    return {
+        "dataset": NativQAGlobalDataset,
+        "task": MultiNativQATask,
+        "model": AzureModel,
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the two-message chat prompt for one sample.
+
+    Reads ``input_sample['length']`` and ``input_sample['question']`` and
+    returns a list holding a "user" message followed by an "assistant" message.
+    """
+    # Persona text, sent under the "assistant" role.
+    persona = """
+    You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+    """
+
+    # Instruction plus the question; the word limit comes from the sample.
+    user_text = f"""
+        Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words.
+
+        Question: {input_sample['question']}
+
+        """
+
+    # NOTE(review): the persona is placed after the user turn as an
+    # "assistant" message -- confirm this ordering/role is intended.
+    messages = [{"role": "user", "content": user_text}]
+    messages.append({"role": "assistant", "content": persona})
+    return messages
+
+
+def post_process(response):
+    """Return the response's ``output`` field with surrounding whitespace trimmed."""
+    raw_output = response["output"]
+    return raw_output.strip()
diff --git a/assets/ar/QA/bh/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/bh/NativQAGlobal_Fanar_ZeroShot.py
@@ -0,0 +1,48 @@
+import json
+import re
+
+from llmebench.datasets import NativQAGlobalDataset
+from llmebench.models import OpenAIModel
+from llmebench.tasks import MultiNativQATask
+
+
+def metadata():
+    """Return a dict with this asset's author, model, description and scores."""
+    info = {}
+    info["author"] = "Arabic Language Technologies, QCRI, HBKU"
+    info["model"] = "fanar-llama-3-8b-instruct"
+    # Description and scores are left empty in this asset.
+    info["description"] = ""
+    info["scores"] = {}
+    return info
+
+
+def config():
+    """Return the run configuration: dataset, task, model class and general args."""
+    # The only general argument set here is the test split name.
+    general_args = {"test_split": "bahrain"}
+    return {
+        "dataset": NativQAGlobalDataset,
+        "task": MultiNativQATask,
+        "model": OpenAIModel,
+        "general_args": general_args,
+    }
+
+
+def prompt(input_sample):
+    """Build the two-message chat prompt for one sample.
+
+    Reads ``input_sample['question']`` and returns a list holding a "user"
+    message followed by an "assistant" message. No word limit is included.
+    """
+    # Persona text, sent under the "assistant" role.
+    persona = """
+    You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+    """
+
+    # Instruction plus the question.
+    user_text = f"""
+        Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text.
+
+        Question: {input_sample['question']}
+
+        """
+
+    # NOTE(review): the persona is placed after the user turn as an
+    # "assistant" message -- confirm this ordering/role is intended.
+    messages = [{"role": "user", "content": user_text}]
+    messages.append({"role": "assistant", "content": persona})
+    return messages
+
+
+def post_process(response):
+    """Return the first choice's message content with surrounding whitespace trimmed."""
+    first_choice = response["choices"][0]
+    text = first_choice["message"]["content"]
+    return text.strip()
diff --git a/assets/ar/QA/bh/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/bh/NativQAGlobal_Gemini_ZeroShot.py
@@ -0,0 +1,65 @@
+import json
+import re
+
+from llmebench.datasets import NativQAGlobalDataset
+from llmebench.models import GeminiModel
+from llmebench.tasks import MultiNativQATask
+
+
+def metadata():
+    """Return a dict with this asset's author, model, description and scores."""
+    info = {}
+    info["author"] = "Arabic Language Technologies, QCRI, HBKU"
+    info["model"] = "Gemini"
+    # Description and scores are left empty in this asset.
+    info["description"] = ""
+    info["scores"] = {}
+    return info
+
+
+def config():
+    """Return the run configuration: dataset, task, model class and general args."""
+    # The only general argument set here is the test split name.
+    general_args = {"test_split": "bahrain"}
+    return {
+        "dataset": NativQAGlobalDataset,
+        "task": MultiNativQATask,
+        "model": GeminiModel,
+        "general_args": general_args,
+    }
+
+
+
+def prompt(input_sample):
+    """Build the two-message chat prompt for one sample.
+
+    Reads ``input_sample['length']`` and ``input_sample['question']`` and
+    returns a list holding an "assistant" message followed by a "user" message.
+    """
+    # Persona text, sent first under the "assistant" role.
+    persona = """
+    You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+    """
+
+    # Instruction plus the question; the word limit comes from the sample.
+    user_text = f"""
+    Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words.
+
+    Question: {input_sample['question']}
+
+    """
+
+    persona_message = {"role": "assistant", "content": persona}
+    user_message = {"role": "user", "content": user_text}
+    return [persona_message, user_message]
+
+def post_process(response):
+    """Extract the answer text from a Gemini response.
+
+    Reads ``response[0]["content"]["parts"][0]["text"]``, removes every
+    newline and trims surrounding whitespace. When the text contains a
+    ```json fence, the span between the opening fence and the last closing
+    fence is returned instead, also trimmed.
+
+    Returns:
+        The cleaned answer string. A ```json fence that is never closed is
+        left in place rather than raising.
+    """
+    content = response[0]["content"]["parts"][0]["text"]
+    # Newlines are dropped first, so "." in the pattern below can span the
+    # whole reply.
+    content = content.replace("\n", "").strip()
+    if "```json" in content:
+        fenced = re.search(r"```json(.*)```", content)
+        # An unterminated fence produces no match; keep the text as-is
+        # instead of failing on ``None.group``.
+        if fenced is not None:
+            content = fenced.group(1).strip()
+    return content
-Original file line number
+Diff line change
@@ Expand Up / @@ -10,6 +10,7 @@ @@
     # Benchmark runs directories
     results
     data
+    bin
     *.zip
     # Tests
@@ Expand Down @@