diff --git a/.gitignore b/.gitignore index fee44c7f..f16b65f0 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ # Benchmark runs directories results data +bin *.zip # Tests diff --git a/assets/ar/QA/ae/NativQAGlobal_Allam_ZeroShot.py b/assets/ar/QA/ae/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..1eeb1d6b --- /dev/null +++ b/assets/ar/QA/ae/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,47 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "uae"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + +def post_process(response): + content = response["output"].strip() + return content diff --git a/assets/ar/QA/ae/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/ae/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..9bdb330f --- /dev/null +++ b/assets/ar/QA/ae/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "uae"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/ae/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/ae/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..bbc841c0 --- /dev/null +++ b/assets/ar/QA/ae/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "uae"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] +
+def post_process(response):
+    """Extract the answer text from a Gemini response.
+
+    Reads response[0]["content"]["parts"][0]["text"], drops newlines and returns
+    the inside of a ```json ... ``` fence when one is present, else the text itself.
+    """
+    content = response[0]["content"]["parts"][0]["text"]
+    content = content.replace("\n", "").strip()
+    # Newlines are already gone, so `.` can span the whole fenced payload.
+    fenced = re.search(r"```json(.*)```", content)
+    if fenced is not None:
+        content = fenced.group(1)
+    return content
diff --git a/assets/ar/QA/ae/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/ar/QA/ae/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..652a814f --- /dev/null +++ b/assets/ar/QA/ae/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "uae"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/ar/QA/bh/NativQAGlobal_Allam_ZeroShot.py
b/assets/ar/QA/bh/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..cd842cd2 --- /dev/null +++ b/assets/ar/QA/bh/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,49 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "bahrain"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["output"].strip() + return content diff --git a/assets/ar/QA/bh/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/bh/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..bc1478c3 --- /dev/null +++ b/assets/ar/QA/bh/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "bahrain"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/bh/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/bh/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..b130947f --- /dev/null +++ b/assets/ar/QA/bh/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "bahrain"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] +
+def post_process(response):
+    """Extract the answer text from a Gemini response.
+
+    Reads response[0]["content"]["parts"][0]["text"], drops newlines and returns
+    the inside of a ```json ... ``` fence when one is present, else the text itself.
+    """
+    content = response[0]["content"]["parts"][0]["text"]
+    content = content.replace("\n", "").strip()
+    # Newlines are already gone, so `.` can span the whole fenced payload.
+    fenced = re.search(r"```json(.*)```", content)
+    if fenced is not None:
+        content = fenced.group(1)
+    return content
diff --git a/assets/ar/QA/bh/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/ar/QA/bh/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..8ec7b898 --- /dev/null +++ b/assets/ar/QA/bh/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "bahrain"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/ar/QA/dz/NativQAGlobal_Allam_ZeroShot.py
b/assets/ar/QA/dz/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..d9e28ac1 --- /dev/null +++ b/assets/ar/QA/dz/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,49 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "algeria"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["output"].strip() + return content diff --git a/assets/ar/QA/dz/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/dz/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..e4122b83 --- /dev/null +++ b/assets/ar/QA/dz/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "algeria"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/dz/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/dz/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..a0cc1adb --- /dev/null +++ b/assets/ar/QA/dz/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "algeria"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] +
+def post_process(response):
+    """Extract the answer text from a Gemini response.
+
+    Reads response[0]["content"]["parts"][0]["text"], drops newlines and returns
+    the inside of a ```json ... ``` fence when one is present, else the text itself.
+    """
+    content = response[0]["content"]["parts"][0]["text"]
+    content = content.replace("\n", "").strip()
+    # Newlines are already gone, so `.` can span the whole fenced payload.
+    fenced = re.search(r"```json(.*)```", content)
+    if fenced is not None:
+        content = fenced.group(1)
+    return content
diff --git a/assets/ar/QA/dz/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/ar/QA/dz/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..92f33d68 --- /dev/null +++ b/assets/ar/QA/dz/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "algeria"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/ar/QA/eg/NativQAGlobal_Allam_ZeroShot.py
b/assets/ar/QA/eg/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..783c5f7d --- /dev/null +++ b/assets/ar/QA/eg/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "egypt"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["output"].strip() + return content diff --git a/assets/ar/QA/eg/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/eg/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..4397c50a --- /dev/null +++ b/assets/ar/QA/eg/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "egypt"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/eg/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/eg/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..e2d2ade2 --- /dev/null +++ b/assets/ar/QA/eg/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "egypt"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] +
+def post_process(response):
+    """Extract the answer text from a Gemini response.
+
+    Reads response[0]["content"]["parts"][0]["text"], drops newlines and returns
+    the inside of a ```json ... ``` fence when one is present, else the text itself.
+    """
+    content = response[0]["content"]["parts"][0]["text"]
+    content = content.replace("\n", "").strip()
+    # Newlines are already gone, so `.` can span the whole fenced payload.
+    fenced = re.search(r"```json(.*)```", content)
+    if fenced is not None:
+        content = fenced.group(1)
+    return content
diff --git a/assets/ar/QA/eg/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/ar/QA/eg/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..8e758c2e --- /dev/null +++ b/assets/ar/QA/eg/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "egypt"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/ar/QA/iq/NativQAGlobal_Allam_ZeroShot.py
b/assets/ar/QA/iq/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..d569e621 --- /dev/null +++ b/assets/ar/QA/iq/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,49 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "iraq"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["output"].strip() + return content + diff --git a/assets/ar/QA/iq/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/iq/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..88c166d8 --- /dev/null +++ b/assets/ar/QA/iq/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "iraq"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/iq/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/iq/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..ad48052b --- /dev/null +++ b/assets/ar/QA/iq/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "iraq"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] +
+def post_process(response):
+    """Extract the answer text from a Gemini response.
+
+    Reads response[0]["content"]["parts"][0]["text"], drops newlines and returns
+    the inside of a ```json ... ``` fence when one is present, else the text itself.
+    """
+    content = response[0]["content"]["parts"][0]["text"]
+    content = content.replace("\n", "").strip()
+    # Newlines are already gone, so `.` can span the whole fenced payload.
+    fenced = re.search(r"```json(.*)```", content)
+    if fenced is not None:
+        content = fenced.group(1)
+    return content
diff --git a/assets/ar/QA/iq/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/ar/QA/iq/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..6a5a99e0 --- /dev/null +++ b/assets/ar/QA/iq/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "iraq"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/ar/QA/kw/NativQAGlobal_Allam_ZeroShot.py
b/assets/ar/QA/kw/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..9a3ce7aa --- /dev/null +++ b/assets/ar/QA/kw/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,47 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "kuwait"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + +def post_process(response): + content = response["output"].strip() + return content diff --git a/assets/ar/QA/kw/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/kw/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..0438fc7f --- /dev/null +++ b/assets/ar/QA/kw/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "kuwait"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/kw/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/kw/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..28b83e74 --- /dev/null +++ b/assets/ar/QA/kw/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "kuwait"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
def post_process(response):
    """Return the answer text carried by the first entry of *response*.

    The text is read from ``response[0]["content"]["parts"][0]["text"]``;
    newlines are removed and surrounding whitespace is stripped. If the
    text contains a Markdown ``json`` code fence, only the payload between
    the opening fence and the last closing fence is returned.

    Args:
        response: Indexable model output whose first element exposes the
            generated text under ``["content"]["parts"][0]["text"]``.

    Returns:
        The cleaned answer string. When a ``json`` fence is opened but
        never closed, the cleaned text is returned unchanged.

    Raises:
        KeyError, IndexError, TypeError: If *response* does not have the
            nested layout described above.
    """
    text = response[0]["content"]["parts"][0]["text"]
    # Newlines are dropped first so the fence pattern (compiled without
    # DOTALL) can match what was originally a multi-line fenced block.
    text = text.replace("\n", "").strip()
    if "```json" in text:
        fenced = re.search(r"```json(.*)```", text)
        # A truncated reply may open a fence without closing it; keep the
        # cleaned text rather than failing on a missing match.
        if fenced is not None:
            text = fenced.group(1)
    # Every path returns the cleaned text; there is no separate `answer`
    # variable to fall back to.
    return text
b/assets/ar/QA/lb/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..4aaa3987 --- /dev/null +++ b/assets/ar/QA/lb/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,49 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "lebanon"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["output"].strip() + return content + diff --git a/assets/ar/QA/lb/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/lb/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..0e6a9e30 --- /dev/null +++ b/assets/ar/QA/lb/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "lebanon"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/lb/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/lb/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..58b5efaf --- /dev/null +++ b/assets/ar/QA/lb/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "lebanon"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
def post_process(response):
    """Return the answer text carried by the first entry of *response*.

    The text is read from ``response[0]["content"]["parts"][0]["text"]``;
    newlines are removed and surrounding whitespace is stripped. If the
    text contains a Markdown ``json`` code fence, only the payload between
    the opening fence and the last closing fence is returned.

    Args:
        response: Indexable model output whose first element exposes the
            generated text under ``["content"]["parts"][0]["text"]``.

    Returns:
        The cleaned answer string. When a ``json`` fence is opened but
        never closed, the cleaned text is returned unchanged.

    Raises:
        KeyError, IndexError, TypeError: If *response* does not have the
            nested layout described above.
    """
    text = response[0]["content"]["parts"][0]["text"]
    # Newlines are dropped first so the fence pattern (compiled without
    # DOTALL) can match what was originally a multi-line fenced block.
    text = text.replace("\n", "").strip()
    if "```json" in text:
        fenced = re.search(r"```json(.*)```", text)
        # A truncated reply may open a fence without closing it; keep the
        # cleaned text rather than failing on a missing match.
        if fenced is not None:
            text = fenced.group(1)
    # Every path returns the cleaned text; there is no separate `answer`
    # variable to fall back to.
    return text
b/assets/ar/QA/ly/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..2f051433 --- /dev/null +++ b/assets/ar/QA/ly/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,49 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "libya"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["output"].strip() + return content + diff --git a/assets/ar/QA/ly/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/ly/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..0797923c --- /dev/null +++ b/assets/ar/QA/ly/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,49 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "libya"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/ly/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/ly/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..0ea73da8 --- /dev/null +++ b/assets/ar/QA/ly/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "libya"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
def post_process(response):
    """Return the answer text carried by the first entry of *response*.

    The text is read from ``response[0]["content"]["parts"][0]["text"]``;
    newlines are removed and surrounding whitespace is stripped. If the
    text contains a Markdown ``json`` code fence, only the payload between
    the opening fence and the last closing fence is returned.

    Args:
        response: Indexable model output whose first element exposes the
            generated text under ``["content"]["parts"][0]["text"]``.

    Returns:
        The cleaned answer string. When a ``json`` fence is opened but
        never closed, the cleaned text is returned unchanged.

    Raises:
        KeyError, IndexError, TypeError: If *response* does not have the
            nested layout described above.
    """
    text = response[0]["content"]["parts"][0]["text"]
    # Newlines are dropped first so the fence pattern (compiled without
    # DOTALL) can match what was originally a multi-line fenced block.
    text = text.replace("\n", "").strip()
    if "```json" in text:
        fenced = re.search(r"```json(.*)```", text)
        # A truncated reply may open a fence without closing it; keep the
        # cleaned text rather than failing on a missing match.
        if fenced is not None:
            text = fenced.group(1)
    # Every path returns the cleaned text; there is no separate `answer`
    # variable to fall back to.
    return text
b/assets/ar/QA/ma/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..48d324c4 --- /dev/null +++ b/assets/ar/QA/ma/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,47 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "morocco"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + +def post_process(response): + content = response["output"].strip() + return content + diff --git a/assets/ar/QA/ma/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/ma/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..721e8ce3 --- /dev/null +++ b/assets/ar/QA/ma/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "morocco"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/ma/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/ma/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..d08e177c --- /dev/null +++ b/assets/ar/QA/ma/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "morocco"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
def post_process(response):
    """Return the answer text carried by the first entry of *response*.

    The text is read from ``response[0]["content"]["parts"][0]["text"]``;
    newlines are removed and surrounding whitespace is stripped. If the
    text contains a Markdown ``json`` code fence, only the payload between
    the opening fence and the last closing fence is returned.

    Args:
        response: Indexable model output whose first element exposes the
            generated text under ``["content"]["parts"][0]["text"]``.

    Returns:
        The cleaned answer string. When a ``json`` fence is opened but
        never closed, the cleaned text is returned unchanged.

    Raises:
        KeyError, IndexError, TypeError: If *response* does not have the
            nested layout described above.
    """
    text = response[0]["content"]["parts"][0]["text"]
    # Newlines are dropped first so the fence pattern (compiled without
    # DOTALL) can match what was originally a multi-line fenced block.
    text = text.replace("\n", "").strip()
    if "```json" in text:
        fenced = re.search(r"```json(.*)```", text)
        # A truncated reply may open a fence without closing it; keep the
        # cleaned text rather than failing on a missing match.
        if fenced is not None:
            text = fenced.group(1)
    # Every path returns the cleaned text; there is no separate `answer`
    # variable to fall back to.
    return text
b/assets/ar/QA/mr/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..8622547d --- /dev/null +++ b/assets/ar/QA/mr/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,46 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "mauritania"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + +def post_process(response): + content = response["output"].strip() + return content + diff --git a/assets/ar/QA/mr/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/mr/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..abeb5cfa --- /dev/null +++ b/assets/ar/QA/mr/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "mauritania"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/mr/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/mr/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..ad4ca3d3 --- /dev/null +++ b/assets/ar/QA/mr/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "mauritania"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
def post_process(response):
    """Return the answer text carried by the first entry of *response*.

    The text is read from ``response[0]["content"]["parts"][0]["text"]``;
    newlines are removed and surrounding whitespace is stripped. If the
    text contains a Markdown ``json`` code fence, only the payload between
    the opening fence and the last closing fence is returned.

    Args:
        response: Indexable model output whose first element exposes the
            generated text under ``["content"]["parts"][0]["text"]``.

    Returns:
        The cleaned answer string. When a ``json`` fence is opened but
        never closed, the cleaned text is returned unchanged.

    Raises:
        KeyError, IndexError, TypeError: If *response* does not have the
            nested layout described above.
    """
    text = response[0]["content"]["parts"][0]["text"]
    # Newlines are dropped first so the fence pattern (compiled without
    # DOTALL) can match what was originally a multi-line fenced block.
    text = text.replace("\n", "").strip()
    if "```json" in text:
        fenced = re.search(r"```json(.*)```", text)
        # A truncated reply may open a fence without closing it; keep the
        # cleaned text rather than failing on a missing match.
        if fenced is not None:
            text = fenced.group(1)
    # Every path returns the cleaned text; there is no separate `answer`
    # variable to fall back to.
    return text
b/assets/ar/QA/om/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..fb71890f --- /dev/null +++ b/assets/ar/QA/om/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "oman"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["output"].strip() + return content + diff --git a/assets/ar/QA/om/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/om/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..26fa6013 --- /dev/null +++ b/assets/ar/QA/om/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "oman"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/om/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/om/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..2d6f2e52 --- /dev/null +++ b/assets/ar/QA/om/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "oman"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + if "```json" in content: + # content = content.replace("```json", "").replace('```', '').replace("\n}", "}") + # content = content.replace("{\n", "{").replace("\",\n", "\",") + + content = re.search(r"```json(.*)```", content).group(1) + return content + # return json.loads(content)["answer"] + # response = json.loads(data) + # answer = response["answer"] + return answer diff --git a/assets/ar/QA/om/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/ar/QA/om/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..51c22d5c --- /dev/null +++ b/assets/ar/QA/om/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "oman"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/ar/QA/ps/NativQAGlobal_Allam_ZeroShot.py 
b/assets/ar/QA/ps/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..f04f7e81 --- /dev/null +++ b/assets/ar/QA/ps/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "palestine"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["output"].strip() + return content diff --git a/assets/ar/QA/ps/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/ps/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..4fbe59ed --- /dev/null +++ b/assets/ar/QA/ps/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "palestine"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/ps/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/ps/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..04242ce6 --- /dev/null +++ b/assets/ar/QA/ps/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "palestine"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + if "```json" in content: + # content = content.replace("```json", "").replace('```', '').replace("\n}", "}") + # content = content.replace("{\n", "{").replace("\",\n", "\",") + + content = re.search(r"```json(.*)```", content).group(1) + return content + # return json.loads(content)["answer"] + # response = json.loads(data) + # answer = response["answer"] + return answer diff --git a/assets/ar/QA/ps/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/ar/QA/ps/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..b39cbe1d --- /dev/null +++ b/assets/ar/QA/ps/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "palestine"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/ar/QA/sa/NativQAGlobal_Allam_ZeroShot.py 
b/assets/ar/QA/sa/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..8b4df6cd --- /dev/null +++ b/assets/ar/QA/sa/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "saudi_arabia"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + +def post_process(response): + content = response["output"].strip() + return content + + diff --git a/assets/ar/QA/sa/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/sa/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..00257240 --- /dev/null +++ b/assets/ar/QA/sa/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "saudi_arabia"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/sa/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/sa/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..ce4cc419 --- /dev/null +++ b/assets/ar/QA/sa/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "saudi_arabia"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + if "```json" in content: + # content = content.replace("```json", "").replace('```', '').replace("\n}", "}") + # content = content.replace("{\n", "{").replace("\",\n", "\",") + + content = re.search(r"```json(.*)```", content).group(1) + return content + # return json.loads(content)["answer"] + # response = json.loads(data) + # answer = response["answer"] + return answer diff --git a/assets/ar/QA/sa/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/ar/QA/sa/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..61429d50 --- /dev/null +++ b/assets/ar/QA/sa/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "saudi_arabia"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/ar/QA/sd/NativQAGlobal_Allam_ZeroShot.py 
b/assets/ar/QA/sd/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..e260ca92 --- /dev/null +++ b/assets/ar/QA/sd/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,47 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "sudan"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + +def post_process(response): + content = response["output"].strip() + return content diff --git a/assets/ar/QA/sd/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/sd/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..f385733f --- /dev/null +++ b/assets/ar/QA/sd/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "sudan"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/sd/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/sd/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..e0f3ef03 --- /dev/null +++ b/assets/ar/QA/sd/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "sudan"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + if "```json" in content: + # content = content.replace("```json", "").replace('```', '').replace("\n}", "}") + # content = content.replace("{\n", "{").replace("\",\n", "\",") + + content = re.search(r"```json(.*)```", content).group(1) + return content + # return json.loads(content)["answer"] + # response = json.loads(data) + # answer = response["answer"] + return answer diff --git a/assets/ar/QA/sd/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/ar/QA/sd/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..4beb201b --- /dev/null +++ b/assets/ar/QA/sd/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "sudan"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/ar/QA/sy/NativQAGlobal_Allam_ZeroShot.py 
b/assets/ar/QA/sy/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..e27dae77 --- /dev/null +++ b/assets/ar/QA/sy/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,47 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "syria"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + +def post_process(response): + content = response["output"].strip() + return content diff --git a/assets/ar/QA/sy/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/sy/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..2175a6bc --- /dev/null +++ b/assets/ar/QA/sy/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "syria"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/sy/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/sy/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..9aa5407e --- /dev/null +++ b/assets/ar/QA/sy/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "syria"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + if "```json" in content: + # content = content.replace("```json", "").replace('```', '').replace("\n}", "}") + # content = content.replace("{\n", "{").replace("\",\n", "\",") + + content = re.search(r"```json(.*)```", content).group(1) + return content + # return json.loads(content)["answer"] + # response = json.loads(data) + # answer = response["answer"] + return answer diff --git a/assets/ar/QA/sy/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/ar/QA/sy/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..cefca3d3 --- /dev/null +++ b/assets/ar/QA/sy/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "syria"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/ar/QA/tn/NativQAGlobal_Allam_ZeroShot.py 
b/assets/ar/QA/tn/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..6a49b6d7 --- /dev/null +++ b/assets/ar/QA/tn/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "tunisia"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["output"].strip() + return content diff --git a/assets/ar/QA/tn/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/tn/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..6a31a996 --- /dev/null +++ b/assets/ar/QA/tn/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,47 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "tunisia"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/tn/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/tn/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..03008f59 --- /dev/null +++ b/assets/ar/QA/tn/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "tunisia"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + """Return the first text part of the response; if it carries a ```json fence, keep only the fenced payload.""" + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + + if "```json" in content: + # An opening fence with no closing fence gives no match; the raw text is kept then. + match = re.search(r"```json(.*)```", content) + if match: + content = match.group(1) + + # Fenced or not, the cleaned text is the answer. + return content diff --git a/assets/ar/QA/tn/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/ar/QA/tn/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..e3d6d464 --- /dev/null +++ b/assets/ar/QA/tn/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "tunisia"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/ar/QA/ye/NativQAGlobal_Allam_ZeroShot.py 
b/assets/ar/QA/ye/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..4d7592fb --- /dev/null +++ b/assets/ar/QA/ye/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,47 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "yemen"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + +def post_process(response): + content = response["output"].strip() + return content diff --git a/assets/ar/QA/ye/NativQAGlobal_Fanar_ZeroShot.py b/assets/ar/QA/ye/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..910073e5 --- /dev/null +++ b/assets/ar/QA/ye/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "yemen"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/ar/QA/ye/NativQAGlobal_Gemini_ZeroShot.py b/assets/ar/QA/ye/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..98fc97cf --- /dev/null +++ b/assets/ar/QA/ye/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "yemen"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following Arabic question. Answer in Arabic. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an Arabic AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + """Return the first text part of the response; if it carries a ```json fence, keep only the fenced payload.""" + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + + if "```json" in content: + # An opening fence with no closing fence gives no match; the raw text is kept then. + match = re.search(r"```json(.*)```", content) + if match: + content = match.group(1) + + # Fenced or not, the cleaned text is the answer. + return content diff --git a/assets/ar/QA/ye/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/ar/QA/ye/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..23fcbf7b --- /dev/null +++ b/assets/ar/QA/ye/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "yemen"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in Arabic based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/ar/demographic_attributes/gender/ArabGend_Jais13b_ZeroShot.py 
b/assets/ar/demographic_attributes/gender/ArabGend_Jais13b_ZeroShot.py index 56a74db5..30e92e1d 100644 --- a/assets/ar/demographic_attributes/gender/ArabGend_Jais13b_ZeroShot.py +++ b/assets/ar/demographic_attributes/gender/ArabGend_Jais13b_ZeroShot.py @@ -6,9 +6,8 @@ def metadata(): return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "Jais-13b", + "model": "Jais-13b-chat", "description": "Locally hosted Jais-13b-chat model using FastChat.", - "scores": {"Macro-F1": "0.674"}, } diff --git a/assets/ar/demographic_attributes/gender/ArapTweet_JAIS13b_ZeroShot.py b/assets/ar/demographic_attributes/gender/ArapTweet_JAIS13b_ZeroShot.py index b72e0add..2e49be90 100644 --- a/assets/ar/demographic_attributes/gender/ArapTweet_JAIS13b_ZeroShot.py +++ b/assets/ar/demographic_attributes/gender/ArapTweet_JAIS13b_ZeroShot.py @@ -6,8 +6,9 @@ def metadata(): return { "author": "Arabic Language Technologies, QCRI, HBKU", - "model": "Jais-13b-chat", + "model": "Jais-13b", "description": "Locally hosted Jais-13b-chat model using FastChat.", + "scores": {"Macro-F1": ""}, } @@ -40,7 +41,7 @@ def prompt(input_sample): def post_process(response): label = response["choices"][0]["message"]["content"] - label = label.strip() + # label = label.replace("gender:", "").strip() if "gender: Female" in label or "\nFemale" in label or label == "Female": label = "Female" elif ( diff --git a/assets/ar/news_categorization/ASND_JAIS13b_ZeroShot.py b/assets/ar/news_categorization/ASND_JAIS13b_ZeroShot.py index c37e404d..c4dee7e6 100644 --- a/assets/ar/news_categorization/ASND_JAIS13b_ZeroShot.py +++ b/assets/ar/news_categorization/ASND_JAIS13b_ZeroShot.py @@ -8,6 +8,7 @@ def metadata(): "author": "Arabic Language Technologies, QCRI, HBKU", "model": "Jais-13b-chat", "description": "Locally hosted Jais-13b-chat model using FastChat.", + "scores": {"Macro-F1": "0.156"}, } diff --git a/assets/ar/news_categorization/SANADAkhbarona_JAIS13b_ZeroShot.py 
b/assets/ar/news_categorization/SANADAkhbarona_JAIS13b_ZeroShot.py index 4117f02c..383d9a19 100644 --- a/assets/ar/news_categorization/SANADAkhbarona_JAIS13b_ZeroShot.py +++ b/assets/ar/news_categorization/SANADAkhbarona_JAIS13b_ZeroShot.py @@ -13,7 +13,6 @@ def metadata(): "author": "Arabic Language Technologies, QCRI, HBKU", "model": "Jais-13b-chat", "description": "Locally hosted Jais-13b-chat model using FastChat.", - "scores": {"Accuracy": "0.187"}, } diff --git a/assets/ar/news_categorization/SANADAlArabiya_JAIS13b_ZeroShot.py b/assets/ar/news_categorization/SANADAlArabiya_JAIS13b_ZeroShot.py index 13af383d..68f18960 100644 --- a/assets/ar/news_categorization/SANADAlArabiya_JAIS13b_ZeroShot.py +++ b/assets/ar/news_categorization/SANADAlArabiya_JAIS13b_ZeroShot.py @@ -13,7 +13,6 @@ def metadata(): "author": "Arabic Language Technologies, QCRI, HBKU", "model": "Jais-13b-chat", "description": "Locally hosted Jais-13b-chat model using FastChat.", - "scores": {"Accuracy": "0.287"}, } diff --git a/assets/ar/news_categorization/SANADAlKhaleej_JAIS13b_ZeroShot.py b/assets/ar/news_categorization/SANADAlKhaleej_JAIS13b_ZeroShot.py index 0468b7c8..1e167ffa 100644 --- a/assets/ar/news_categorization/SANADAlKhaleej_JAIS13b_ZeroShot.py +++ b/assets/ar/news_categorization/SANADAlKhaleej_JAIS13b_ZeroShot.py @@ -13,7 +13,6 @@ def metadata(): "author": "Arabic Language Technologies, QCRI, HBKU", "model": "Jais-13b-chat", "description": "Locally hosted Jais-13b-chat model using FastChat.", - "scores": {"Accuracy": "0.229"}, } diff --git a/assets/ar/semantics/STS/Q2QSim_JAIS13b_ZeroShot.py b/assets/ar/semantics/STS/Q2QSim_JAIS13b_ZeroShot.py index a858e130..0a06b019 100644 --- a/assets/ar/semantics/STS/Q2QSim_JAIS13b_ZeroShot.py +++ b/assets/ar/semantics/STS/Q2QSim_JAIS13b_ZeroShot.py @@ -8,7 +8,6 @@ def metadata(): "author": "Arabic Language Technologies, QCRI, HBKU", "model": "Jais-13b-chat", "description": "Locally hosted Jais-13b-chat model using FastChat.", - "scores": 
{"Micro-F1": "0.773"}, } diff --git a/assets/en/QA/ca/NativQAGlobal_ON_Allam_ZeroShot.py b/assets/en/QA/ca/NativQAGlobal_ON_Allam_ZeroShot.py new file mode 100644 index 00000000..09ee5bf4 --- /dev/null +++ b/assets/en/QA/ca/NativQAGlobal_ON_Allam_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "ontario"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["output"].strip() + return content diff --git a/assets/en/QA/ca/NativQAGlobal_ON_Fanar_ZeroShot.py b/assets/en/QA/ca/NativQAGlobal_ON_Fanar_ZeroShot.py new file mode 100644 index 00000000..f0fa2e49 --- /dev/null +++ b/assets/en/QA/ca/NativQAGlobal_ON_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "ontario"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/en/QA/ca/NativQAGlobal_ON_Gemini_ZeroShot.py b/assets/en/QA/ca/NativQAGlobal_ON_Gemini_ZeroShot.py new file mode 100644 index 00000000..04feb513 --- /dev/null +++ b/assets/en/QA/ca/NativQAGlobal_ON_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "ontario"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + """Return the first text part of the response; if it carries a ```json fence, keep only the fenced payload.""" + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + + if "```json" in content: + # An opening fence with no closing fence gives no match; the raw text is kept then. + match = re.search(r"```json(.*)```", content) + if match: + content = match.group(1) + + # Fenced or not, the cleaned text is the answer. + return content diff --git a/assets/en/QA/ca/NativQAGlobal_ON_JAIS13b_ZeroShot.py b/assets/en/QA/ca/NativQAGlobal_ON_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..c5b418c0 --- /dev/null +++ b/assets/en/QA/ca/NativQAGlobal_ON_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "ontario"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in English based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/en/QA/ca/NativQAGlobal_QC_Allam_ZeroShot.py 
b/assets/en/QA/ca/NativQAGlobal_QC_Allam_ZeroShot.py new file mode 100644 index 00000000..e3df172c --- /dev/null +++ b/assets/en/QA/ca/NativQAGlobal_QC_Allam_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "quebec"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["output"].strip() + return content diff --git a/assets/en/QA/ca/NativQAGlobal_QC_Fanar_ZeroShot.py b/assets/en/QA/ca/NativQAGlobal_QC_Fanar_ZeroShot.py new file mode 100644 index 00000000..1fdc6a07 --- /dev/null +++ b/assets/en/QA/ca/NativQAGlobal_QC_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "quebec"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/en/QA/ca/NativQAGlobal_QC_Gemini_ZeroShot.py b/assets/en/QA/ca/NativQAGlobal_QC_Gemini_ZeroShot.py new file mode 100644 index 00000000..389b883f --- /dev/null +++ b/assets/en/QA/ca/NativQAGlobal_QC_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "quebec"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + """Return the first text part of the response; if it carries a ```json fence, keep only the fenced payload.""" + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + + if "```json" in content: + # An opening fence with no closing fence gives no match; the raw text is kept then. + match = re.search(r"```json(.*)```", content) + if match: + content = match.group(1) + + # Fenced or not, the cleaned text is the answer. + return content diff --git a/assets/en/QA/ca/NativQAGlobal_QC_JAIS13b_ZeroShot.py b/assets/en/QA/ca/NativQAGlobal_QC_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..b120d5a8 --- /dev/null +++ b/assets/en/QA/ca/NativQAGlobal_QC_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "quebec"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in English based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/en/QA/us/ca/NativQAGlobal_Allam_ZeroShot.py 
b/assets/en/QA/us/ca/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..4c47229d --- /dev/null +++ b/assets/en/QA/us/ca/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,47 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "california"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + +def post_process(response): + content = response["output"].strip() + return content diff --git a/assets/en/QA/us/ca/NativQAGlobal_Fanar_ZeroShot.py b/assets/en/QA/us/ca/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..869481ce --- /dev/null +++ b/assets/en/QA/us/ca/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "california"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/en/QA/us/ca/NativQAGlobal_Gemini_ZeroShot.py b/assets/en/QA/us/ca/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..3ff73de8 --- /dev/null +++ b/assets/en/QA/us/ca/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "california"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + """Return the first text part of the response; if it carries a ```json fence, keep only the fenced payload.""" + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + + if "```json" in content: + # An opening fence with no closing fence gives no match; the raw text is kept then. + match = re.search(r"```json(.*)```", content) + if match: + content = match.group(1) + + # Fenced or not, the cleaned text is the answer. + return content diff --git a/assets/en/QA/us/ca/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/en/QA/us/ca/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..ebb2a52f --- /dev/null +++ b/assets/en/QA/us/ca/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "california"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in English based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/en/QA/us/fl/NativQAGlobal_Allam_ZeroShot.py 
b/assets/en/QA/us/fl/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..c5445beb --- /dev/null +++ b/assets/en/QA/us/fl/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,47 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "florida"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
def post_process(response):
    """Return the model's answer text.

    Reads the string stored under the response's ``"output"`` key and removes
    leading and trailing whitespace.
    """
    return response["output"].strip()
def post_process(response):
    """Return the answer text of the first completion choice.

    Reads ``response["choices"][0]["message"]["content"]`` and removes leading
    and trailing whitespace.
    """
    first_choice = response["choices"][0]
    return first_choice["message"]["content"].strip()
def post_process(response):
    """Extract the answer text from a Gemini response.

    Args:
        response: Sequence whose first element exposes the generated text at
            ``["content"]["parts"][0]["text"]``.

    Returns:
        The text between a ```` ```json ```` fence and the last closing fence
        when such a block is present; otherwise the whole text with newlines
        removed and surrounding whitespace stripped.
    """
    content = response[0]["content"]["parts"][0]["text"]
    # Newlines are removed up front, so the fence pattern needs no DOTALL flag.
    content = content.replace("\n", "").strip()

    fenced = re.search(r"```json(.*)```", content)
    if fenced is not None:
        return fenced.group(1)

    # No complete fenced block (plain answer or unterminated fence): the
    # cleaned text itself is the answer.
    return content
def metadata():
    """Return the descriptive record for this asset.

    The record names the authoring group and the evaluated model, and carries
    an empty description and an empty scores mapping.
    """
    info = dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="ALLaM-7B-Instruct",
        description="",
        scores={},
    )
    return info
def post_process(response):
    """Return the model's answer text.

    Reads the string stored under the response's ``"output"`` key and removes
    leading and trailing whitespace.
    """
    return response["output"].strip()
def post_process(response):
    """Return the answer text of the first completion choice.

    Reads ``response["choices"][0]["message"]["content"]`` and removes leading
    and trailing whitespace.
    """
    first_choice = response["choices"][0]
    return first_choice["message"]["content"].strip()
def post_process(response):
    """Extract the answer text from a Gemini response.

    Args:
        response: Sequence whose first element exposes the generated text at
            ``["content"]["parts"][0]["text"]``.

    Returns:
        The text between a ```` ```json ```` fence and the last closing fence
        when such a block is present; otherwise the whole text with newlines
        removed and surrounding whitespace stripped.
    """
    content = response[0]["content"]["parts"][0]["text"]
    # Newlines are removed up front, so the fence pattern needs no DOTALL flag.
    content = content.replace("\n", "").strip()

    fenced = re.search(r"```json(.*)```", content)
    if fenced is not None:
        return fenced.group(1)

    # No complete fenced block (plain answer or unterminated fence): the
    # cleaned text itself is the answer.
    return content
def metadata():
    """Return the descriptive record for this asset.

    The record names the authoring group and the evaluated model, and carries
    an empty description and an empty scores mapping.
    """
    info = dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="ALLaM-7B-Instruct",
        description="",
        scores={},
    )
    return info
def post_process(response):
    """Return the model's answer text.

    Reads the string stored under the response's ``"output"`` key and removes
    leading and trailing whitespace.
    """
    return response["output"].strip()
def post_process(response):
    """Return the answer text of the first completion choice.

    Reads ``response["choices"][0]["message"]["content"]`` and removes leading
    and trailing whitespace.
    """
    first_choice = response["choices"][0]
    return first_choice["message"]["content"].strip()
def post_process(response):
    """Extract the answer text from a Gemini response.

    Args:
        response: Sequence whose first element exposes the generated text at
            ``["content"]["parts"][0]["text"]``.

    Returns:
        The text between a ```` ```json ```` fence and the last closing fence
        when such a block is present; otherwise the whole text with newlines
        removed and surrounding whitespace stripped.
    """
    content = response[0]["content"]["parts"][0]["text"]
    # Newlines are removed up front, so the fence pattern needs no DOTALL flag.
    content = content.replace("\n", "").strip()

    fenced = re.search(r"```json(.*)```", content)
    if fenced is not None:
        return fenced.group(1)

    # No complete fenced block (plain answer or unterminated fence): the
    # cleaned text itself is the answer.
    return content
def metadata():
    """Return the descriptive record for this asset.

    The record names the authoring group and the evaluated model, and carries
    an empty description and an empty scores mapping.
    """
    info = dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="ALLaM-7B-Instruct",
        description="",
        scores={},
    )
    return info
def post_process(response):
    """Return the model's answer text.

    Reads the string stored under the response's ``"output"`` key and removes
    leading and trailing whitespace.
    """
    return response["output"].strip()
def post_process(response):
    """Return the answer text of the first completion choice.

    Reads ``response["choices"][0]["message"]["content"]`` and removes leading
    and trailing whitespace.
    """
    first_choice = response["choices"][0]
    return first_choice["message"]["content"].strip()
def post_process(response):
    """Extract the answer text from a Gemini response.

    Args:
        response: Sequence whose first element exposes the generated text at
            ``["content"]["parts"][0]["text"]``.

    Returns:
        The text between a ```` ```json ```` fence and the last closing fence
        when such a block is present; otherwise the whole text with newlines
        removed and surrounding whitespace stripped.
    """
    content = response[0]["content"]["parts"][0]["text"]
    # Newlines are removed up front, so the fence pattern needs no DOTALL flag.
    content = content.replace("\n", "").strip()

    fenced = re.search(r"```json(.*)```", content)
    if fenced is not None:
        return fenced.group(1)

    # No complete fenced block (plain answer or unterminated fence): the
    # cleaned text itself is the answer.
    return content
def metadata():
    """Return the descriptive record for this asset.

    The record names the authoring group and the evaluated model, and carries
    an empty description and an empty scores mapping.
    """
    info = dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="ALLaM-7B-Instruct",
        description="",
        scores={},
    )
    return info
def post_process(response):
    """Return the model's answer text.

    Reads the string stored under the response's ``"output"`` key and removes
    leading and trailing whitespace.
    """
    return response["output"].strip()
def post_process(response):
    """Return the answer text of the first completion choice.

    Reads ``response["choices"][0]["message"]["content"]`` and removes leading
    and trailing whitespace.
    """
    first_choice = response["choices"][0]
    return first_choice["message"]["content"].strip()
def post_process(response):
    """Extract the answer text from a Gemini response.

    Args:
        response: Sequence whose first element exposes the generated text at
            ``["content"]["parts"][0]["text"]``.

    Returns:
        The text between a ```` ```json ```` fence and the last closing fence
        when such a block is present; otherwise the whole text with newlines
        removed and surrounding whitespace stripped.
    """
    content = response[0]["content"]["parts"][0]["text"]
    # Newlines are removed up front, so the fence pattern needs no DOTALL flag.
    content = content.replace("\n", "").strip()

    fenced = re.search(r"```json(.*)```", content)
    if fenced is not None:
        return fenced.group(1)

    # No complete fenced block (plain answer or unterminated fence): the
    # cleaned text itself is the answer.
    return content
def metadata():
    """Return the descriptive record for this asset.

    The record names the authoring group and the evaluated model, and carries
    an empty description and an empty scores mapping.
    """
    info = dict(
        author="Arabic Language Technologies, QCRI, HBKU",
        model="ALLaM-7B-Instruct",
        description="",
        scores={},
    )
    return info
def post_process(response):
    """Return the model's answer text.

    Reads the string stored under the response's ``"output"`` key and removes
    leading and trailing whitespace.
    """
    return response["output"].strip()
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/en/QA/us/mi/NativQAGlobal_Gemini_ZeroShot.py b/assets/en/QA/us/mi/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..3ecefc60 --- /dev/null +++ b/assets/en/QA/us/mi/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "michigan"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + if "```json" in content: + # content = content.replace("```json", "").replace('```', '').replace("\n}", "}") + # content = content.replace("{\n", "{").replace("\",\n", "\",") + + content = re.search(r"```json(.*)```", content).group(1) + return content + # return json.loads(content)["answer"] + # response = json.loads(data) + # answer = response["answer"] + return content diff --git a/assets/en/QA/us/mi/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/en/QA/us/mi/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..0fff4dd5 --- /dev/null +++ b/assets/en/QA/us/mi/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "michigan"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in English based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/en/QA/us/nc/NativQAGlobal_Allam_ZeroShot.py 
b/assets/en/QA/us/nc/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..03c3500a --- /dev/null +++ b/assets/en/QA/us/nc/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "north_carolina"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["output"].strip() + return content \ No newline at end of file diff --git a/assets/en/QA/us/nc/NativQAGlobal_Fanar_ZeroShot.py b/assets/en/QA/us/nc/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..691ef189 --- /dev/null +++ b/assets/en/QA/us/nc/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "north_carolina"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/en/QA/us/nc/NativQAGlobal_Gemini_ZeroShot.py b/assets/en/QA/us/nc/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..fc87e292 --- /dev/null +++ b/assets/en/QA/us/nc/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "north_carolina"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + if "```json" in content: + # content = content.replace("```json", "").replace('```', '').replace("\n}", "}") + # content = content.replace("{\n", "{").replace("\",\n", "\",") + + content = re.search(r"```json(.*)```", content).group(1) + return content + # return json.loads(content)["answer"] + # response = json.loads(data) + # answer = response["answer"] + return content diff --git a/assets/en/QA/us/nc/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/en/QA/us/nc/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..01dc59cc --- /dev/null +++ b/assets/en/QA/us/nc/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "north_carolina"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in English based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/en/QA/us/ny/NativQAGlobal_Allam_ZeroShot.py 
b/assets/en/QA/us/ny/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..429b8e33 --- /dev/null +++ b/assets/en/QA/us/ny/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,47 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "new_york"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + +def post_process(response): + content = response["output"].strip() + return content diff --git a/assets/en/QA/us/ny/NativQAGlobal_Fanar_ZeroShot.py b/assets/en/QA/us/ny/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..5d559baf --- /dev/null +++ b/assets/en/QA/us/ny/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "new_york"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/en/QA/us/ny/NativQAGlobal_Gemini_ZeroShot.py b/assets/en/QA/us/ny/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..0a8ce5fa --- /dev/null +++ b/assets/en/QA/us/ny/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "new_york"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + if "```json" in content: + # content = content.replace("```json", "").replace('```', '').replace("\n}", "}") + # content = content.replace("{\n", "{").replace("\",\n", "\",") + + content = re.search(r"```json(.*)```", content).group(1) + return content + # return json.loads(content)["answer"] + # response = json.loads(data) + # answer = response["answer"] + return content diff --git a/assets/en/QA/us/oh/NativQAGlobal_Allam_ZeroShot.py b/assets/en/QA/us/oh/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..43f5e372 --- /dev/null +++ b/assets/en/QA/us/oh/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,47 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "ohio"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["output"].strip() + return content \ No newline at end of file diff --git a/assets/en/QA/us/oh/NativQAGlobal_Fanar_ZeroShot.py b/assets/en/QA/us/oh/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..6ae28836 --- /dev/null +++ b/assets/en/QA/us/oh/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "ohio"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/en/QA/us/oh/NativQAGlobal_Gemini_ZeroShot.py b/assets/en/QA/us/oh/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..2b1dffa4 --- /dev/null +++ b/assets/en/QA/us/oh/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "ohio"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + if "```json" in content: + # content = content.replace("```json", "").replace('```', '').replace("\n}", "}") + # content = content.replace("{\n", "{").replace("\",\n", "\",") + + content = re.search(r"```json(.*)```", content).group(1) + return content + # return json.loads(content)["answer"] + # response = json.loads(data) + # answer = response["answer"] + return content diff --git a/assets/en/QA/us/oh/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/en/QA/us/oh/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..8e98a732 --- /dev/null +++ b/assets/en/QA/us/oh/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "ohio"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in English based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/en/QA/us/pa/NativQAGlobal_Allam_ZeroShot.py 
b/assets/en/QA/us/pa/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..42b11a03 --- /dev/null +++ b/assets/en/QA/us/pa/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "pennsylvania"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["output"].strip() + return content \ No newline at end of file diff --git a/assets/en/QA/us/pa/NativQAGlobal_Fanar_ZeroShot.py b/assets/en/QA/us/pa/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..358ef0b1 --- /dev/null +++ b/assets/en/QA/us/pa/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,48 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "pennsylvania"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/en/QA/us/pa/NativQAGlobal_Gemini_ZeroShot.py b/assets/en/QA/us/pa/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..b6c159b1 --- /dev/null +++ b/assets/en/QA/us/pa/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "pennsylvania"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + if "```json" in content: + # content = content.replace("```json", "").replace('```', '').replace("\n}", "}") + # content = content.replace("{\n", "{").replace("\",\n", "\",") + + content = re.search(r"```json(.*)```", content).group(1) + return content + # return json.loads(content)["answer"] + # response = json.loads(data) + # answer = response["answer"] + return content diff --git a/assets/en/QA/us/pa/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/en/QA/us/pa/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..81e8b893 --- /dev/null +++ b/assets/en/QA/us/pa/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "pennsylvania"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in English based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/en/QA/us/tx/NativQAGlobal_Allam_ZeroShot.py 
b/assets/en/QA/us/tx/NativQAGlobal_Allam_ZeroShot.py new file mode 100644 index 00000000..c6c74e37 --- /dev/null +++ b/assets/en/QA/us/tx/NativQAGlobal_Allam_ZeroShot.py @@ -0,0 +1,47 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "ALLaM-7B-Instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "texas"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + +def post_process(response): + content = response["output"].strip() + return content \ No newline at end of file diff --git a/assets/en/QA/us/tx/NativQAGlobal_Fanar_ZeroShot.py b/assets/en/QA/us/tx/NativQAGlobal_Fanar_ZeroShot.py new file mode 100644 index 00000000..31e0ccd3 --- /dev/null +++ b/assets/en/QA/us/tx/NativQAGlobal_Fanar_ZeroShot.py @@ -0,0 +1,47 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "fanar-llama-3-8b-instruct", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": OpenAIModel, + "general_args": {"test_split": "texas"}, + } + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + return [ + {"role": "user", "content": question_prompt}, + {"role": "assistant", "content": assistant_prompt}, + ] + +def post_process(response): + content = response["choices"][0]["message"]["content"].strip() + return content diff --git a/assets/en/QA/us/tx/NativQAGlobal_Gemini_ZeroShot.py b/assets/en/QA/us/tx/NativQAGlobal_Gemini_ZeroShot.py new file mode 100644 index 00000000..dab648dd --- /dev/null +++ b/assets/en/QA/us/tx/NativQAGlobal_Gemini_ZeroShot.py @@ -0,0 +1,65 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import GeminiModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Gemini", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": GeminiModel, + "general_args": {"test_split": "texas"}, + } + + + +def prompt(input_sample): + # Define the question prompt + question_prompt = f""" + Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words. + + Question: {input_sample['question']} + + """ + + # Define the assistant prompt + assistant_prompt = """ + You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information. 
+ """ + + return [ + { + "role": "assistant", + "content": assistant_prompt, + }, + { + "role": "user", + "content": question_prompt, + }, + ] + +def post_process(response): + content = response[0]["content"]["parts"][0]["text"] + content = content.replace("\n", "").strip() + if "```json" in content: + # content = content.replace("```json", "").replace('```', '').replace("\n}", "}") + # content = content.replace("{\n", "{").replace("\",\n", "\",") + + content = re.search(r"```json(.*)```", content).group(1) + return content + # return json.loads(content)["answer"] + # response = json.loads(data) + # answer = response["answer"] + return content diff --git a/assets/en/QA/us/tx/NativQAGlobal_JAIS13b_ZeroShot.py b/assets/en/QA/us/tx/NativQAGlobal_JAIS13b_ZeroShot.py new file mode 100644 index 00000000..698dc587 --- /dev/null +++ b/assets/en/QA/us/tx/NativQAGlobal_JAIS13b_ZeroShot.py @@ -0,0 +1,38 @@ +import json +import re + +from llmebench.datasets import NativQAGlobalDataset +from llmebench.models import AzureModel +from llmebench.tasks import MultiNativQATask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "Jais-13b-chat", + "description": "", + "scores": {}, + } + + +def config(): + return { + "dataset": NativQAGlobalDataset, + "task": MultiNativQATask, + "model": AzureModel, + "general_args": {"test_split": "texas"}, + } + + +def prompt(input_sample): + base_prompt = f"Your task is to answer questions in English based on a given context.\nNote: Your answers should be spans extracted from the given context without any illustrations.\nYou don't need to provide a complete answer\nContext:{input_sample['context']}\nQuestion:{input_sample['question']}\nAnswer:" + + return [ + { + "role": "user", + "content": base_prompt, + }, + ] + +def post_process(response): + return response["choices"][0]["message"]["content"] \ No newline at end of file diff --git a/assets/en/QA/us/wa/NativQAGlobal_Allam_ZeroShot.py 
# ---- assets/en/QA/us/wa/NativQAGlobal_Allam_ZeroShot.py ----


def metadata():
    """Describe this asset: author, model name, description and reported scores."""
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "ALLaM-7B-Instruct",
        "description": "",
        "scores": {},
    }


def config():
    """Return the dataset, task and model classes plus the split to evaluate."""
    return {
        "dataset": NativQAGlobalDataset,
        "task": MultiNativQATask,
        "model": AzureModel,
        "general_args": {"test_split": "washington"},
    }


def prompt(input_sample):
    """Build the two-message prompt for one sample.

    Args:
        input_sample: mapping providing ``"question"`` and ``"length"``.

    Returns:
        A list of two ``{"role", "content"}`` dicts: the user question
        followed by the instruction text under the ``"assistant"`` role.
    """
    # Define the question prompt
    question_prompt = f"""
    Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words.

    Question: {input_sample['question']}

    """

    # Define the assistant prompt
    assistant_prompt = """
    You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information.
    """
    # NOTE(review): the instruction text is sent as an "assistant" turn placed
    # after the user turn -- confirm this is what the model wrapper expects.
    return [
        {"role": "user", "content": question_prompt},
        {"role": "assistant", "content": assistant_prompt},
    ]


def post_process(response):
    """Return ``response["output"]`` with surrounding whitespace removed."""
    return response["output"].strip()


# ---- assets/en/QA/us/wa/NativQAGlobal_Fanar_ZeroShot.py ----


def metadata():
    """Describe this asset: author, model name, description and reported scores."""
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "fanar-llama-3-8b-instruct",
        "description": "",
        "scores": {},
    }


def config():
    """Return the dataset, task and model classes plus the split to evaluate."""
    return {
        "dataset": NativQAGlobalDataset,
        "task": MultiNativQATask,
        "model": OpenAIModel,
        "general_args": {"test_split": "washington"},
    }


def prompt(input_sample):
    """Build the two-message prompt for one sample.

    Args:
        input_sample: mapping providing ``"question"``.

    Returns:
        A list of two ``{"role", "content"}`` dicts: the user question
        followed by the instruction text under the ``"assistant"`` role.
    """
    # Define the question prompt
    question_prompt = f"""
    Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text.

    Question: {input_sample['question']}

    """

    # Define the assistant prompt
    assistant_prompt = """
    You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information.
    """
    # NOTE(review): the instruction text is sent as an "assistant" turn placed
    # after the user turn -- confirm this is what the model wrapper expects.
    return [
        {"role": "user", "content": question_prompt},
        {"role": "assistant", "content": assistant_prompt},
    ]


def post_process(response):
    """Return the first choice's message text with surrounding whitespace removed."""
    first_choice = response["choices"][0]
    return first_choice["message"]["content"].strip()


# ---- assets/en/QA/us/wa/NativQAGlobal_Gemini_ZeroShot.py ----


def metadata():
    """Describe this asset: author, model name, description and reported scores."""
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Gemini",
        "description": "",
        "scores": {},
    }


def config():
    """Return the dataset, task and model classes plus the split to evaluate."""
    return {
        "dataset": NativQAGlobalDataset,
        "task": MultiNativQATask,
        "model": GeminiModel,
        "general_args": {"test_split": "washington"},
    }


def prompt(input_sample):
    """Build the two-message prompt for one sample.

    Args:
        input_sample: mapping providing ``"question"`` and ``"length"``.

    Returns:
        A list of two ``{"role", "content"}`` dicts: the instruction text
        (sent under the ``"assistant"`` role) followed by the user question.
    """
    # Define the question prompt
    question_prompt = f"""
    Please use your expertise to answer the following English question. Answer in English. Please provide Answer only. No additional text. Answer should be limited to less or equal to {input_sample['length']} words.

    Question: {input_sample['question']}

    """

    # Define the assistant prompt
    assistant_prompt = """
    You are an English AI assistant specialized in providing detailed and accurate answers across various fields. Your task is to deliver clear, concise, and relevant information.
    """

    return [
        {
            "role": "assistant",
            "content": assistant_prompt,
        },
        {
            "role": "user",
            "content": question_prompt,
        },
    ]


def post_process(response):
    """Extract the answer text from a Gemini response.

    Reads ``response[0]["content"]["parts"][0]["text"]``, removes newline
    characters and surrounding whitespace and, when the text contains a
    ```` ```json ```` fenced block, returns only what lies between the fences.
    Otherwise the cleaned text itself is returned.

    The previous version ended with ``return answer``, a name that is never
    defined, and dereferenced the regex match without checking it.
    """
    text = response[0]["content"]["parts"][0]["text"]
    text = text.replace("\n", "").strip()
    if "```json" in text:
        fenced = re.search(r"```json(.*)```", text)
        # An opening fence without a closing one produces no match; fall
        # through and return the cleaned text instead of raising.
        if fenced is not None:
            return fenced.group(1)
    return text


# ---- assets/en/QA/us/wa/NativQAGlobal_JAIS13b_ZeroShot.py ----


def metadata():
    """Describe this asset: author, model name, description and reported scores."""
    return {
        "author": "Arabic Language Technologies, QCRI, HBKU",
        "model": "Jais-13b-chat",
        "description": "",
        "scores": {},
    }


def config():
    """Return the dataset, task and model classes plus the split to evaluate."""
    return {
        "dataset": NativQAGlobalDataset,
        "task": MultiNativQATask,
        "model": AzureModel,
        "general_args": {"test_split": "washington"},
    }


def prompt(input_sample):
    """Build the single user message for one sample.

    Args:
        input_sample: mapping providing ``"question"`` and, optionally,
            ``"context"``.

    Returns:
        A one-element list holding the user message.

    ``NativQAGlobalDataset.load_data`` builds inputs with only ``"question"``
    and ``"length"``, so an unconditional ``input_sample['context']`` lookup
    raises ``KeyError``. When a context is supplied the original
    context-based prompt is produced unchanged; otherwise a context-free
    instruction (same wording as the sibling zero-shot assets) is used.
    """
    question = input_sample["question"]
    context = input_sample.get("context")

    if context is not None:
        base_prompt = (
            "Your task is to answer questions in English based on a given context.\n"
            "Note: Your answers should be spans extracted from the given context without any illustrations.\n"
            "You don't need to provide a complete answer\n"
            f"Context:{context}\n"
            f"Question:{question}\n"
            "Answer:"
        )
    else:
        base_prompt = (
            "Please use your expertise to answer the following English question. "
            "Answer in English. Please provide Answer only. No additional text.\n"
            f"Question:{question}\n"
            "Answer:"
        )

    return [
        {
            "role": "user",
            "content": base_prompt,
        },
    ]


def post_process(response):
    """Return the first choice's message text exactly as received."""
    return response["choices"][0]["message"]["content"]
class NativQAGlobalDataset(DatasetBase):
    """NativQA question-answering dataset with one dev/test split per region.

    Each split is a tab-separated file with a header row; ``load_data`` reads
    the sample id from column 0, the question from column 3 and the answer
    from column 4.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to ``DatasetBase``."""
        super().__init__(**kwargs)

    @staticmethod
    def get_data_sample():
        """Return a placeholder sample illustrating the shape of loaded data."""
        return {
            "data_id": "a unique question id",
            "input": {
                "question": "question to be answered",
                "length": "number of words in answer",
            },
            "label": "A long answer",
        }

    @staticmethod
    def metadata():
        """Return dataset metadata, including the relative path of every split file."""
        return {
            "language": "multilingual",
            "citation": """
            citation text goes here
            """,
            "link": "",
            "license": "",
            "splits": {
                # United States: washington, florida, michigan, texas,
                # north_carolina, massachusetts, california, pennsylvania,
                # illinois, hawaii, ohio, georgia, and new_york.
                "washington": {
                    "dev": "washington/nativqa_dev.tsv",
                    "test": "washington/nativqa_test.tsv",
                },
                "florida": {
                    "dev": "florida/nativqa_dev.tsv",
                    "test": "florida/nativqa_test.tsv",
                },
                "michigan": {
                    "dev": "michigan/nativqa_dev.tsv",
                    "test": "michigan/nativqa_test.tsv",
                },
                "texas": {
                    "dev": "texas/nativqa_dev.tsv",
                    "test": "texas/nativqa_test.tsv",
                },
                "north_carolina": {
                    "dev": "north_carolina/nativqa_dev.tsv",
                    "test": "north_carolina/nativqa_test.tsv",
                },
                "massachusetts": {
                    "dev": "massachusetts/nativqa_dev.tsv",
                    "test": "massachusetts/nativqa_test.tsv",
                },
                "california": {
                    "dev": "california/nativqa_dev.tsv",
                    "test": "california/nativqa_test.tsv",
                },
                "pennsylvania": {
                    "dev": "pennsylvania/nativqa_dev.tsv",
                    "test": "pennsylvania/nativqa_test.tsv",
                },
                "illinois": {
                    "dev": "illinois/nativqa_dev.tsv",
                    "test": "illinois/nativqa_test.tsv",
                },
                "hawaii": {
                    "dev": "hawaii/nativqa_dev.tsv",
                    "test": "hawaii/nativqa_test.tsv",
                },
                "ohio": {
                    "dev": "ohio/nativqa_dev.tsv",
                    "test": "ohio/nativqa_test.tsv",
                },
                "georgia": {
                    "dev": "georgia/nativqa_dev.tsv",
                    "test": "georgia/nativqa_test.tsv",
                },
                "new_york": {
                    "dev": "english_ny/NativQA_ny_NA_ny_dev.tsv",
                    "test": "english_ny/NativQA_ny_NA_ny_test.tsv",
                },
                # Canada: ontario and quebec
                "ontario": {
                    "dev": "ontario/nativqa_dev.tsv",
                    "test": "ontario/nativqa_test.tsv",
                },
                "quebec": {
                    "dev": "quebec/nativqa_dev.tsv",
                    "test": "quebec/nativqa_test.tsv",
                },
                # Iraq
                "iraq": {
                    "dev": "Iraq_Baghdad/nativqa_dev.tsv",
                    "test": "Iraq_Baghdad/nativqa_test.tsv",
                },
                # Mauritania
                "mauritania": {
                    "dev": "Mauritania_Nouakchott/nativqa_dev.tsv",
                    "test": "Mauritania_Nouakchott/nativqa_test.tsv",
                },
                # Kuwait
                "kuwait": {
                    "dev": "Kuwait_Kuwait_City/nativqa_dev.tsv",
                    "test": "Kuwait_Kuwait_City/nativqa_test.tsv",
                },
                # Lebanon
                "lebanon": {
                    "dev": "Lebanon_Beirut/nativqa_dev.tsv",
                    "test": "Lebanon_Beirut/nativqa_test.tsv",
                },
                # Libya
                "libya": {
                    "dev": "Libya_Tripoli/nativqa_dev.tsv",
                    "test": "Libya_Tripoli/nativqa_test.tsv",
                },
                # Morocco
                "morocco": {
                    "dev": "Morocco_Rabat/nativqa_dev.tsv",
                    "test": "Morocco_Rabat/nativqa_test.tsv",
                },
                # Syria
                "syria": {
                    "dev": "syria_sy/NativQA_sy_NA_sy_dev.tsv",
                    "test": "syria_sy/NativQA_sy_NA_sy_test.tsv",
                },
                # Sudan
                "sudan": {
                    "dev": "sudan_su/NativQA_su_NA_su_dev.tsv",
                    "test": "sudan_su/NativQA_su_NA_su_test.tsv",
                },
                # Yemen
                "yemen": {
                    "dev": "yameni_ya/NativQA_ya_NA_ya_dev.tsv",
                    "test": "yameni_ya/NativQA_ya_NA_ya_test.tsv",
                },
                # Oman
                "oman": {
                    "dev": "Oman_Muscat/nativqa_dev.tsv",
                    "test": "Oman_Muscat/nativqa_test.tsv",
                },
                # Saudi Arabia (Riyadh)
                "saudi_arabia": {
                    "dev": "Saudi_Arabia_Riyadh/nativqa_dev.tsv",
                    "test": "Saudi_Arabia_Riyadh/nativqa_test.tsv",
                },
                # UAE (Abu Dhabi)
                "uae": {
                    "dev": "UAE_Abu_Dhabi/nativqa_dev.tsv",
                    "test": "UAE_Abu_Dhabi/nativqa_test.tsv",
                },
                # Algeria
                "algeria": {
                    "dev": "Algeria_Algiers/nativqa_dev.tsv",
                    "test": "Algeria_Algiers/nativqa_test.tsv",
                },
                # Egypt
                "egypt": {
                    "dev": "Egypt_Cairo/nativqa_dev.tsv",
                    "test": "Egypt_Cairo/nativqa_test.tsv",
                },
                # Bahrain
                "bahrain": {
                    "dev": "Bahrain_Manama/nativqa_dev.tsv",
                    "test": "Bahrain_Manama/nativqa_test.tsv",
                },
                # Tunisia
                "tunisia": {
                    "dev": "tunisia_tu/NativQA_tu_NA_tu_dev.tsv",
                    "test": "tunisia_tu/NativQA_tu_NA_tu_test.tsv",
                },
                # Palestine
                "palestine": {
                    "dev": "palestanian_ps/NativQA_ps_NA_ps_dev.tsv",
                    "test": "palestanian_ps/NativQA_ps_NA_ps_test.tsv",
                },
                "default": [],
            },
            "task_type": TaskType.Other,
        }

    def load_data(self, data_path, no_labels=False):
        """Read one TSV split into a list of samples.

        Args:
            data_path: location of the TSV file; passed through
                ``self.resolve_path`` before opening.
            no_labels: accepted for interface compatibility; this loader does
                not consult it and always includes the label.

        Returns:
            A list of dicts with ``"data_id"`` (column 0), ``"input"``
            (the question from column 3 and ``"length"``, the number of
            whitespace-separated words in the answer) and ``"label"``
            (the answer from column 4).
        """
        data_path = self.resolve_path(data_path)
        data = []

        # The splits include Arabic text, so decode as UTF-8 explicitly
        # instead of relying on the platform default; newline="" lets the csv
        # module handle line endings itself.
        with open(data_path, encoding="utf-8", newline="") as f:
            reader = csv.reader(f, delimiter="\t")
            next(reader, None)  # skip the header row; tolerate an empty file
            for row in reader:
                if not row:
                    # csv yields [] for blank lines; nothing to index there.
                    continue
                sample_id = row[0]
                question = row[3]
                answer = row[4]
                data.append(
                    {
                        "data_id": sample_id,
                        "input": {
                            "question": question,
                            "length": len(answer.split()),
                        },
                        "label": answer,
                    }
                )
        return data