-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgetData.sh
More file actions
executable file
·38 lines (31 loc) · 2.16 KB
/
getData.sh
File metadata and controls
executable file
·38 lines (31 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/bin/bash
# Some datasets are not available from HuggingFace, therefore we need to download them from the original source
# (last accessed 04.01.2025)
# Create the 'res' directory if it doesn't exist. `mkdir -p` is a no-op when
# the directory is already there, so no separate existence check is needed
# (and it matches how every other directory in this script is created).
mkdir -p res
# Download the dataset from the original source
# SimpleQA: https://github.com/openai/simple-evals/blob/main/simpleqa_eval.py
simpleqa_dir='res/simpleqa'
simpleqa_csv="${simpleqa_dir}/simple_qa_test_set.csv"
mkdir -p "$simpleqa_dir"
# `wget -O` truncates its target before any data arrives. Download into a
# side file and only move it into place once the transfer succeeded, so a
# failed run can neither leave an empty CSV behind nor clobber a good copy
# from an earlier run.
if wget 'https://openaipublic.blob.core.windows.net/simple-evals/simple_qa_test_set.csv' -O "${simpleqa_csv}.part"; then
  mv -f -- "${simpleqa_csv}.part" "$simpleqa_csv"
else
  rm -f -- "${simpleqa_csv}.part"
  printf 'error: failed to download the SimpleQA test set\n' >&2
fi
#######################################
# Download a zip archive and unpack it into a directory.
# Arguments: $1 - URL of the zip archive
#            $2 - directory to extract into (created if missing)
# Outputs:   progress from wget/unzip; error messages on stderr
# Returns:   0 on success, 1 if the download or the extraction failed
#######################################
fetch_and_unzip() {
  local url=$1 dest_dir=$2
  local archive rc=0

  # Use a unique scratch file in the current directory instead of a fixed
  # name, so an unrelated file with the same name is never overwritten.
  archive=$(mktemp ./shroom_archive.XXXXXX) || {
    printf 'error: could not create a temporary file for %s\n' "$url" >&2
    return 1
  }

  mkdir -p -- "$dest_dir"
  # Only try to extract when the download itself succeeded; otherwise unzip
  # would be run on an empty or partial file.
  if ! wget "$url" -O "$archive"; then
    printf 'error: download failed: %s\n' "$url" >&2
    rc=1
  elif ! unzip -o "$archive" -d "$dest_dir"; then
    printf 'error: could not extract %s into %s\n' "$url" "$dest_dir" >&2
    rc=1
  fi

  # The archive is only a transport artifact; remove it on every path.
  rm -f -- "$archive"
  return "$rc"
}

# Shroom2024 https://helsinki-nlp.github.io/shroom/2024
# NOTE(review): the original download line carried a trailing "# replace"
# marker - presumably this Google Drive link is meant to be swapped for a
# more stable source; confirm with the author.
fetch_and_unzip 'https://drive.usercontent.google.com/download?id=1wlGZL8Sdqu7xZngcUSrDqp3DCSkYWoaE&export=download&authuser=0' res/shroom2024
# Shroom2025 https://helsinki-nlp.github.io/shroom/
fetch_and_unzip 'https://a3s.fi/mickusti-2007780-pub/train.zip' res/shroom2025
# get DefAn https://github.com/ernlavr/DefAn
# The public QA splits differ only in their domain number (1-8), so fetch
# them in a loop instead of repeating the command once per file.
defan_dir='res/defan'
defan_base_url='https://raw.githubusercontent.com/ernlavr/DefAn/refs/heads/main/DefAn-public'
defan_failed=0
mkdir -p "$defan_dir"
for defan_domain in 1 2 3 4 5 6 7 8; do
  defan_csv="QA_domain_${defan_domain}_public.csv"
  # `wget -O` truncates its target up front; write to a side file and move
  # it into place only after a successful transfer so a failed download
  # never leaves an empty CSV or destroys an earlier good copy.
  if wget "${defan_base_url}/${defan_csv}" -O "${defan_dir}/${defan_csv}.part"; then
    mv -f -- "${defan_dir}/${defan_csv}.part" "${defan_dir}/${defan_csv}"
  else
    rm -f -- "${defan_dir}/${defan_csv}.part"
    printf 'error: failed to download %s\n' "$defan_csv" >&2
    defan_failed=1
  fi
done
# Finish with a non-zero status if any split is missing, not just when the
# last one failed.
[ "$defan_failed" -eq 0 ]