# vision-flan.github.io/format_data.py at main · vision-flan/vision-flan.github.io · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import json

# Convert conversation-style task records into flat prompt/target records.
#
# Reads a JSON list from INPUT_PATH and writes {'data': [...]} to OUTPUT_PATH
# (relative to the current working directory).
INPUT_PATH = '/Users/zhiyangxu/Documents/multiinstructv2/vision-flan.github.io/tasks_web.json'
OUTPUT_PATH = 'tasks.json'

# Use a context manager so the input handle is closed deterministically
# instead of being left to the garbage collector; JSON text is UTF-8.
with open(INPUT_PATH, 'r', encoding='utf-8') as fin:
    inputs = json.load(fin)

# Example of one INPUT record (only "image", "task_name" and the first two
# "conversations" entries are read below):
#
# {
#         "id": "multiinstruct_ok_vqa_['train', 'val']_162419",
#         "image": "./task_imgs/COCO_train2014_000000162419.jpg",
#         "task_name": "ok_vqa",
#         "conversations": [
#             {
#                 "from": "human",
#                 "value": "What activity might these vehicles been used for?\n<image>"
#             },
#             {
#                 "from": "gpt",
#                 "value": "transportation"
#             }
#         ]
#     }
#
# Example of a record in the flat layout this script targets (only
# "img_path", "prompt", "target" and "task_name" are emitted below):
#
# {
#         "instance_id": "recipe-qa+visual_coherence+3584",
#         "img_path": "./imgs/recipe-qa+visual_coherence_974_recipe-qa+visual_coherence+3584.jpg",
#         "options": null,
#         "input_text": null,
#         "raw_output": "The least similar image is B",
#         "prompt": "The given image contains 4 image choices numbered A through D. Select the least similar image among the gorup",
#         "target": "The least similar image is B",
#         "dataset_name": "recipe-qa",
#         "task_name": "recipe-qa+visual_coherence"
#     }

# conversations[0] supplies the prompt and conversations[1] the target.
# A record with fewer than two conversation entries raises IndexError, and a
# record missing a key raises KeyError.
outputs = [
    {
        'img_path': record['image'],
        'prompt': record['conversations'][0]['value'],
        'target': record['conversations'][1]['value'],
        'task_name': record['task_name'],
    }
    for record in inputs
]

# json.dump escapes non-ASCII characters by default, so the written file is
# plain ASCII regardless of the encoding named here.
with open(OUTPUT_PATH, 'w', encoding='utf-8') as fout:
    json.dump({'data': outputs}, fout)