# WebExperT/prepare_train.py at master · Luohh5/WebExperT · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189

import numpy as np
import json
import pdb
import jsonlines
import os
from tqdm import tqdm
import sys
sys.path.insert(0, '../')
from SeeAct.src.data_utils.format_prompt_utils import get_choices

def build_example(problem, id):
    """Build the few-shot example strings for one multiple-choice problem.

    Args:
        problem: Mapping that provides ``question``, ``choices``, ``answer``
            (an index into ``choices``), ``lecture``, ``solution``, ``hint``
            and ``image``. ``split`` is read as well when ``image`` is not
            ``None``.
        id: Identifier of the problem; it is used as a directory name when
            the image path is built.

    Returns:
        A ``(qg, rg, dg)`` tuple of strings. Each one starts with an
        ``Example:`` header followed by the optional picture and context
        lines, and then shows respectively the answer followed by the
        question, the question and answer followed by the reasoning, and the
        question and answer followed by the numbered distractors.

    Note:
        The image path is rooted at the module-level ``data_root``, so that
        name has to be defined before a problem with an image is processed.
    """
    correct_choice = problem['choices'][problem['answer']]
    answer = 'Answer: ' + correct_choice + '\n'
    question = problem['question']
    rationale = problem['lecture'] + problem['solution']

    # Work on a copy so the caller's choice list is left untouched; removing
    # the (first occurrence of the) correct choice leaves the distractors.
    wrong_choices = problem['choices'].copy()
    wrong_choices.remove(correct_choice)
    distractors = ''.join(
        f'({i + 1}) {choice}\n' for i, choice in enumerate(wrong_choices)
    )

    if problem['image'] is not None:
        image_path = os.path.join(
            data_root, problem['split'], str(id), problem['image'])
        image = 'Picture: <img>' + image_path + '</img>\n'
    else:
        image = ''
    # The context line is only emitted when a hint is available.
    context = 'Context: ' + problem['hint'] + '\n' if problem['hint'] != '' else ''

    header = '\nExample:\n' + image + context
    question_line = f'Question: {question}\n'
    qg = header + answer + f'Question: {question}'
    rg = header + question_line + answer + f'Reasoning: {rationale}'
    dg = header + question_line + answer + f'Distractors: {distractors}'
    return qg, rg, dg


# Conversation records collected by the main loop and dumped to save_root.
save_list = []
# Name of the data split to convert; it selects the input/output paths below.
split = 'test_domain'
# When True the plan text is prepended to the prompt of every action.
use_plan = False
save_root = f'playground/data/flan-t5-finetune/mind2web_{split}_modified_woplan.json'
data_root = f'playground/data/{split}_can_modified'
if split != 'train':
    # For every split other than 'train' the plans come from a JSON-lines
    # file; the raw lines are kept and parsed on demand by the main loop.
    plan_path = f'playground/data/mind2web_train_pl/{split}.jsonl'
    # Use a context manager so the file handle is closed after reading.
    with open(plan_path, 'r') as plan_file:
        predict_plans = plan_file.readlines()
# Index of the next line of predict_plans to consume.
plan_id = 0
# Number of actions whose correct element is not among the listed choices.
num = 0
def _task_suffix(plan_line):
    """Return the tail of a plan record's query, starting at the task marker.

    ``plan_line`` is one JSON-encoded line with a ``query`` field. The tail
    starts at the first occurrence of the image closing tag followed by
    ``Task:``. When the marker is missing ``str.find`` returns -1, so only
    the last character of the query is returned.
    """
    query = json.loads(plan_line)['query']
    return query[query.find("</img>\nTask:"):]


# Convert every action of every example found below data_root into one
# conversation record (prompt + expected answer) and collect it in save_list.
#
# NOTE(review): predicted plans are consumed strictly in sequence through
# plan_id, so the order in which os.walk lists the files presumably has to
# match the order the plan file was generated in -- confirm, because the
# directory listing order depends on the operating system.
for root, _dirs, files in os.walk(data_root):
    for file in files:
        # Join with the directory that is currently being walked (not with
        # data_root) so files inside sub-directories resolve correctly, and
        # close the handle as soon as the JSON document has been parsed.
        with open(os.path.join(root, file)) as example_file:
            examples = json.load(example_file)

        for example in tqdm(examples):
            task = f"{example['confirmed_task']}"
            if split != 'train':
                if plan_id >= len(predict_plans):
                    raise IndexError(
                        f"no predicted plan left for example "
                        f"{example['annotation_id']} (plan_id={plan_id})")
                predict_plan = json.loads(predict_plans[plan_id])['response']
                plan_id += 1
                # Advance past consecutive records whose query tail equals
                # the one of the record before them. The bounds check comes
                # first, so consuming the very last record no longer indexes
                # past the end of predict_plans.
                while (plan_id < len(predict_plans) - 1
                       and _task_suffix(predict_plans[plan_id])
                       == _task_suffix(predict_plans[plan_id - 1])):
                    plan_id += 1

            for a_id, action in enumerate(example['actions']):
                # Actions without a prepared candidate list are skipped.
                if '26candidates' not in action:
                    continue
                element_list = action['26candidates']
                # -1 is the sentinel for "no positive candidate annotated".
                if len(action['pos_candidates']) > 0:
                    correct_element_id = action['pos_candidates'][0]['backend_node_id']
                else:
                    correct_element_id = -1

                image = f"playground/data/raw_dump/task/{example['annotation_id']}/processed/snapshots/{action['action_uid']}_labeled.jpg"

                # Map every candidate (positive and negative) id to the text
                # that get_choices produces for it.
                elements = []
                elements.extend(action['pos_candidates'])
                elements.extend(action['neg_candidates'])
                all_elements_id = [item['backend_node_id'] for item in elements]
                all_elements = get_choices(action, all_elements_id, -1,
                                           keep_html_brackets=True)
                element_dict = {element[0]: element[1] for element in all_elements}

                # An empty operation value is normalised (in place) to "None".
                if action['operation']['value'] == "":
                    action['operation']['value'] = "None"
                assistant_value = f"\nACTION: {action['operation']['op']}\nVALUE: {action['operation']['value']}"

                if use_plan:
                    if split != 'train':
                        prompt_2 = f"{predict_plan}\nPrevious Actions:\n"
                    else:
                        prompt_2 = f"{example['plan']}\nPrevious Actions:\n"
                else:
                    prompt_2 = f"Previous Actions:\n"

                # List the textual representations of the preceding actions.
                # A dedicated loop variable is used so the current `action`
                # is not overwritten.
                previous_actions = example["action_reprs"][:a_id]
                if previous_actions:
                    for pre_act_id, previous_action in enumerate(previous_actions):
                        prompt_2 += f"{pre_act_id+1}. {previous_action}\n"
                else:
                    prompt_2 += "None\n"
                task_description = f"""Combined with the image, plan and previous action, what should be the next action to complete the task: {task} Please select from the following choices:\n"""

                prompt_2 += task_description

                # Label at most num_choice candidates with the letters
                # A, B, C, ... and remember the letter of the correct one.
                correct_element_letter = "None"
                num_choice = 26
                for idx, choice in enumerate(element_list):
                    if idx >= num_choice:
                        break
                    letter = chr(65 + idx)
                    prompt_2 += f"{letter}. {element_dict[choice]}\n"
                    if choice == correct_element_id:
                        correct_element_letter = letter
                if correct_element_letter == 'None':
                    # The correct element is not among the listed choices.
                    num += 1
                assistant_value = f"ELEMENT: {correct_element_letter}" + assistant_value
                # Only the "None" option is appended after the choices; no
                # answer-format instructions are added to the prompt.
                prompt_2 += """None. None of the other options match the correct element."""
                user_value = "<image>\n" + prompt_2

                conversations = {
                    "id": f"{example['annotation_id']}_{a_id}",
                    "image": image,
                    "conversations": [
                        {"from": "human", "value": user_value},
                        {"from": "gpt", "value": assistant_value},
                    ],
                }
                save_list.append(conversations)

with open(save_root, 'w') as fp:
    json.dump(save_list, fp)
# Report how many actions had no correct element among their choices.
print(num)