-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconstant.py
More file actions
76 lines (62 loc) · 2.31 KB
/
constant.py
File metadata and controls
76 lines (62 loc) · 2.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""
This file contains constants and configs used throughout the project.
"""
"""
File structure of generated data:
- data/
- {GEN_IDENTIFIER}/ # unique id
- mmd/ # Flowchart Mermaid scripts
- pkl/ # Flowchart objects in pickle format
- img/ # Flowchart images
- qa/ # Question-Answer pairs, for testing
- questions.jsonl
- ground_truth.jsonl
- conversations.json/ # Conversations for training
- conversations_qa.json/ # Conversations for testing, with more information
- ocr_results.pkl
- statistics.txt # Statistics information for flowcharts and conversations
"""
# --- generation run settings ---
# Id for the data generation process. It is embedded in every output
# directory below, so change it to a unique identifier for each run.
GEN_IDENTIFIER = "test"
USE_COT = True  # whether to use chain-of-thought reasoning in qa generation
USE_OCR = True  # whether to use OCR results in question generation
GEN_IMGS_ON = True  # whether to generate images
FLOWCHART_NUM = 5  # presumably the number of flowcharts to generate -- TODO confirm

# Root directory of the current run; built once so that every directory
# constant below stays consistent with GEN_IDENTIFIER.
_RUN_DIR = f"data/{GEN_IDENTIFIER}"

# --- directories for storing generated data ---
MMD_DIR = f"{_RUN_DIR}/mmd"
PKL_DIR = f"{_RUN_DIR}/pkl"
IMG_DIR = f"{_RUN_DIR}/img"
CONVS_DIR = _RUN_DIR  # conversations live directly in the run directory
OCR_DIR = _RUN_DIR  # same location as CONVS_DIR
QA_DIR = f"{_RUN_DIR}/qa"
STATS_DIR = f"{_RUN_DIR}/stats"

# --- names of the generated files ---
CONV_FILE_NAME = "conversations.json"
CONV_QA_FILE_NAME = "conversations_qa.json"
QUESTIONS_FILE_NAME = "questions.jsonl"
GROUND_TRUTH_FILE_NAME = "ground_truths.jsonl"
FLOWCHART_STATS_FILE_NAME = "flowchart_statistics.txt"
CONV_STATS_FILE_NAME = "conversation_statistics.txt"

# Used in conversations to refer to images. A plain string: the original
# f-string had no placeholders to interpolate.
IMG_REF_DIR = "img"
# Type tags shared by flowcharts and nodes:
#   0 -> normal flowchart / non-decision node
#   1 -> decision flowchart / decision node
NORMAL_TYPE, DECISION_TYPE = 0, 1

# Question type ids, numbered consecutively starting at 1.
(
    NEXTOK_TYPE,
    ALLNEXT_TYPE,
    ALLPREV_TYPE,
    COND_TYPE,
    VALID_TYPE,
) = range(1, 6)
# Image placeholder used in convs generation.
IMG_PLACEHOLDER = "<image>\n"

# Edge ids stored in the matrix, in order: invalid edge, yes_edge, no_edge.
INVALID_ID, YES_ID, NO_ID = 0, 10, 11

# Max node number in a flowchart.
MAX_NODE_NUM = 25

# The lowercase letters a-z.
ALLOWED_CHARACTERS = "abcdefghijklmnopqrstuvwxyz"

# Minimum confidence threshold (the consumer is not visible in this file;
# presumably applied to OCR results -- TODO confirm).
MIN_CONFIDENCE = 0.7

# Simple answers.
YES_ANSWER, NO_ANSWER, NONE_ANSWER = "yes", "no", "none"