-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathannotate.py
More file actions
76 lines (74 loc) · 3.55 KB
/
annotate.py
File metadata and controls
76 lines (74 loc) · 3.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""Annotate structural file"""
import argparse
import re
import pathlib
import rtoml
def do_job(target_str):
"""Main thread"""
print("----\nStart annotation")
structure_doc = rtoml.load(open(f"record/{target_str}/mid/structure.toml",encoding="utf8"))
keyword_dict = {}
keyword_list = []
for keyword_path in sorted(list(pathlib.Path(f"record/{target_str}").glob('keyword-*.toml'))):
print(f"Process: {keyword_path}")
keyword_doc = rtoml.load(open(keyword_path,encoding="utf8"))
unique_list = [n for n in keyword_doc.keys() if n not in keyword_list]
duplicate_list = [n for n in keyword_doc.keys() if n in keyword_list]
if len(duplicate_list) > 0:
print(F"ERROR: duplicate list ~ {duplicate_list}")
keyword_list.extend(unique_list)
keyword_dict.update(keyword_doc)
def check(input_str,exclude,do_re=str()):
include_list = []
exclude_list = []
for key_str,detail_dict in structure_doc.items():
name_str = detail_dict["name"]
include_bool = (input_str in name_str)
if do_re != str():
key_re = re.compile(do_re)
if key_re.match(name_str):
include_bool = True
if include_bool:
exclude_bool = False
for exclude_str in exclude:
if exclude_str in name_str:
exclude_bool = True
if exclude_bool:
exclude_list.append(key_str)
else:
include_list.append(key_str)
return include_list, exclude_list
for entry_name in keyword_list:
ent_de = keyword_dict[entry_name] # entry_detail
inc_col_list = [] # inclusive_collect_list
exc_col_list = [] # exclusive_collect_list
for inc_str in ent_de['inclusive']: # inclusive_str
inc_list, exc_list = check(inc_str,ent_de['exclusive'],do_re=ent_de.get('re',str()))
inc_col_list.extend([n for n in inc_list if n not in inc_col_list])
exc_col_list.extend([n for n in exc_list if n not in exc_col_list])
if len(inc_col_list) > 1:
print(F"[{entry_name}]\n found in: ({len(inc_col_list)})")
print(" {}".format("\n ".join(inc_col_list)))
print(F" Excluded: ({len(exc_col_list)})")
print(" {}".format("\n ".join(exc_col_list)))
for episode_str in inc_col_list:
episode_table = structure_doc[episode_str]
episode_tag_list = episode_table["tag"]
episode_tag_list.append(entry_name)
episode_table["tag"] = episode_tag_list
episode_tag_list = episode_table["category"]
episode_tag_list.extend(ent_de["category"])
episode_table["category"] = episode_tag_list
structure_doc[episode_str] = episode_table
with open(f"record/{target_str}/mid/keyword.toml","w",encoding="utf8") as target_handler:
rtoml.dump(keyword_dict,target_handler)
with open(f"record/{target_str}/mid/annotation.toml","w",encoding="utf8") as target_handler:
target_handler.write("# Add your own tag to each episode\n\n")
with open(f"record/{target_str}/mid/annotation.toml","a",encoding="utf8") as target_handler:
rtoml.dump(structure_doc,target_handler)
print(" ----\nEnd annotation")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Annotate data")
parser.add_argument("target", help="target path")
args = parser.parse_args()
do_job(args.target)