-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
98 lines (80 loc) · 3.05 KB
/
main.py
File metadata and controls
98 lines (80 loc) · 3.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import csv
from ampligraph_training import train_model, grid_search_hyperparams
from ampligraph_test import test_model
from coreference import coref_resolution
from references import create_ref_elements
from triplets import OpenIEClient
from nltk_utils import lemmatize_triplets, lemmatize_triplets_only_verbs, \
create_stopwords_custom_object, preprocess_text
from nltk_utils import remove_stopwords
from config import csv_folder
from neo4j_utils import *
from ampligraph_predict import *
def readfile(filename):
    """Return the full text content of ``filename``.

    The path actually opened is ``"./" + filename``, i.e. the name is
    resolved relative to the current working directory.  The file is read
    in text mode using the platform's default encoding.
    """
    path = "./" + filename
    with open(path, mode="r") as handle:
        contents = handle.read()
    return contents
def triplets_to_csv(triplets, filename):
    """Write ``triplets`` to ``filename`` as CSV, one triplet per row.

    Each triplet is indexed with the keys ``"subject"``, ``"relation"`` and
    ``"object"``; the three values become the columns of the row, in that
    order.  Any existing file at ``filename`` is overwritten.
    """
    columns = ("subject", "relation", "object")
    with open(filename, "w", newline='') as csv_file:
        # The generator is consumed lazily, so rows are written one by one
        # in input order.
        csv.writer(csv_file).writerows(
            [triplet[column] for column in columns] for triplet in triplets
        )
def create_csvs():
    """Extract triplets from every plot file and store them as CSV files.

    For each index ``i`` from ``start`` to ``stop`` (inclusive) this:

    1. reads ``./plots/hp<i>`` and prints the raw text between two banners;
    2. passes the text through ``preprocess_text`` and, when the
       ``coreference`` / ``stopwords`` switches are truthy, through
       ``coref_resolution`` / ``remove_stopwords``;
    3. asks an ``OpenIEClient`` to extract triplets from the result;
    4. when ``lemmatize`` is truthy, calls ``lemmatize_triplets_only_verbs``
       or ``lemmatize_triplets`` depending on ``only_verbs``;
    5. writes the triplets to ``<csv_folder>triplets_hp<i>.csv``.

    NOTE(review): ``start``, ``stop``, ``coreference``, ``stopwords``,
    ``custom_stopwords``, ``lemmatize`` and ``only_verbs`` are not defined in
    this module; presumably they arrive through the wildcard imports at the
    top of the file -- confirm.
    """
    extractor = OpenIEClient()
    for index in range(start, stop + 1):
        plot = readfile(f"./plots/hp{index}")

        print("\n*********TEXT*********\n")
        print(plot)
        print("\n**********************\n")

        plot = preprocess_text(plot)
        if coreference:
            plot = coref_resolution(plot)
        if stopwords:
            plot = remove_stopwords(plot, custom_stopwords)

        # ``triplets`` is a list; each element is a dict with three keys:
        #   triplet["subject"]  -> the subject of the triplet
        #   triplet["relation"] -> the relation (predicate) of the triplet
        #   triplet["object"]   -> the object of the triplet
        triplets = extractor.extract_triplets(plot)

        if lemmatize:
            # The return value is not used, so the lemmatizers presumably
            # modify ``triplets`` in place -- TODO confirm.
            lemmatizer = (
                lemmatize_triplets_only_verbs if only_verbs else lemmatize_triplets
            )
            lemmatizer(triplets)

        triplets_to_csv(triplets, csv_folder + f"triplets_hp{index}.csv")
def merge_csvs(first=1, last=7, folder=None):
    """Concatenate the per-plot triplet CSVs into ``triplets_hp_merged.csv``.

    The files ``triplets_hp<i>.csv`` for ``i`` in ``first..last`` (inclusive)
    are read in order and every row is copied to the merged file, which is
    overwritten if it already exists.

    Args:
        first: Index of the first per-plot CSV to merge.  Defaults to 1.
        last: Index of the last per-plot CSV to merge (inclusive).
            Defaults to 7, matching the previously hard-coded range.
        folder: Path prefix of the CSV files (it is concatenated directly
            with the file name, so it must end with a path separator).
            Defaults to ``csv_folder`` from ``config``.

    Raises:
        FileNotFoundError: If one of the per-plot CSV files does not exist.
    """
    if folder is None:
        folder = csv_folder

    with open(folder + "triplets_hp_merged.csv", "w", newline='') as f_out:
        writer = csv.writer(f_out)
        for i in range(first, last + 1):
            part_path = folder + "triplets_hp" + str(i) + ".csv"
            # newline='' is required by the csv module for reading as well:
            # without it, universal-newline translation rewrites "\r" that is
            # embedded inside quoted fields before the reader sees it.
            with open(part_path, "r", newline='') as f_in:
                writer.writerows(csv.reader(f_in))
def create_graph_node4j():
    """Create a graph and fill it with the merged and the predicted triples.

    A graph is obtained from ``create_graph()`` and the reference elements
    from ``create_ref_elements()``.  Every row of
    ``<csv_folder>triplets_hp_merged.csv`` and then of each
    ``<csv_folder>predicted<i>.csv`` (``i`` in ``range(num_gen_repetions)``)
    is added with ``add_triple``, using the first three columns of the row.

    Returns:
        The graph object returned by ``create_graph()``, after all rows
        have been added.

    Raises:
        IndexError: If a CSV row has fewer than three columns.
    """
    graph = create_graph()
    dict_elements = create_ref_elements()

    def add_rows_from(csv_path):
        # Feed every row of one CSV file into the graph.
        with open(csv_path, "r", newline="") as source:
            for row in csv.reader(source):
                add_triple(graph, row[0], row[1], row[2], dict_elements)

    add_rows_from(csv_folder + "triplets_hp_merged.csv")
    for index in range(num_gen_repetions):
        add_rows_from(csv_folder + "predicted" + str(index) + ".csv")

    return graph
def main():
    """Run the complete pipeline, one stage after the other.

    The stages are executed strictly in this order; each one is a plain
    call without arguments and nothing is returned.
    """
    # Text processing: set up the custom stop-word object, then produce the
    # per-plot triplet CSVs and merge them into a single file.
    create_stopwords_custom_object()
    create_csvs()
    merge_csvs()

    # Model stages, imported from the ampligraph_* modules (presumably
    # hyper-parameter search, evaluation and prediction of unseen triples --
    # confirm against those modules).
    grid_search_hyperparams()
    test_model()
    create_unseen()
    predict_unseen()

    # Load the merged and the predicted triples into the graph.
    create_graph_node4j()
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()