Skip to content

Commit d7539e4

Browse files
committed
RDN transfer experiment
1 parent c805e92 commit d7539e4

1 file changed

Lines changed: 245 additions & 0 deletions

File tree

rdnb_transfer_experiment.py

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
2+
#import logging
3+
#logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s', handlers=[logging.FileHandler('app.log','w'),logging.StreamHandler()])
4+
5+
from experiments import experiments, bk, setups
6+
from gensim.test.utils import datapath
7+
from datasets.get_datasets import *
8+
from boostsrl import boostsrl
9+
import parameters as params
10+
import utils as utils
11+
import numpy as np
12+
import random
13+
import time
14+
import sys
15+
import os
16+
17+
#verbose=True
# Not referenced anywhere else in the visible part of this module.
source_balanced = False
# Forwarded as the ``balanced=`` argument of ``datasets.load`` in main().
balanced = False

# Title handed to ``utils.print_function`` by train_and_test() and
# get_confusion_matrix() when they log.
experiment_title = ''
22+
23+
def save_experiment(data, experiment_title):
    """Append one record to the experiment's ``rdnb.json`` history file.

    ``experiments/<experiment_title>/rdnb.json`` holds a JSON list. Records
    already stored there are loaded, ``data`` is appended, and the whole list
    is written back.

    Args:
        data: JSON-serialisable object to append to the stored list.
        experiment_title: Name of the sub-directory of ``experiments/`` that
            receives the file; the directory is created when missing.
    """
    # Imported here because the module has no explicit top-level json import.
    import json

    experiment_dir = os.path.join('experiments', experiment_title)
    os.makedirs(experiment_dir, exist_ok=True)
    results_path = os.path.join(experiment_dir, 'rdnb.json')

    results = []
    # Test the very file that is read below. Checking a different path either
    # drops the stored history or fails when only that other file exists.
    if os.path.isfile(results_path):
        with open(results_path, 'r') as fp:
            results = json.load(fp)

    results.append(data)
    with open(results_path, 'w') as fp:
        json.dump(results, fp)
33+
34+
def train_and_test(background, train_pos, train_neg, train_facts, test_pos, test_neg, test_facts):
    """Train an RDN-B model from scratch and evaluate it on the test split.

    Args:
        background: Modes/background object handed to ``boostsrl.train``.
        train_pos: Positive training examples.
        train_neg: Negative training examples.
        train_facts: Facts available during training.
        test_pos: Positive test examples.
        test_neg: Negative test examples.
        test_facts: Facts available during testing.

    Returns:
        A tuple ``(model, summary, learning_time, inference_time)`` where
        ``summary`` is ``results.summarize_results()`` and both times are
        wall-clock seconds measured with ``time.time()``.
    """
    start = time.time()
    model = boostsrl.train(background, train_pos, train_neg, train_facts, refine=None, transfer=None, trees=params.TREES)
    learning_time = time.time() - start

    utils.print_function('Model training time {}'.format(learning_time), experiment_title)

    # Dump exactly as many trees as were requested from the learner, so the
    # log stays in step with params.TREES instead of a fixed count of 10.
    for tree_number in range(1, params.TREES + 1):
        tree_lines = model.get_will_produced_tree(treenumber=tree_number)
        utils.print_function('WILL Produced-Tree #' + str(tree_number) + '\n' + '\n'.join(tree_lines), experiment_title)

    # Test model
    start = time.time()
    results = boostsrl.test(model, test_pos, test_neg, test_facts, trees=params.TREES)
    inference_time = time.time() - start

    utils.print_function('Inference time {}'.format(inference_time), experiment_title)

    return model, results.summarize_results(), learning_time, inference_time
62+
63+
def get_confusion_matrix(to_predicate):
    """Build the confusion matrix for the last test run of ``to_predicate``.

    The counts come from the results db file written by the Java
    application: it is converted to a text file, read back as true and
    predicted labels, and turned into the four confusion-matrix cells.

    Args:
        to_predicate: Predicate name substituted into ``params.TEST_OUTPUT``.

    Returns:
        A dict with the keys ``'TP'``, ``'FP'``, ``'TN'`` and ``'FN'`` mapped
        to plain Python ints.
    """
    utils.print_function('Converting results file to txt', experiment_title)

    utils.convert_db_to_txt(to_predicate, params.TEST_OUTPUT)
    txt_path = params.TEST_OUTPUT.format(to_predicate).replace('.db', '.txt')
    y_true, y_pred = utils.read_results(txt_path)

    utils.print_function('Building confusion matrix', experiment_title)

    # The helper yields the cells in the order TN, FP, FN, TP.
    tn, fp, fn, tp = utils.get_confusion_matrix(y_true, y_pred)
    cells = {'TP': tp, 'FP': fp, 'TN': tn, 'FN': fn}

    utils.print_function('Confusion matrix \n', experiment_title)
    for label, count in cells.items():
        utils.print_function('{}: {}'.format(label, count), experiment_title)

    # Cast to int so the values can be dumped to JSON (np.int64 cannot).
    return {label: int(count) for label, count in cells.items()}
83+
84+
def main():
    """Run the RDN-B learn-from-scratch baseline for every configured experiment.

    For each entry of ``experiments`` the target dataset is loaded and split
    into folds. For every fold a model is trained and tested through
    ``train_and_test`` and its confusion matrix is collected. The fold
    results of a run are stored with ``save_experiment`` and the confusion
    matrices are written to
    ``experiments/<id>_<source>_<target>/rdnb_confusion_matrix.json``.
    """
    # train_and_test() and get_confusion_matrix() log through the module-level
    # ``experiment_title``. Without this declaration the assignment in the loop
    # below only binds a local and those helpers keep seeing the empty default.
    global experiment_title

    if not os.path.exists('experiments'):
        os.makedirs('experiments')

    # Targets whose folds are built with split_into_folds() rather than read
    # directly from the loaded data.
    presplit_targets = ['nell_sports', 'nell_finances', 'yago2s']

    # Dictionary to keep all experiments results
    rdnb_confusion_matrix = {}

    for experiment in experiments:

        experiment_title = experiment['id'] + '_' + experiment['source'] + '_' + experiment['target']

        _id = experiment['id']
        source = experiment['source']
        target = experiment['target']
        to_predicate = experiment['to_predicate']

        # Load total target dataset
        tar_total_data = datasets.load(target, bk[target], seed=params.SEED)

        # The fold count does not change between runs, so compute it once.
        if target in presplit_targets:
            n_folds = params.N_FOLDS
        else:
            n_folds = len(tar_total_data[0])

        utils.print_function('Starting experiment {} \n'.format(experiment_title), experiment_title)

        path = os.getcwd() + '/experiments/' + experiment_title
        if not os.path.exists(path):
            os.mkdir(path)

        results = {
            'save': {
                'experiment': 0,
                'n_runs': 0,
                'seed': 441773,
                'source_balanced': False,
                'balanced': False,
                'folds': n_folds,
                'nodeSize': params.NODESIZE,
                'numOfClauses': params.NUMOFCLAUSES,
                'maxTreeDepth': params.MAXTREEDEPTH,
            }
        }

        # Author's note, translated: "delete this". The run count is forced to
        # a single run regardless of the number of folds.
        n_runs = 1
        while results['save']['n_runs'] < n_runs:

            utils.print_function('Run: ' + str(results['save']['n_runs'] + 1), experiment_title)

            rdnb_confusion_matrix['rdn-b'] = []
            confusion_matrix = {'TP': [], 'FP': [], 'TN': [], 'FN': []}

            utils.print_function('Starting experiments for RDN-B \n', experiment_title)

            results_save = []
            for i in range(n_folds):
                utils.print_function('\n Starting fold {} of {} folds \n'.format(i+1, n_folds), experiment_title)

                ob_save = {}

                # NOTE(review): the positives computed in this first split are
                # overwritten by the split further down. It is kept because the
                # side effects of the datasets helpers are not visible here.
                if target not in presplit_targets:
                    [tar_train_pos, tar_test_pos] = datasets.get_kfold_small(i, tar_total_data[0])
                else:
                    t_total_data = datasets.load(target, bk[target], target=to_predicate, balanced=balanced, seed=params.SEED)
                    tar_train_pos = datasets.split_into_folds(t_total_data[1][0], n_folds=n_folds, seed=params.SEED)[i] + t_total_data[0][0]

                # Load new predicate target dataset
                tar_data = datasets.load(target, bk[target], target=to_predicate, balanced=balanced, seed=params.SEED)

                # Group and shuffle
                if target not in presplit_targets:
                    [tar_train_facts, tar_test_facts] = datasets.get_kfold_small(i, tar_data[0])
                    [tar_train_pos, tar_test_pos] = datasets.get_kfold_small(i, tar_data[1])
                    [tar_train_neg, tar_test_neg] = datasets.get_kfold_small(i, tar_data[2])
                else:
                    # The same facts are used for training and testing here.
                    [tar_train_facts, tar_test_facts] = [tar_data[0][0], tar_data[0][0]]
                    to_folds_pos = datasets.split_into_folds(tar_data[1][0], n_folds=n_folds, seed=params.SEED)
                    to_folds_neg = datasets.split_into_folds(tar_data[2][0], n_folds=n_folds, seed=params.SEED)
                    [tar_train_pos, tar_test_pos] = datasets.get_kfold_small(i, to_folds_pos)
                    [tar_train_neg, tar_test_neg] = datasets.get_kfold_small(i, to_folds_neg)

                random.shuffle(tar_train_pos)
                random.shuffle(tar_train_neg)

                utils.print_function('Start training from scratch\n', experiment_title)

                utils.print_function('Target train facts examples: %s' % len(tar_train_facts), experiment_title)
                utils.print_function('Target train pos examples: %s' % len(tar_train_pos), experiment_title)
                utils.print_function('Target train neg examples: %s\n' % len(tar_train_neg), experiment_title)

                utils.print_function('Target test facts examples: %s' % len(tar_test_facts), experiment_title)
                utils.print_function('Target test pos examples: %s' % len(tar_test_pos), experiment_title)
                utils.print_function('Target test neg examples: %s\n' % len(tar_test_neg), experiment_title)

                # Creating background
                background = boostsrl.modes(bk[target], [to_predicate], useStdLogicVariables=False, maxTreeDepth=params.MAXTREEDEPTH, nodeSize=params.NODESIZE, numOfClauses=params.NUMOFCLAUSES)

                # Train and test
                utils.print_function('Training from scratch \n', experiment_title)

                # Learn and test model not revising theory
                model, t_results, learning_time, inference_time = train_and_test(background, tar_train_pos, tar_train_neg, tar_train_facts, tar_test_pos, tar_test_neg, tar_test_facts)
                del model

                t_results['Learning time'] = learning_time
                ob_save['rdn-b'] = t_results

                utils.show_results(utils.get_results_dict(t_results, learning_time, inference_time), experiment_title)

                cm = get_confusion_matrix(to_predicate)

                confusion_matrix['TP'].append(cm['TP'])
                confusion_matrix['FP'].append(cm['FP'])
                confusion_matrix['TN'].append(cm['TN'])
                confusion_matrix['FN'].append(cm['FN'])

                # NOTE(review): the same accumulating dict is appended once per
                # fold, so the saved list holds n_folds references to a single
                # object. Confirm whether one append per run was intended.
                rdnb_confusion_matrix['rdn-b'].append(confusion_matrix)
                del cm, t_results, learning_time, inference_time

                results_save.append(ob_save)

            # NOTE(review): original indentation was lost. Saving once per run
            # keeps a single list of fold results per run in rdnb.json; confirm
            # that a per-fold checkpoint was not intended.
            save_experiment(results_save, experiment_title)
            results['save']['n_runs'] += 1

        matrix_filename = os.getcwd() + '/experiments/{}_{}_{}/rdnb_confusion_matrix.json'.format(_id, source, target)

        # Save all results
        utils.save_json_file(matrix_filename, rdnb_confusion_matrix)
243+
244+
# Script entry point: run every experiment. main() has no return statement,
# so sys.exit receives None and the process exits with status 0 on success.
if __name__ == '__main__':
    sys.exit(main())

0 commit comments

Comments
 (0)