-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathcount_zeroavg.py
More file actions
55 lines (45 loc) · 1.5 KB
/
count_zeroavg.py
File metadata and controls
55 lines (45 loc) · 1.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import sys
import utils.name_convention as name
import numpy as np
# Summarise the "zeros" statistics produced for every similarity measure.
#
# For each measure the script reads one zeros_sum_<measure>_w<words>.txt file
# per (corpus type, topics count) combination, normalises the two counts found
# in it by the number of word pairs across all topics, and writes the mean of
# those ratios to wn_zeros_summary.txt (one line per measure).
#
# Optional positional command-line arguments:
#   1: corpus type flag  -- "t" -> tfidf, "b" -> binary, anything else -> bow
#   2: topics count      -- int, default 3
#   3: source name       -- default "pp_reuters"
#   4: measure name      -- default "path"
#   5: words count       -- int, default 150
#
# NOTE(review): corpus_type, topics_count and tc are re-bound by the loops
# below, so in practice only arguments 3 (src) and 5 (words_count) influence
# the output. The parsing is kept so that invocation behaviour (including the
# ValueError on a non-integer argument 2) is unchanged.
if len(sys.argv) <= 1:
    corpus_type = "bow"
else:
    corpus_type = {"t": "tfidf", "b": "binary"}.get(sys.argv[1], "bow")

topics_count = int(sys.argv[2]) if len(sys.argv) > 2 else 3
src = sys.argv[3] if len(sys.argv) > 3 else "pp_reuters"
tc = sys.argv[4] if len(sys.argv) > 4 else "path"
words_count = int(sys.argv[5]) if len(sys.argv) > 5 else 150

# Number of unordered pairs among words_count words.  The product of two
# consecutive integers is always even, so floor division is exact here.
word_pairs = words_count * (words_count - 1) // 2

# The context managers guarantee that the summary file and every per-
# configuration input file are closed, even if a read or parse step fails.
with open("wn_zeros_summary.txt", "w") as ofile:
    for tc in "path wup lch lin res jcn".split():
        ofile.write(tc + ": ")

        # Per-configuration ratios for this measure; averaged after the loops.
        avgwn_list = []
        avgdist_list = []

        for corpus_type in ["tfidf", "bow", "binary"]:
            for topics_count in [5, 10, 15, 20]:
                dname = name.get_output_dir(corpus_type, topics_count, src)
                zpath = dname + "/zeros_sum_" + tc + "_w" + str(words_count) + ".txt"

                # The first two lines are expected to look like
                # "<label>:<int>"; the first count is treated as "not in wn"
                # (presumably WordNet -- confirm against the file's producer)
                # and the second as "no distance".
                with open(zpath) as zfile:
                    not_in_wn = int(zfile.readline().split(":")[1])
                    no_distance = int(zfile.readline().split(":")[1])

                # Normalise by the total number of word pairs over all topics.
                total_pairs = topics_count * word_pairs
                avgwn_list.append(float(not_in_wn) / total_pairs)
                avgdist_list.append(float(no_distance) / total_pairs)

        ofile.write(
            "not in wn: " + str(np.average(avgwn_list))
            + " no distance: " + str(np.average(avgdist_list)) + "\n"
        )