-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathcoh_topic_num.py
More file actions
98 lines (83 loc) · 2.83 KB
/
coh_topic_num.py
File metadata and controls
98 lines (83 loc) · 2.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import sys
import utils.name_convention as name
import matplotlib.pyplot as plt
import numpy as np
def parse_cli_args(argv):
    """Return ``(tc, src, measure)`` taken from a ``sys.argv``-style list.

    Positional arguments (all optional):

    1. ``tc``      -- key of the coherence measure to plot, default ``"tc"``.
    2. ``src``     -- name of the source corpus, default ``"pp_reuters"``.
    3. ``measure`` -- ``"mean"`` or ``"median"``, default ``"mean"``.

    Historically *any* third argument switched the script to ``"median"``
    without looking at its value, so passing ``mean`` explicitly produced the
    median.  An explicit ``"mean"`` is now honoured; every other value still
    selects ``"median"`` so existing invocations keep their behaviour.
    """
    args = list(argv[1:])  # drop the program name
    tc = args[0] if len(args) > 0 else "tc"
    src = args[1] if len(args) > 1 else "pp_reuters"
    if len(args) > 2 and args[2] != "mean":
        measure = "median"
    else:
        measure = "mean"
    return tc, src, measure


# Module-level settings consumed by the loading and plotting code below.
tc, src, measure = parse_cli_args(sys.argv)
# Corpus representations that are compared; one plotted series per entry.
type_names = ["binary", "bow", "tfidf"]
# Topic counts evaluated for every corpus representation (plot x-axis).
x_axis = [5, 10, 15, 20]


def _read_top_topic_scores(path):
    """Return the score of every line in *path* that starts with ``"topic"``.

    The score is the third whitespace-separated field of such a line; all
    other lines are ignored.  The file is closed before returning.
    """
    with open(path) as ifile:
        return [float(line.split()[2])
                for line in ifile
                if line.startswith("topic")]


def _read_summary_scores(path, measure):
    """Return one value per ``"Topic"`` header line found in *path*.

    For each line starting with ``"Topic"`` the value is the second
    whitespace-separated field of the line directly after it when *measure*
    is ``"mean"``, and of the line two after it otherwise.  The file is
    closed before returning.
    """
    with open(path) as ifile:
        lines = ifile.readlines()
    offset = 1 if measure == "mean" else 2
    return [float(lines[pos + offset].split()[1])
            for pos, line in enumerate(lines)
            if line.startswith("Topic")]


def _load_scores(corpus_type, topics_count):
    """Read the per-topic scores for one (corpus type, topic count) run.

    The input file is chosen by the module-level ``tc`` setting: ``"tc"`` and
    ``"tct"`` read fixed file names in the run's output directory, any other
    value is used as a sub-directory name.
    """
    dname = name.get_output_dir(corpus_type, topics_count, src)
    if tc == "tc":
        return _read_top_topic_scores(dname + "/top_topics_20_start0.txt")
    if tc == "tct":
        return _read_top_topic_scores(dname + "/top_topics_tfidf_20.txt")
    return _read_summary_scores(dname + "/" + tc + "/w020_start0.txt", measure)


# typelist[i][j] is the average score for type_names[i] with x_axis[j] topics.
typelist = [
    [np.average(_load_scores(corpus_type, topics_count))
     for topics_count in x_axis]
    for corpus_type in type_names
]
# Draw one line per corpus type (average score against number of topics),
# label every data point with its coordinates and save the figure as a PNG.
fig = plt.figure()

# Heading and y-axis label depend on the selected coherence measure.
if tc == "tc":
    title, ylabel = "Co-occurrence Based Topic Coherence", "Co-occurrence TC"
elif tc == "tct":
    title, ylabel = ("Tfidf Co-occurrence Based Topic Coherence",
                     "Tfidf Co-occurrence TC")
else:
    ylabel = tc.upper()
    title = ylabel + " Coherence"

corpus_label = src.replace("pp_", "").title()
fig.suptitle(title + " \n - " + corpus_label, fontsize=20)
plt.ylabel(ylabel)
plt.xlabel("# of topics")

linelist = []  # line handles, in type_names order, for the legend
for series_no, series in enumerate(typelist):
    # Each series lights up its own RGB channel; the middle one is dimmer.
    rgb = [0, 0, 0]
    rgb[series_no] = 0.5 if series_no == 1 else 0.9
    rgb = tuple(rgb)

    handle, = plt.plot(x_axis, series, color=rgb, marker="o")
    linelist.append(handle)

    # Labels of the first two series are lifted 10 points; the third is not.
    yoffset = 0 if series_no == 2 else 10
    for x, y in zip(x_axis, series):
        point_label = "(" + str(x) + ", " + "{:.2f}".format(float(y)) + ")"
        plt.annotate(point_label, xy=(x, y), color=rgb, fontsize=20,
                     xytext=(-25, yoffset), textcoords='offset points')

plt.legend(linelist, type_names, bbox_to_anchor=(1, -0.05), ncol=4)

fig = plt.gcf()
fig.set_size_inches(16, 12)
plt.savefig("plot_3comp_" + tc + "_" + src + ".png")