-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcli.py
More file actions
executable file
·146 lines (127 loc) · 4.76 KB
/
cli.py
File metadata and controls
executable file
·146 lines (127 loc) · 4.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import getopt
import sys
# keep for my env compatibility
sys.path.append("/home/artsokol/anaconda/lib/python2.7/site-packages")
import pymorphy2
import corpus
import collocation
import find_by_mask
from nltk.util import ngrams
import freqs
import tonality
# Cyrillic part-of-speech labels. They are not referenced anywhere else in the
# visible part of this file; presumably they match the tags used in
# part-of-speech masks and by the morphological analyser -- TODO confirm.
noun = "СУЩ"  # noun
verb1 = "ГЛ"  # verb (presumably the finite form -- confirm)
verb2 = "ИНФ"  # presumably the infinitive -- confirm
adj = "ПРИЛ"  # adjective
def get_options(a_period, a_output=None, a_source=None):
    """Parse ``sys.argv`` and report the results through the list arguments.

    Recognised options are ``-h/--help``, ``-c/--corpus=first_year,last_year``
    and ``-s/--source=name1,name2,...``.  Nothing is returned: parsed values
    are added to the caller's lists in place.

    Args:
        a_period: list that receives one ``[first_year, last_year]`` pair
            (kept as strings) for every ``-c`` option.  At least one pair is
            mandatory.
        a_output: accepted for backward compatibility only; no output option
            is parsed, so it is never touched.
        a_source: optional list extended with every source name given via
            ``-s``.  When it is ``None`` the names are validated but dropped.

    Raises:
        SystemExit: with status 0 after ``-h`` (or on an interpreter older
            than Python 3), and with status 2 after printing the usage text
            for an unknown option, a malformed ``-c``/``-s`` value or a
            missing ``-c`` option.
    """
    if sys.version_info < (3, 0):
        print("must use python 3.0 or greater")
        sys.exit()

    try:
        opts, _ = getopt.getopt(
            sys.argv[1:], "hc:s:", ["help", "corpus=", "source="])
    except getopt.GetoptError as err:
        # getopt's message looks like "option -a not recognized".
        print(err)
        usage()
        sys.exit(2)

    for opt, value in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-c", "--corpus"):
            years = value.split(',')
            try:
                # The caller converts both years with int(); reject bad input
                # here so the user gets the usage text instead of a traceback.
                for year in years:
                    int(year)
            except ValueError:
                years = []
            if len(years) != 2:
                usage()
                sys.exit(2)
            a_period.append(years)
        elif opt in ("-s", "--source"):
            # The usage text documents a comma-separated list, so split it
            # into individual names rather than storing one combined string.
            names = [name.strip() for name in value.split(',') if name.strip()]
            if not names:
                usage()
                sys.exit(2)
            if a_source is not None:
                a_source.extend(names)
                print(a_source)
        else:
            # getopt only yields options it was told about, so this is a
            # programming error; raise explicitly because a bare assert
            # statement disappears under ``python -O``.
            raise AssertionError("unhandled option")

    if not a_period:
        # The corpus period is mandatory.
        usage()
        sys.exit(2)
def print_out(data, outfile=None):
    """Print *data* to standard output and, optionally, to *outfile* as well.

    Args:
        data: object to print; it is formatted by the built-in ``print``.
        outfile: optional writable text stream that receives a second copy
            of *data*.  Nothing extra is written when it is ``None``.
    """
    print(data)
    # Identity test: only the literal ``None`` means "no extra stream".
    if outfile is not None:
        print(data, file=outfile)
def get_nGramsTemplate(stringToParse):
    """Return the upper-cased, ``+``-separated pieces of *stringToParse*.

    The string is upper-cased first and then cut at every ``+``; an input
    without ``+`` yields a single-element list.
    """
    upper_cased = stringToParse.upper()
    return upper_cased.split('+')
def help():
    """Print the summary of commands accepted at the interactive prompt."""
    # One entry per printed message, in display order.
    entries = (
        "actions:",
        "collocation <ngram_with_spaces> - find all collocations for the ngram,\n"
        " save into *_collocation.txt files\n"
        " and output three the most common collocations",
        "mask_search <file_name> - find all phrases according its mask\n"
        " defined by parts of speech\n"
        " and save into <file_name> file (stdout by default)\n"
        " <file_name> is optional",
        "freq <n for ngrams> - count frequencies for all ngrams\n"
        " and save it into freq.txt file\n"
        " n can be from 1 to 3",
        "sentiment - output files with max and min tones for the chosen corpus",
        "help - for help",
        "exit - for exit",
    )
    for entry in entries:
        print(entry)
def usage():
    """Print the command-line synopsis and option reference to stdout."""
    # The -o/--output option is not parsed by get_options(), so it is
    # deliberately left out of the text below.
    manual = (
        "NAME:",
        "\tcli.py - command line tool for corpus management",
        "SYNOPSIS:",
        "\tcli.py -c first_year,last_year [options]",
        "OPTIONS:",
        "\t-c first_year,last_year, --corpus=first_year,last_year\t\tMandatory parameter. Certain time period shoud be specifyed",
        "\t-h, --help\t\t\t\t\t\t\tGet usage info",
        "\t-s name1,name2,..nameN, --source=name1,name2,..nameN\t\tSpecifies the corpus source newspaper. Valid sources are RG, Novaya",
        "\t\t\t\t\t\t\t\t\tWithout key all possible sources are used",
        "",
    )
    for row in manual:
        print(row)
if __name__ == "__main__":
    # Script entry point: parse the options, load the lemmatised corpus for
    # the requested period/sources, then serve commands from an interactive
    # prompt until "exit" or end of input.

    # get_options() reports its results by filling these lists in place.
    input_period = []
    output = []
    newspaper = []
    get_options(a_period=input_period, a_output=output, a_source=newspaper)

    try:
        # Only the first -c pair is used; its first two fields are the years.
        first_year, last_year = (int(year) for year in input_period[0][:2])
    except (IndexError, ValueError):
        # Missing, incomplete or non-numeric period: show the usage text
        # instead of dying with a traceback.
        usage()
        sys.exit(2)

    corp = corpus.corpus()
    corp.load('dumps/corp_multy-lemm.dump')
    data = corp.get_lemm(period=[first_year, last_year], sources=newspaper)

    help()
    while True:
        try:
            command = input('--> ')
        except EOFError:
            # End of input (Ctrl-D or an exhausted pipe) ends the session.
            sys.exit()

        # Split on any whitespace so repeated spaces do not produce empty
        # arguments, and match the whole first word so that e.g. "frequency"
        # is not mistaken for "freq".
        words = command.split()
        action = words[0] if words else ""
        arguments = words[1:]

        if action == "help":
            help()
        elif action == "collocation":
            if arguments:
                collocation.find_collocations(data, arguments)
            else:
                print("ngram wasn't found")
                help()
        elif action == "mask_search":
            # The file name is optional.  The argument is checked explicitly
            # so that a failure inside the search itself is not swallowed and
            # silently retried without the file name.
            if arguments:
                find_by_mask.find_by_part_of_speech(data, arguments[0])
            else:
                find_by_mask.find_by_part_of_speech(data)
        elif action == "freq":
            try:
                ngram_size = int(arguments[0])
            except (IndexError, ValueError):
                ngram_size = None
            if ngram_size in (1, 2, 3):
                freqs.get_ngrams_with_frequencies(data, ngram_size)
            else:
                print("incorrect n for ngrams\n")
                help()
        elif action == "exit":
            sys.exit()
        elif action == "sentiment":
            tonality.ask_sentiment(print_max_min_texts=True, years=input_period, sources=newspaper)
        else:
            print("incorrect command\n")
            help()