-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsearch.py
More file actions
83 lines (60 loc) · 1.62 KB
/
search.py
File metadata and controls
83 lines (60 loc) · 1.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""
Search module for IR system
Naive command line interpreter to parse queries
and return documents.
"""
from future.utils import iteritems
import os
import retrieve_data as rd
import index
import nltk
from nltk.corpus import stopwords
from print_docs import printDocs
from parse_query import parseQuery
def getDocLst(docs):
    """ Order list of documents according to weight.
    Documents whose weight is not strictly positive are dropped.
    Arguments:
        docs: dict of docId -> weight
    Returns:
        list of tuples (docId, weight), highest weight first.
        Documents with equal weight keep the iteration order of docs.
    """
    # dict.items() works on both Python 2 and 3, so no compatibility
    # helper is needed just to walk the mapping.
    positive = [(docId, weight) for docId, weight in docs.items()
                if weight > 0.]
    # reverse=True keeps the sort stable, so ties stay in input order.
    return sorted(positive, key=lambda pair: pair[1], reverse=True)
def search(data, dataIndex, query):
    """ Run a single query and print its outcome.
    Arguments:
        data: list of documents.
        dataIndex: the index.
        query: string
    The weights returned by parseQuery are ranked with getDocLst and
    handed to printDocs; when nothing is left after ranking, a short
    notice is printed instead.
    """
    ranked = getDocLst(parseQuery(dataIndex, query))
    if not ranked:
        print("Nothing found.")
        return
    printDocs(data, ranked, query)
def loop(data, dataIndex):
    """ Command line interpreter.
    Prompts for queries on standard input and runs a search for every
    non-empty line.
    Arguments:
        data: list of documents.
        dataIndex: the index.
    The loop ends when q, :exit or exit: is entered (case-insensitive,
    surrounding whitespace ignored) or when the input stream is
    closed (EOF).
    """
    # raw_input only exists on Python 2; on Python 3 the equivalent
    # builtin is input(). Resolve it once, before the loop.
    try:
        readLine = raw_input
    except NameError:
        readLine = input
    exitWords = ('q', ':exit', 'exit:')
    while True:
        try:
            query = readLine("\nsearch> ")
        except EOFError:
            # Ctrl-D or end of piped input: stop quietly instead of
            # crashing with a traceback.
            break
        if query.lower().strip() in exitWords:
            break
        if query:
            search(data, dataIndex, query)
def main():
    """ Entry point: load the documents, index them and start the
    interactive search prompt.
    """
    documents = rd.loadData()
    loop(documents, index.run(documents))
# Run main() only when this file is executed as a script, so that
# importing the module does not trigger it.
if __name__ == "__main__":
    main()