-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathExternalDict.py
More file actions
163 lines (130 loc) · 6.24 KB
/
ExternalDict.py
File metadata and controls
163 lines (130 loc) · 6.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#-------------------------------------------------------------------------------
# Name: ExternalDict.py
# Purpose: Creates ExternalDict objects for reading, using, and writing externally
# stored dictionaries to store constants.
# Authors: Bridget O'Daniel, Wenli Zhao, Lily Wu
# Created: 10/06/2015
# Acknowledgements: Aaron Hall for merge_dicts: http://stackoverflow.com/questions/38987/how-can-i-merge-two-python-dictionaries-in-a-single-expression
#-------------------------------------------------------------------------------
import os.path
from sets import Set
def merge_dicts(*dict_args):
    '''Build a brand-new dict holding the entries of every dict passed in.

    The inputs are applied from left to right, so when the same key shows up
    more than once the value from the right-most dict wins. The copy is
    shallow and none of the inputs are modified.'''
    merged = {}
    for later in dict_args:
        merged.update(later)  #Later arguments overwrite earlier ones
    return merged
class ExternalDict:
    '''Dictionary of constants that can be loaded from, and saved to, a text file.

    Handles a constant dictionary needed for the creation of files to be read
    by the GenSpec classifier.

    File format: every entry is written as "key>x<value" followed by a tab,
    all on one line. Keys and values are written with str(), so anything read
    back from a file is a string (an ID stored as the int 1 comes back as "1").'''
    STARTING_VALUE = 1       #Starting value for assignment of unique IDs
    PAIR_SEPARATOR = ">x<"   #Separates a key from its value inside one entry
    ENTRY_SEPARATOR = "\t"   #Written after every entry in the file

    def __init__(self, path):
        '''Creates an ExternalDict object. Reads from a file at the provided path if it
        exists, otherwise the dictionary starts off empty, but can be saved at that location.
        pre: path is a string representing a path to a file. If a file exists by that name,
             it must be in the form of an ExternalDict file.
             ED form: "key>x<value\\t" per entry in file -> key:value pair'''
        self.path = path
        self.fname = os.path.basename(os.path.normpath(path))
        self.dict = {}  #Default
        self.upload()

    #-------------------------- Operator Overloading ---------------------------------#
    def __len__(self):  #len(ED)
        '''Gets the current length of the External Dict object.'''
        return len(self.dict)

    def __str__(self):  #str(ED) and print ED
        '''Gets the string representation of the External Dict object.'''
        return "External Dict " + self.fname + ": \n" + str(self.dict)

    def __getitem__(self, key):  #ED[key]
        '''Gets the value matching the provided key. Raises KeyError if absent.'''
        return self.dict[key]

    def __setitem__(self, key, value):  #ED[key] = new_value
        '''Sets the value at key in the External Dict.'''
        self.dict[key] = value

    def __iter__(self):  #Allows for iteration
        '''Gets an iteration object for the ED, over the keys.'''
        #iter(dict) walks the keys exactly like iterkeys(), but also exists on Python 3
        return iter(self.dict)

    def __contains__(self, key):  #key in ED?
        '''Checks to see if ED contains the given key. Returns True or False.'''
        return key in self.dict

    #-------------------------- Basic Dictionary Tasks -------------------------------#
    def keys(self):
        '''Gets the ED's keys.'''
        return self.dict.keys()

    def values(self):
        '''Gets the ED's values.'''
        return self.dict.values()

    def items(self):
        '''Gets the ED's items.'''
        return self.dict.items()

    #------------------------- Specific to External Dict -----------------------------#
    #-------------Reading File
    def is_external_dict_exist(self):
        '''Checks if a file for the External Dict exists and returns True or False.'''
        return os.path.isfile(self.path)

    def upload(self):
        '''Looks for an External Dict file and loads its information into the dictionary.
        If no such file exists, assumes the dictionary is empty.
        post: Fills the ED's dictionary with any information in the file.'''
        if self.is_external_dict_exist():
            self.read_external_dict()
        else:
            self.dict = {}

    def read_external_dict(self):
        '''Reads the External Dict file, building the dictionary it contains into the ED
        object's dictionary. Merges with current key-value pairs in the ED, but overwrites
        with newest information if the same key is found.
        Only the first line of the file is read. An empty file (which is what save()
        produces for an empty ED) leaves the ED unchanged.
        Raises IndexError if a non-empty entry has no ">x<" separator.'''
        #The with-block closes the file even if reading fails
        with open(self.path, "r") as dict_file:
            first_line = dict_file.readline()  #"" when the file is empty

        loaded = {}
        for entry in first_line.strip().split(self.ENTRY_SEPARATOR):
            if not entry:
                continue  #Empty file or doubled tab: nothing to parse
            #Split on the FIRST separator only, so a value containing ">x<" survives
            key, separator, value = entry.partition(self.PAIR_SEPARATOR)
            if not separator:
                raise IndexError("Malformed entry %r in %s: missing %r"
                                 % (entry, self.fname, self.PAIR_SEPARATOR))
            loaded[key] = value

        if not loaded:
            return  #Nothing to merge
        self.dict = merge_dicts(self.dict, loaded)

    #--------------Altering ED
    def add_pair(self, pair):
        '''Adds the key:value pair in the tuple to the ED.
        pre: pair is a tuple of length 2'''
        self.dict[pair[0]] = pair[1]

    def add_dict(self, dictionary):
        '''Takes in a dictionary and adds its key:value pairs to the ED. Combines with
        previous contents of ED, overwriting with any new values for a particular key.'''
        self.dict = merge_dicts(self.dict, dictionary)

    def add_list(self, lst):
        '''Takes in a list and adds each item as a key with a unique id as the value.
        Items that are already keys are left untouched.
        WARNING: ID is only unique for items added through THIS METHOD. Manually adding a
        pair with the same value as another is still possible.'''
        for item in lst:
            if item not in self:
                #Length of the dict at that point (plus the offset) is the unique ID
                self[item] = len(self) + ExternalDict.STARTING_VALUE

    def empty_dict(self):
        '''Completely resets the dictionary to empty.'''
        self.dict = {}

    def get_key_set(self, lst=None):
        '''Takes in a list of keys and returns a set of those keys AND those already in
        the ED, with duplicates removed.
        pre: lst is an iterable of hashable keys (or None for "no extra keys")
        post: returns an unordered, no-duplicate built-in set; lst itself is not modified'''
        key_set = set(self.dict)  #Keys already in the dictionary
        if lst is not None:
            key_set.update(lst)
        return key_set

    #--------------Storing ED
    @staticmethod
    def _as_text(field):
        '''Converts one key or value to the string written to the file. Falls back to
        UTF-8 encoding when str() cannot represent it (non-ASCII unicode on Python 2).'''
        try:
            return str(field)
        except UnicodeEncodeError:
            return field.encode('utf-8')

    def save(self):
        '''Saves the External Dict to a file for reuse in other data gathering ventures.
        post: Outputs a file at self.path where the key:value pairs are listed as
              follows: key>x<value\\tkey>x<value\\t...
        WARNING: Overwrites any file by the same name.'''
        #Serialize everything BEFORE opening the file, so a failure here cannot
        #leave a truncated file behind. Key and value are converted independently,
        #so both may contain non-ASCII text.
        entries = [self._as_text(key) + self.PAIR_SEPARATOR
                   + self._as_text(value) + self.ENTRY_SEPARATOR
                   for key, value in self.items()]

        #Mode "w" empties whatever was previously in the file; with closes it on error too
        with open(self.path, "w") as out_file:
            out_file.write("".join(entries))
        print(self.fname + " written.")