-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjerking.py
More file actions
161 lines (121 loc) · 4.66 KB
/
jerking.py
File metadata and controls
161 lines (121 loc) · 4.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import re
import random
import datetime
from dateutil import parser
from dateutil import relativedelta
import pprint
# Human-readable description of this module, exposed as the module's __doc__.
__doc__ = 'Contains logic for creating objects from the jerkcity.com/jerkcity.txt file and convenience functions like searching.'
# Version string of this module.
__version__ = '0.69'
class Jerk(object):
    '''A single comic: its metadata plus the dialogue split by speaker.

    Attributes set by the constructor:
      date       -- the date passed in (see __init__)
      num        -- the comic number
      title      -- the comic title
      raw_text   -- the dialogue exactly as passed in
      characters -- set of speaker names found in raw_text
      lines      -- list of dialogue lines with the speaker prefix removed
    '''

    def __init__(self, date, number, title, text):
        '''Store the metadata and immediately parse the dialogue.

        date   -- date of the comic; originally documented as a datetime.date
                  object (load_jerks in this file passes datetime.datetime)
        number -- comic number, stored as self.num
        title  -- comic title
        text   -- iterable of dialogue strings, stored as self.raw_text
        '''
        self.date = date
        self.num = number
        self.title = title
        self.raw_text = text
        self.process_text()

    def process_text(self):
        '''(Re)build self.characters and self.lines from self.raw_text.

        Each entry is split at its first colon: the part before it is recorded
        as a character name and the part after it as the spoken line. Entries
        without a colon are kept whole and contribute no character.
        '''
        self.characters = set()
        self.lines = []
        for entry in self.raw_text:
            speaker, colon, spoken = entry.partition(':')
            if colon:
                self.characters.add(speaker)
                self.lines.append(spoken)
            else:
                # No speaker prefix on this entry: keep it untouched.
                self.lines.append(entry)

    def __repr__(self):
        '''Return a one-line summary with the number, date, title and raw text.'''
        template = 'num={num}, date={date}, title={title}, raw_text={raw_text}'
        return template.format(**vars(self))
def load_jerks(fn='jerkcity_full.txt'):
    '''Load the jerkcity.com/jerkcity.txt format into a list of Jerk objects.

    The first line of the file is skipped. Every following line is classified
    in this order:
      * a 'JERKCITY #<n>: <title>' line sets the current number and title;
      * a line containing <int>/<int>/<int> sets the current date, read as
        month/day/year;
      * a line starting with '--cut here--' finishes the current comic and
        appends a Jerk built from the values collected so far;
      * any other non-blank line is collected as dialogue, newline removed.

    Number, title and date are not reset between comics, so a comic missing
    one of them inherits the previous comic's value. Before the first header
    is seen the placeholders are number 'NaN', title '!ERROR!' and the date
    1980-01-01.

    fn -- path of the text file to read.

    Returns the list of Jerk objects in file order (empty for an empty file).
    Raises whatever open() raises for an unreadable file, and ValueError if a
    date-like line does not form a valid calendar date.
    '''
    title_probe = re.compile(r'JERKCITY #\d+:')
    title_fields = re.compile(r'JERKCITY #(\d+):\s+(.*)\n')
    date_field = re.compile(r'(\d+/\d+/\d+)')
    has_content = re.compile(r'\S+')

    jerk_objs = []
    cur_text = []
    # Placeholders used until the first header/date line is seen. The date is
    # a real datetime so date arithmetic on the resulting Jerk still works.
    cur_date = datetime.datetime(1980, 1, 1)
    cur_num, cur_title = 'NaN', '!ERROR!'

    with open(fn, 'r') as inf:
        next(inf, None)  # first line just says how many acts are in the play
        for line in inf:
            if title_probe.search(line):
                header = title_fields.search(line)
                if header is None:
                    # Looks like a header but cannot be split into number and
                    # title: report it and keep the previous values.
                    print(line)
                else:
                    cur_num = int(header.group(1))
                    cur_title = header.group(2)
            elif date_field.search(line):
                date_string = date_field.search(line).group(1)
                month, day, year = map(int, date_string.split('/'))
                cur_date = datetime.datetime(year, month, day)
            elif re.match('--cut here--', line):
                jerk_objs.append(Jerk(cur_date, cur_num, cur_title, cur_text))
                cur_text = []
            elif has_content.search(line):
                # Strip only the newline so a final line without one is not
                # robbed of its last character.
                cur_text.append(line.rstrip('\n'))
    return jerk_objs
def find_jerk(jerk_objs, phrase):
    '''Return the jerks that contain a line matching phrase, plus those lines.

    phrase is a regular expression and is matched case-insensitively against
    each entry of jerk.lines. (Matching is done with re.IGNORECASE rather than
    by upper-casing the pattern, because upper-casing turns escapes such as
    \\d, \\s and \\b into their opposites.)

    jerk_objs -- iterable of objects with a .lines list of strings
    phrase    -- regular expression source text

    Returns a pair (tagged_jerks, tagged_lines) of equal length: for every
    jerk with at least one matching line, the jerk and its FIRST matching
    line. Raises re.error if phrase is not a valid regular expression.
    '''
    pattern = re.compile(phrase, re.IGNORECASE)
    tagged_jerks = []
    tagged_lines = []
    for jerk in jerk_objs:
        for line in jerk.lines:
            if pattern.search(line):
                tagged_jerks.append(jerk)
                tagged_lines.append(line)
                # One hit per jerk is enough; move on to the next jerk.
                break
    return tagged_jerks, tagged_lines
def find_hulag(jerk_objs):
    '''Count hulag-style words found in the lines of jerk_objs.

    A candidate is an 'H' followed by one or more of the letters H, B, U, L,
    A, G and then a whitespace character. Only the first candidate on each
    line is considered, and because whitespace must follow it, a candidate at
    the very end of a line is not seen. Candidates made up solely of H and A
    (e.g. 'HAHA') or solely of H and U are ignored.

    jerk_objs -- iterable of objects with a .lines list of strings

    Returns a dict mapping each counted word to its number of occurrences.
    '''
    candidate = re.compile(r'(H[HBULAG]+)\s')
    ignored_letter_sets = ({'H', 'A'}, {'H', 'U'})
    hulags = {}
    for jerk in jerk_objs:
        for line in jerk.lines:
            found = candidate.search(line)
            if found is None:
                continue
            hulag = found.group(1)
            if set(hulag) in ignored_letter_sets:
                continue
            hulags[hulag] = hulags.get(hulag, 0) + 1
    return hulags
def just_lines(jerk_objs):
    '''Flatten the .lines of every jerk into one new list, keeping order.'''
    return [line for jerk in jerk_objs for line in jerk.lines]
def find_by_date(jerk_objs, date_string):
    '''Return the Jerk whose date is closest to date_string.

    date_string is parsed with dateutil's parser.parse; the distance is the
    absolute difference between the parsed value and each jerk's .date. When
    several jerks are equally close, the earliest one in jerk_objs wins.

    jerk_objs   -- iterable of objects with a .date supporting subtraction
                   from the parsed datetime
    date_string -- free-form date text

    Returns the closest jerk, or the string 'Unable to parse date!' when the
    text cannot be parsed -- callers must check for that value.
    Raises IndexError if jerk_objs is empty.
    '''
    try:
        date = parser.parse(date_string)
    except (ValueError, OverflowError):
        # parse() signals unusable input with either of these.
        return 'Unable to parse date!'
    # A single pass is enough to find the minimum; no need to sort everything.
    closest = min(jerk_objs, key=lambda jerk: abs(date - jerk.date), default=None)
    if closest is None:
        # Keep the exception type callers saw when indexing an empty result.
        raise IndexError('no jerks to search')
    return closest
def find_by_num(jerk_objs, num):
    '''Return the jerk whose .num equals int(num), else a random jerk.

    jerk_objs -- sequence of objects with a .num attribute
    num       -- the wanted comic number; anything int() accepts

    Returns the first jerk with a matching number. If there is none, a
    randomly chosen element of jerk_objs is returned instead (the fallback
    was previously computed but never returned, so a miss yielded None).
    Raises ValueError if num cannot be converted to an int, and IndexError if
    jerk_objs is empty.
    '''
    wanted = int(num)  # convert once instead of once per element
    for jerk in jerk_objs:
        if jerk.num == wanted:
            return jerk
    return random.choice(jerk_objs)
if __name__ == '__main__':
    # Ad-hoc script: load the default file, order the jerks by how many
    # distinct characters they contain, and print the two extremes.
    jerk_objs = load_jerks()
    bychar = lambda jerk: len(jerk.characters)
    jerkchar = sorted(jerk_objs, key=bychar)
    print(jerkchar[0])
    print(jerkchar[-1])
    # NOTE(review): this unconditional exit makes everything below
    # unreachable; it looks like leftover experimentation -- confirm before
    # removing either the exit or the code after it.
    raise SystemExit
    hulags = find_hulag(jerk_objs)
    #pprint.pprint(hulags)
    #print(len(hulags))
    #print(find_jerk(jerk_objs, 'mumbo'))
    #print(random.choice(list(find_hulag(jerk_objs).keys())))
    phrase = 'mumbo'
    jerks, lines = find_jerk(jerk_objs, phrase)
    # Pick one (jerk, matching line) pair at random; random.choice raises
    # IndexError if the phrase matched nothing.
    jerk, line = random.choice(list(zip(jerks, lines)))
    #print('{} (Comic #{})'.format(line, jerk.num))
    #print(find_by_date(jerk_objs, 'may 8 2001').date)
    #print(find_by_date(jerk_objs, 'may 8 2002'))
    print(find_by_num(jerk_objs, '4163'))
    j, l = find_jerk(jerk_objs, 'folks')
    print(len(j), len(l))