Skip to content

Commit f36c2be

Browse files
authored
Merge pull request #5 from OpenTransitTools/response
Response is in place
2 parents 1050ba6 + bca2515 commit f36c2be

14 files changed

Lines changed: 811 additions & 54 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# junk & crap
2+
agencies.txt
23
logs
34
files
45
stats.txt

docs/modsec_response.log

Lines changed: 582 additions & 0 deletions
Large diffs are not rendered by default.

ott/log_parser/control/loader.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@ def load_log_file(file, session):
1616
recs = parser.parse_log_file(file)
1717
except:
1818
recs = None
19+
20+
# modsec?: with no recs from above, maybe this is a mod_security file containing trip plans
21+
#import pdb; pdb.set_trace()
1922
if recs is None or len(recs) == 0:
20-
# with no recs from first parser, maybe this is a mod_security file containing trip plans
21-
#import pdb; pdb.set_trace()
2223
recs = parser_modsec.parse_log_file(file)
2324

2425
if recs and len(recs) > 0:
@@ -29,6 +30,8 @@ def load_log_file(file, session):
2930
logs.append(rawlog)
3031
RawLog.persist_data(session, logs)
3132

33+
return
34+
3235

3336
def loader():
3437
files, cmdline = utils.cmd_line_loader()
@@ -45,8 +48,8 @@ def loader():
4548

4649

4750
def load_and_post_process():
48-
loader()
49-
ProcessedRequests.process()
51+
files,cmdline = loader()
52+
ProcessedRequests.process(ignore_test_system=cmdline.test_system)
5053
ProcessedRequests.post_process()
5154

5255

ott/log_parser/control/parser_modsec.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,15 +129,31 @@ def parse_section_c(req):
129129
try:
130130
if "query" in sec_c:
131131
if "variables" in sec_c:
132-
vars = sec_c.split("variables\":")
133-
ret_val = vars[1][:-1] # return things right of the variables, except for dangling bracket
132+
#import pdb; pdb.set_trace()
133+
vars = sec_c.split("variables\":")[1]
134+
# extra parse step for 'extensions' data (tuck extensions into the query json, and keep dangling bracket)
135+
if '},"extensions":' in vars:
136+
ret_val = vars.replace('},"extensions":', ',"extensions":')
137+
else:
138+
ret_val = vars[:-1] # return things right of the variables, except for dangling bracket
134139
else:
135140
ret_val = sec_c
136141
except Exception as e:
137142
pass
138143
return ret_val
139144

140145

146+
def parse_section_e(req):
    """
    Hand back section E of a parsed mod_security request, i.e. the response.

    In the log, that section looks like:

        --ac12e444-E--
        <json> (or <something>)

    :param req: mapping of section letter -> section content
    :return: whatever is stored under "E", or None when there is no such section
    """
    return req.get("E", None)
155+
156+
141157
def parse_section_f(req, def_code="520"):
142158
"""
143159
section f has response headers
@@ -180,6 +196,9 @@ def parse_raw_request(req):
180196
payload = parse_section_c(req)
181197
rec['payload'] = payload
182198

199+
response = parse_section_e(req)
200+
rec['response'] = response
201+
183202
code = parse_section_f(req)
184203
rec['code'] = code
185204

ott/log_parser/control/publisher.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ def csv(file_path, chunk_size=10000):
1010
session = utils.make_session(False)
1111
requests = session.query(ProcessedRequests).order_by(ProcessedRequests.ip_hash, ProcessedRequests.log_id).all()
1212
if requests and len(requests) > 0:
13-
fieldnames = requests[0].to_csv_dict().keys()
1413
#import pdb; pdb.set_trace()
14+
csv_columns = requests[0].to_csv_dict().keys()
1515
with open(file_path, mode='w') as csv_file:
16-
csv = file_utils.make_csv_writer(csv_file, fieldnames)
16+
csv = file_utils.make_csv_writer(csv_file, csv_columns)
1717
for r in requests:
1818
if not r.filter_request:
1919
csv.writerow(r.to_csv_dict())

ott/log_parser/db/processed_requests.py

Lines changed: 74 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from ast import Return
2-
#import imp -- MARCH 7 2026 .. removed this deprecated core util (any errors?)
32
from re import S
43
from sqlalchemy import Column, String, Boolean, Integer, Float, func, and_
54
from sqlalchemy.orm import relationship
@@ -54,11 +53,12 @@ class ProcessedRequests(Base):
5453
uselist=False, viewonly=True,
5554
)
5655

57-
def __init__(self, raw_rec):
56+
def __init__(self, raw_rec, ignore_test_system=False):
57+
#import pdb; pdb.set_trace()
5858
super(ProcessedRequests, self)
5959
self.log_id = raw_rec.id
6060
self.ip_hash = utils.obfuscate(raw_rec.ip)
61-
self.app_name = self.get_app_name(raw_rec)
61+
self.app_name = self.get_app_name(raw_rec, ignore_test_system)
6262

6363
# TODO - refactor, this is a confusing mix of model and controller / parser
6464
try:
@@ -81,13 +81,76 @@ def __init__(self, raw_rec):
8181
self.parse_modes(modes)
8282
self.parse_companies(qs)
8383
self.apply_filters(raw_rec.url)
84+
self.check_response(raw_rec.response)
8485
except:
8586
self.filter_request = -111
8687
log.debug("couldn't parse " + raw_rec.url)
8788

89+
@classmethod
def get_agency_map(cls, tm_only=False):
    """
    Build the lookup of agency key strings to agency display names.

    :param tm_only: when truthy, only the three TRIMET:* entries are returned
    :return: a new dict (key string -> display name); the TRIMET:* entries
             come first, followed by the regional partners unless tm_only is set
    """
    agency_map = {
        "TRIMET:TRAM": "Aerial Tram",
        "TRIMET:PSC": "Streetcar",
        "TRIMET:TRIMET": "TriMet",
    }

    if not tm_only:
        # regional partners are appended after the TriMet entries
        agency_map.update({
            "CLACKAMAS:ADULT": "Clackamas",
            "CTRAN:ADULT": "C-TRAN",
            "CTRAN_FLEX:ADULT": "The Current",
            "MULT:ADULT": "Multnomah",
            "RIDECONNECTION:ADULT": "Ride Connection",
            "SAM:ADULT": "SAM",
            "SMART:ADULT": "SMART",
            "WASH_FLEX": "SPOT",
            "WAPARK": "Washington Park",
        })

    return agency_map
114+
115+
def check_response(self, response):
    """
    Refine this record's agencies and modes using the trip-plan response body.

    - response contains at least one itinerary: self.agencies becomes a
      comma-separated string of the agency names whose keys occur in the
      response ("" when none do), and self.modes is trimmed to the modes the
      response appears to use.
    - response is an error, or has an empty itinerary list with routing
      errors: self.agencies is set to None.
    - anything else (including an empty / None response): nothing is changed.

    :param response: raw response text; may be None or empty
    """
    def find_agencies():
        """ comma-separated names of every mapped agency whose key occurs in the response """
        names = [name for key, name in self.get_agency_map().items() if key in response]
        return ",".join(names)

    def filter_modes(def_mode="WALK"):
        """ drop BUS / RAIL / FLEX from the requested modes when the response never used them """
        # a record with no modes has nothing to trim; calling .replace on None used
        # to raise here, which the caller then reported as an unparseable record
        m = self.modes or ""
        if "BUS" not in response:
            m = m.replace('BUS', '')
        if not utils.is_match_any(["RAIL", "SUBWAY", "TRAIN", "TRAM", "GONDOLA"], response):
            m = m.replace('RAIL', '')
        if not utils.is_match_any(["CALL_AGENCY", "COORDINATE_WITH_DRIVER"], response):
            m = m.replace('FLEX', '')

        # The removals leave empty slots behind ("WALK,,,BIKE", ",WALK,"). A single
        # replace(',,', ',') pass cannot collapse three commas in a row, so rebuild
        # the list from its non-empty parts instead.
        m = ",".join(part for part in m.split(",") if part)
        return m or def_mode

    if not response:
        return

    # each entry is a set of markers that together mean "no trip was planned"
    no_plan_markers = (
        ['errors":[{"message"'],
        ['"itineraries":[]', 'routingErrors', 'code'],
        ['"itineraries":[]', 'routingErrors'],
    )

    if '"itineraries":[{' in response:
        self.agencies = find_agencies()
        self.modes = filter_modes()
    elif any(utils.is_match_all(markers, response) for markers in no_plan_markers):
        self.agencies = None
149+
150+
88151
def apply_filters(self, url, fltval=-222):
89152
""" filter out uptime test urls, etc... """
90-
#import pdb; pdb.set_trace()
153+
#import pdb; pdb.set_trace()
91154
if self.filter_request is None:
92155
if 'fromPlace=PDX' in url and ('toPlace=ZOO' in url or 'toPlace=SW%20Zoo%20Rd' in url):
93156
self.filter_request = fltval
@@ -116,7 +179,7 @@ def apply_filters(self, url, fltval=-222):
116179
self.filter_request = fltval + 55
117180

118181
@classmethod
119-
def get_app_name(cls, rec, def_val="no idea what app..."):
182+
def get_app_name(cls, rec, ignore_test_system=False, def_val="no idea what app..."):
120183
""" trimet specific -- override me for other agencies / uses """
121184
app_name = def_val
122185

@@ -136,7 +199,7 @@ def get_app_name(cls, rec, def_val="no idea what app..."):
136199

137200
if len(rec.referer) > 3:
138201
referer = rec.referer.lower()
139-
if 'localhost:8000' in referer or 'labs' in referer or 'test.trimet' in referer:
202+
if ignore_test_system is False and ('localhost:8000' in referer or 'labs' in referer or 'test.trimet' in referer):
140203
app_name = TEST_SYSTEM
141204
elif 'call-test' in referer:
142205
app_name = call2
@@ -158,7 +221,7 @@ def get_app_name(cls, rec, def_val="no idea what app..."):
158221
elif utils.is_old_trimet(rec.url):
159222
app_name = old
160223

161-
if utils.is_developer_api(rec.url):
224+
if ignore_test_system is False and utils.is_developer_api(rec.url):
162225
rec.is_api = True
163226
if app_name is def_val:
164227
app_name = api
@@ -222,26 +285,7 @@ def parse_agencies(self, qs, tm_only=False):
222285
return the list of agencies implied in the request
223286
will look at the banned agencies param, and trim the list of possible request agencies
224287
"""
225-
tm_map = {
226-
"TRIMET:TRAM":"Aerial Tram",
227-
"TRIMET:PSC":"Streetcar",
228-
"TRIMET:TRIMET":"TriMet",
229-
}
230-
rtp_map = {
231-
"CLACKAMAS":"Clackamas",
232-
"CTRAN":"C-TRAN",
233-
"CTRAN_FLEX":"The Current",
234-
"MULT":"Multnomah",
235-
"RIDECONNECTION:":"Ride Connection",
236-
"SAM":"SAM",
237-
"SMART":"SMART",
238-
"WASH_FLEX":"SPOT",
239-
"WAPARK":"Washington Park",
240-
}
241-
if tm_only:
242-
agency_map = tm_map
243-
else:
244-
agency_map = tm_map | rtp_map
288+
agency_map = self.get_agency_map(tm_only)
245289

246290
# filter banned agencies from the above list
247291
for b in utils.get_banned_agencies(qs):
@@ -322,6 +366,7 @@ def to_csv_dict(self):
322366
- request datetime
323367
- ???
324368
"""
369+
#import pdb; pdb.set_trace()
325370
ua = utils.clean_useragent(self.log.browser)
326371
browser = utils.get_browser(ua)
327372
url = utils.to_url(self.log)
@@ -347,7 +392,7 @@ def to_csv_dict(self):
347392
return ret_val
348393

349394
@classmethod
350-
def process(cls, chunk_size=10000):
395+
def process(cls, chunk_size=10000, ignore_test_system=False):
351396
"""
352397
process logs from log file(s)
353398
"""
@@ -361,7 +406,7 @@ def process(cls, chunk_size=10000):
361406
# step 2: loop thru raw log file entries
362407
processed = []
363408
for l in logs:
364-
p = ProcessedRequests(l)
409+
p = ProcessedRequests(l, ignore_test_system)
365410
processed.append(p)
366411
# step 2b: save off the post-process data in 'chunks'
367412
if len(processed) > chunk_size:

ott/log_parser/db/raw_log.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ class RawLog(Base):
1414
date = Column(DateTime())
1515
url = Column(String(2084))
1616
payload = Column(String())
17+
response = Column(String())
1718
code = Column(Integer())
1819
referer = Column(String(2084))
1920
browser = Column(String(2084))
@@ -25,12 +26,14 @@ def __init__(self, rec):
2526
self.date = utils.convert_apache_dt(rec.get('apache_dt', None))
2627
self.url = rec.get('url', "")
2728
self.payload = rec.get('payload', "")
29+
self.response = rec.get('response', "")
2830
self.code = num_utils.to_int(rec.get('code', 212), 212)
2931
self.referer = rec.get('referer', "")
3032
self.browser = rec.get('browser', "")
3133
self.is_json = rec.get('is_json', False)
3234
#import pdb; pdb.set_trace()
3335

36+
3437
def main():
3538
from ..control.loader import load_log_file
3639
session = utils.make_session(False)

ott/log_parser/utils.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,19 +154,28 @@ def obfuscate(input, key=u'key'):
154154
return digest
155155

156156

157-
def cmd_line_loader(prog_name='log_parser/bin/loader', sub_dirs=[""]):
157+
def cmd_line_loader(prog_name='poetry run loader', sub_dirs=[""]):
158158
parser = db_cmdline.db_parser(prog_name, url_required=False)
159159
parser.add_argument(
160160
'--log_directory', '--logs', '-logs', '-l',
161161
required=True,
162162
help="Directory of .log files..."
163163
)
164+
# TODO: why are both logs and files needed?
165+
# file_utils.find_files(cmdline.log_directory, cmdline.files, True)
164166
parser.add_argument(
165167
'--files', '--ff', '-ff',
166168
required=False,
167169
default=".log",
168170
help="Directory of .log files..."
169171
)
172+
parser.add_argument(
173+
'--test_system', '--ts', '-ts',
174+
action='store_true',
175+
required=False,
176+
help="Don't mark any records as coming from a 'test system' (e.g., ability to load test requests and publish things, etc...)."
177+
)
178+
170179
cmdline = parser.parse_args()
171180
files = file_utils.find_files(cmdline.log_directory, cmdline.files, True)
172181
if len(files) == 0:
@@ -233,8 +242,12 @@ def encode(p):
233242
def to_url(log):
234243
ret_val = log.url
235244
if log.payload and len(log.payload) > 10 and '?' not in log.url:
236-
pl = json.loads(log.payload) # OTP 2.x graphql
237-
ret_val = "{}home/planner-trip/?fromPlace={}&toPlace={}".format(log.referer, encode(pl.get('fromPlace')), encode(pl.get('toPlace')))
245+
#import pdb; pdb.set_trace()
246+
try:
247+
pl = json.loads(log.payload) # OTP 2.x graphql
248+
ret_val = "{}home/planner-trip/?fromPlace={}&toPlace={}".format(log.referer, encode(pl.get('fromPlace')), encode(pl.get('toPlace')))
249+
except Exception as e:
250+
pass
238251
return ret_val
239252

240253

ott/log_parser/view/csv.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,29 @@
1+
from collections import Counter
2+
from ott.utils.parse.cmdline.base_cmdline import file_cmdline
3+
from ott.utils import file_utils
4+
15
import logging
26
log = logging.getLogger(__file__)
37

48

5-
def csv(data: dict, file_name: str):
6-
return None
9+
def modes_plus_agencies(prog_name='poetry run view_csv', file_name='trip_requests.csv'):
    """
    Print a tally of every distinct agencies / modes combination in a csv file.

    Each csv row is turned into one label:
      - "<agencies> -> <modes>" when the row names agencies
      - "<modes>" when it names none but the modes include BIKE or WALK
      - "(COULDN'T PLAN TRIP) <modes>" otherwise
    The labels are counted and printed, sorted by label, one per line with the
    count in front.

    :param prog_name: program name handed to the command-line parser
    :param file_name: csv file name handed to the command-line parser
    :return: 0
    """
    cmdline = file_cmdline(prog_name, file_name)
    print(f"{cmdline.file}")

    data = []
    for r in file_utils.read_csv(cmdline.file):
        # a missing or empty column must not abort the whole report
        companies = (r.get('agencies') or "").strip()
        modes = r.get('modes') or ""
        if len(companies) > 1:
            sep = " -> "
        elif "BIKE" in modes or "WALK" in modes:
            sep = ""
        else:
            sep = "(COULDN'T PLAN TRIP) "
        data.append(f"{companies}{sep}{modes}")

    counts = Counter(data)
    for label, count in sorted(counts.items()):
        print(f"{count:8} {label}")

    return 0

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,11 @@ test_process = "ott.log_parser.db.processed_requests:main"
2727
loader = "ott.log_parser.control.loader:main"
2828
load_and_post_process = "ott.log_parser.control.loader:load_and_post_process"
2929
publisher = "ott.log_parser.control.publisher:main"
30-
parser = "ott.log_parser.control.parser:main"
3130
parser_modsec_test = "ott.log_parser.control.parser_modsec:simple_test"
3231
report = "ott.log_parser.control.reporter:main"
32+
33+
modes_plus_agencies = "ott.log_parser.view.csv:modes_plus_agencies"
34+
parser = "ott.log_parser.control.parser:main"
3335
stats = "ott.log_parser.control.stats:main"
3436

3537
[build-system]

0 commit comments

Comments
 (0)