Skip to content

Commit 50070d8

Browse files
committed
chore: sanitize example data in docs and tests
1 parent ce8cfaf commit 50070d8

File tree

3 files changed

+44
-44
lines changed

3 files changed

+44
-44
lines changed

backend/integrations/email_ingest.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
<p>Alert triggered: Production ERROR/FATAL Monitor</p>
1919
<table>
2020
<tr><th>host</th><th>source</th><th>message</th><th>_raw</th></tr>
21-
<tr><td>web-01</td><td>/opt/eapservice/app/log/app.log</td>
21+
<tr><td>web-01</td><td>/var/log/myapp/myapp.log</td>
2222
<td>ERROR</td><td>2026-02-12 04:10:57 ERROR ...</td></tr>
23-
<tr><td>web-02</td><td>/opt/eapservice/app/log/app.log</td>
23+
<tr><td>web-02</td><td>/var/log/myapp/myapp.log</td>
2424
<td>FATAL</td><td>2026-02-12 04:11:02 FATAL ...</td></tr>
2525
</table>
2626
</body>
@@ -306,7 +306,7 @@ def normalize(self, payload: dict) -> list[NormalizedAlert]:
306306
if not service:
307307
source_path = row.get("source", "")
308308
if source_path:
309-
# Extract app name from paths like /opt/eapservice/APP_NAME/log/...
309+
# Extract app name from paths like /opt/app/APP_NAME/log/...
310310
match = re.search(r"/([^/]+)/logs?/", source_path)
311311
if match:
312312
service = match.group(1)

docs/DOCUMENTATION.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ For best results, modify your SPL query to aggregate results before sending. Thi
119119
Example — current email-based query:
120120

121121
```spl
122-
index=* (host="*prod*") source="/opt/eapservice/APP/log/APP.log"
122+
index=*
123123
| rex field=_raw "(?<message>ERROR|FATAL)"
124124
| search message="ERROR" OR message="FATAL"
125125
| table host, source, message, _raw
@@ -128,7 +128,7 @@ index=* (host="*prod*") source="/opt/eapservice/APP/log/APP.log"
128128
Recommended webhook-optimized version:
129129

130130
```spl
131-
index=* (host="*prod*") source="/opt/eapservice/APP/log/APP.log"
131+
index=*
132132
| rex field=_raw "(?<message>ERROR|FATAL)"
133133
| search message="ERROR" OR message="FATAL"
134134
| stats count as error_count
@@ -203,7 +203,7 @@ The email normalizer:
203203
- Extracts the alert name from the subject line (strips "Splunk Alert:" prefix)
204204
- Parses the HTML table into individual rows
205205
- Creates one alert per row, using the same field extraction as the webhook normalizer
206-
- If no explicit `service` field exists, derives it from the log path (e.g., `/opt/eapservice/APPNAME/log/...` → `APPNAME`)
206+
- If no explicit `service` field exists, derives it from the log path (e.g., `/opt/app/SERVICE_NAME/log/...` → `SERVICE_NAME`)
207207
- Falls back to tab-delimited or pipe-delimited plain text if no HTML table is found
208208
- Creates a single alert from the email body if no table is parseable
209209

tests/test_email_ingest.py

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
parse_plain_text_table,
88
)
99

10-
# ─── Realistic Splunk alert email (based on user's actual query output) ───
10+
# ─── Realistic Splunk alert email (example format) ──────────────────
1111

1212
SPLUNK_HTML_EMAIL = {
1313
"subject": "Splunk Alert: Production ERROR/FATAL Monitor",
@@ -24,22 +24,22 @@
2424
<th>_raw</th>
2525
</tr>
2626
<tr>
27-
<td>ihub-intraprod-std-webapp-2.int.company.com</td>
28-
<td>/opt/eapservice/REDACTED_2/logs/REDACTED_2.log</td>
27+
<td>app-prod-web-2.internal.example.com</td>
28+
<td>/opt/apps/billing-svc/logs/billing-svc.log</td>
2929
<td>ERROR</td>
30-
<td>2026-02-12 04:10:57.489 [ajp-nio-10.0.0.2-10022-exec-2]
30+
<td>2026-02-12 04:10:57.489 [http-nio-8080-exec-2]
3131
ERROR o.a.c.c.C - dispatcherServlet threw exception</td>
3232
</tr>
3333
<tr>
34-
<td>ihub-intraprod-std-webapp-2.int.company.com</td>
35-
<td>/opt/eapservice/REDACTED_2/logs/REDACTED_2.log</td>
34+
<td>app-prod-web-2.internal.example.com</td>
35+
<td>/opt/apps/billing-svc/logs/billing-svc.log</td>
3636
<td>ERROR</td>
3737
<td>2026-02-12 04:10:57.489 ERROR -
3838
Connection prematurely closed BEFORE response</td>
3939
</tr>
4040
<tr>
41-
<td>ihub-intraprod-std-webapp-1.int.company.com</td>
42-
<td>/opt/eapservice/REDACTED_1/log/REDACTED_1.log</td>
41+
<td>app-prod-web-1.internal.example.com</td>
42+
<td>/opt/apps/order-svc/log/order-svc.log</td>
4343
<td>FATAL</td>
4444
<td>2026-02-12 04:12:00.000 FATAL OutOfMemoryError: Java heap space</td>
4545
</tr>
@@ -48,8 +48,8 @@
4848
</html>
4949
""",
5050
"body_text": "",
51-
"from": "splunk@company.com",
52-
"to": "it-alerts@company.com",
51+
"from": "splunk@example.com",
52+
"to": "alerts@example.com",
5353
}
5454

5555
# Aggregated query results (what the recommended SPL produces)
@@ -62,31 +62,31 @@
6262
<tr><th>source</th><th>message</th><th>error_count</th><th>affected_hosts</th>
6363
<th>host</th><th>severity</th><th>service</th><th>latest_error</th></tr>
6464
<tr>
65-
<td>/opt/eapservice/REDACTED_2/logs/REDACTED_2.log</td>
65+
<td>/opt/apps/billing-svc/logs/billing-svc.log</td>
6666
<td>ERROR</td>
6767
<td>27</td>
6868
<td>2</td>
69-
<td>ihub-intraprod-std-webapp-2.int.company.com</td>
69+
<td>app-prod-web-2.internal.example.com</td>
7070
<td>high</td>
71-
<td>REDACTED_2</td>
71+
<td>billing-svc</td>
7272
<td>Connection prematurely closed BEFORE response</td>
7373
</tr>
7474
<tr>
75-
<td>/opt/eapservice/REDACTED_1/log/REDACTED_1.log</td>
75+
<td>/opt/apps/order-svc/log/order-svc.log</td>
7676
<td>FATAL</td>
7777
<td>3</td>
7878
<td>1</td>
79-
<td>ihub-intraprod-std-webapp-1.int.company.com</td>
79+
<td>app-prod-web-1.internal.example.com</td>
8080
<td>critical</td>
81-
<td>REDACTED_1</td>
81+
<td>order-svc</td>
8282
<td>FATAL OutOfMemoryError: Java heap space</td>
8383
</tr>
8484
</table>
8585
</body>
8686
</html>
8787
""",
88-
"from": "splunk@company.com",
89-
"to": "it-alerts@company.com",
88+
"from": "splunk@example.com",
89+
"to": "alerts@example.com",
9090
}
9191

9292
PLAIN_TEXT_EMAIL = {
@@ -96,15 +96,15 @@
9696
"db-01\t/var/log/syslog\tDisk 95% full\n"
9797
"db-02\t/var/log/syslog\tDisk 88% full"
9898
),
99-
"from": "splunk@company.com",
100-
"to": "alerts@company.com",
99+
"from": "splunk@example.com",
100+
"to": "alerts@example.com",
101101
}
102102

103103
MINIMAL_EMAIL = {
104104
"subject": "Splunk Alert: Something Happened",
105105
"body_text": "An alert was triggered but no table data is available.",
106-
"from": "splunk@company.com",
107-
"to": "alerts@company.com",
106+
"from": "splunk@example.com",
107+
"to": "alerts@example.com",
108108
}
109109

110110

@@ -229,7 +229,7 @@ def test_invalid_no_body(self):
229229

230230
class TestEmailNormalization:
231231
def test_html_email_produces_multiple_alerts(self):
232-
"""Your actual Splunk email format with 3 result rows."""
232+
"""Splunk email format with 3 result rows."""
233233
n = EmailNormalizer()
234234
alerts = n.normalize(SPLUNK_HTML_EMAIL)
235235

@@ -246,17 +246,17 @@ def test_html_email_extracts_hosts(self):
246246
alerts = n.normalize(SPLUNK_HTML_EMAIL)
247247

248248
hosts = [a.host for a in alerts]
249-
assert "ihub-intraprod-std-webapp-2.int.company.com" in hosts
250-
assert "ihub-intraprod-std-webapp-1.int.company.com" in hosts
249+
assert "app-prod-web-2.internal.example.com" in hosts
250+
assert "app-prod-web-1.internal.example.com" in hosts
251251

252252
def test_html_email_extracts_service_from_path(self):
253253
"""Service should be derived from the log path."""
254254
n = EmailNormalizer()
255255
alerts = n.normalize(SPLUNK_HTML_EMAIL)
256256

257257
services = [a.service for a in alerts]
258-
assert "REDACTED_2" in services
259-
assert "REDACTED_1" in services
258+
assert "billing-svc" in services
259+
assert "order-svc" in services
260260

261261
def test_html_email_description_from_raw(self):
262262
"""When no 'message' description field, _raw should be used."""
@@ -277,13 +277,13 @@ def test_aggregated_email_uses_explicit_fields(self):
277277
# First row: high severity
278278
a1 = alerts[0]
279279
assert a1.severity == "high"
280-
assert a1.service == "REDACTED_2"
281-
assert a1.host == "ihub-intraprod-std-webapp-2.int.company.com"
280+
assert a1.service == "billing-svc"
281+
assert a1.host == "app-prod-web-2.internal.example.com"
282282

283283
# Second row: critical severity
284284
a2 = alerts[1]
285285
assert a2.severity == "critical"
286-
assert a2.service == "REDACTED_1"
286+
assert a2.service == "order-svc"
287287
assert "OutOfMemoryError" in a2.description
288288

289289
def test_plain_text_email(self):
@@ -308,7 +308,7 @@ def test_labels_contain_email_metadata(self):
308308
alerts = n.normalize(SPLUNK_HTML_EMAIL)
309309

310310
for a in alerts:
311-
assert a.labels["splunk_email_from"] == "splunk@company.com"
311+
assert a.labels["splunk_email_from"] == "splunk@example.com"
312312
assert a.labels["splunk_search_name"] == "Production ERROR/FATAL Monitor"
313313

314314
def test_internal_fields_excluded_from_labels(self):
@@ -328,18 +328,18 @@ def test_same_service_same_incident(self):
328328
n = EmailNormalizer()
329329
alerts = n.normalize(SPLUNK_HTML_EMAIL)
330330

331-
# First two alerts are from REDACTED_2 — same service
332-
assert alerts[0].service == "REDACTED_2"
333-
assert alerts[1].service == "REDACTED_2"
334-
# Third alert is from REDACTED_1 — different service
335-
assert alerts[2].service == "REDACTED_1"
331+
# First two alerts are from billing-svc — same service
332+
assert alerts[0].service == "billing-svc"
333+
assert alerts[1].service == "billing-svc"
334+
# Third alert is from order-svc — different service
335+
assert alerts[2].service == "order-svc"
336336

337337
def test_different_hosts_same_service(self):
338338
"""Different hosts with same service should still correlate."""
339339
n = EmailNormalizer()
340340
alerts = n.normalize(SPLUNK_HTML_EMAIL)
341341

342-
# Both from REDACTED_2 on the same host — will dedup
342+
# Both from billing-svc on the same host — will dedup
343343
assert alerts[0].host == alerts[1].host
344-
# REDACTED_1 on different host — separate alert, same incident
344+
# order-svc on different host — separate alert, same incident
345345
assert alerts[2].host != alerts[0].host

0 commit comments

Comments
 (0)