Skip to content

Commit b94c282

Browse files
authored
Merge pull request #1 from springdom/claude/gifted-lovelace
feat: alert tags, notes, silences, notifications & integrations
2 parents 4e946c7 + 3ae864d commit b94c282

41 files changed

Lines changed: 4971 additions & 69 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ Open-source alert management and incident response platform. Ingest alerts from
1818
- **Full audit trail** — Every incident action (created, alert added, severity changed, acknowledged, resolved) is recorded as a timeline event.
1919
- **Auto-resolve** — When all alerts in an incident resolve, the incident auto-resolves.
2020
- **Dark ops-console dashboard** — Real-time React UI with incident/alert views, severity badges, detail panels, and one-click ack/resolve.
21+
- **Alert tagging** — Categorize alerts with free-form tags. Add/remove tags from the detail panel or via API. Stored as JSONB with a GIN index.
22+
- **Investigation notes** — Add timestamped notes to alerts during investigation. Supports author attribution and full CRUD.
23+
- **Silence windows** — Suppress alerts during maintenance windows with service/severity matchers.
24+
- **Notification channels** — Configure Slack and email notification channels with severity/service filters.
2125

2226
## Architecture
2327

@@ -70,7 +74,8 @@ curl -X POST http://localhost:8000/api/v1/webhooks/generic \
7074
"severity": "critical",
7175
"service": "payment-api",
7276
"host": "web-01",
73-
"description": "CPU usage above 95% for 10 minutes"
77+
"description": "CPU usage above 95% for 10 minutes",
78+
"tags": ["production", "us-east-1"]
7479
}'
7580

7681
# Prometheus Alertmanager
@@ -162,10 +167,21 @@ curl -X POST http://localhost:8000/api/v1/webhooks/generic \
162167
| `GET` | `/api/v1/alerts/{id}` | Get alert by ID |
163168
| `POST` | `/api/v1/alerts/{id}/acknowledge` | Acknowledge alert |
164169
| `POST` | `/api/v1/alerts/{id}/resolve` | Resolve alert |
170+
| `PUT` | `/api/v1/alerts/{id}/tags` | Replace all tags |
171+
| `POST` | `/api/v1/alerts/{id}/tags/{tag}` | Add a tag |
172+
| `DELETE` | `/api/v1/alerts/{id}/tags/{tag}` | Remove a tag |
173+
| `GET` | `/api/v1/alerts/{id}/notes` | List notes |
174+
| `POST` | `/api/v1/alerts/{id}/notes` | Add a note |
175+
| `PUT` | `/api/v1/alerts/notes/{note_id}` | Update a note |
176+
| `DELETE` | `/api/v1/alerts/notes/{note_id}` | Delete a note |
165177
| `GET` | `/api/v1/incidents` | List incidents (filterable) |
166178
| `GET` | `/api/v1/incidents/{id}` | Get incident with alerts + events |
167179
| `POST` | `/api/v1/incidents/{id}/acknowledge` | Acknowledge incident + all alerts |
168180
| `POST` | `/api/v1/incidents/{id}/resolve` | Resolve incident + all alerts |
181+
| `GET` | `/api/v1/silences` | List silence windows |
182+
| `POST` | `/api/v1/silences` | Create silence window |
183+
| `GET` | `/api/v1/notifications/channels` | List notification channels |
184+
| `POST` | `/api/v1/notifications/channels` | Create notification channel |
169185

170186
## Configuration
171187

@@ -213,14 +229,16 @@ cd frontend && npm install && npm run dev
213229

214230
## Roadmap
215231

216-
- [ ] Grafana normalizer
217-
- [ ] Datadog normalizer
232+
- [x] Grafana normalizer
233+
- [x] Datadog normalizer
234+
- [x] Notification channels (Slack, email)
235+
- [x] Silence / maintenance windows
236+
- [x] Alert tagging and investigation notes
237+
- [x] Metrics and SLA tracking (MTTA, MTTR)
218238
- [ ] On-call scheduling and escalation policies
219-
- [ ] Notification channels (Slack, email, PagerDuty bridge)
220-
- [ ] Silence / maintenance windows
221239
- [ ] RBAC and multi-tenancy
222240
- [ ] Topology-aware correlation (service dependency graph)
223-
- [ ] Metrics and SLA tracking (MTTA, MTTR)
241+
- [ ] PagerDuty bridge
224242

225243
## License
226244

backend/api/routes/alerts.py

Lines changed: 151 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,67 @@
99
from backend.schemas import (
1010
AlertAckRequest,
1111
AlertListResponse,
12+
AlertNoteCreate,
13+
AlertNoteListResponse,
14+
AlertNoteResponse,
15+
AlertNoteUpdate,
1216
AlertResponse,
17+
AlertTagsUpdate,
18+
)
19+
from backend.services import (
20+
acknowledge_alert,
21+
add_alert_tag,
22+
create_alert_note,
23+
delete_alert_note,
24+
get_alert_notes,
25+
get_alerts,
26+
remove_alert_tag,
27+
resolve_alert,
28+
update_alert_note,
29+
update_alert_tags,
1330
)
14-
from backend.services import acknowledge_alert, get_alerts, resolve_alert
1531

1632
router = APIRouter(prefix="/alerts", tags=["alerts"])
1733

1834

35+
# ─── Note routes (declared ahead of the /{alert_id}/... routes so /notes/... paths resolve to the note handlers first) ──
36+
37+
38+
# PUT handler: swaps the stored text of one note for the text sent in the body.
@router.put(
    "/notes/{note_id}",
    summary="Update a note",
    response_model=AlertNoteResponse,
)
async def edit_note(
    note_id: uuid.UUID,
    body: AlertNoteUpdate,
    db: AsyncSession = Depends(get_db),
) -> AlertNoteResponse:
    """Update the text of an existing note."""
    # The service layer is handed the id as a string, not as a UUID object.
    updated = await update_alert_note(db, str(note_id), body.text)
    if updated:
        return AlertNoteResponse.model_validate(updated)
    # A falsy result from the service is reported to the client as 404.
    raise HTTPException(status_code=404, detail="Note not found")
53+
54+
55+
# DELETE handler: removes one note; success is signalled by status code alone.
@router.delete(
    "/notes/{note_id}",
    summary="Delete a note",
    status_code=204,
)
async def remove_note(
    note_id: uuid.UUID,
    db: AsyncSession = Depends(get_db),
) -> None:
    """Delete a note."""
    was_deleted = await delete_alert_note(db, str(note_id))
    if was_deleted:
        # Nothing to return: the decorator's 204 status is the whole response.
        return
    # A falsy result from the service is reported to the client as 404.
    raise HTTPException(status_code=404, detail="Note not found")
68+
69+
70+
# ─── Alert list & detail ──────────────────────────────────
71+
72+
1973
@router.get(
2074
"",
2175
response_model=AlertListResponse,
@@ -67,6 +121,9 @@ async def get_alert(
67121
return AlertResponse.model_validate(alert)
68122

69123

124+
# ─── Alert actions ─────────────────────────────────────────
125+
126+
70127
@router.post(
71128
"/{alert_id}/acknowledge",
72129
response_model=AlertResponse,
@@ -103,3 +160,96 @@ async def resolve(
103160
raise HTTPException(status_code=404, detail="Alert not found")
104161

105162
return AlertResponse.model_validate(alert)
163+
164+
165+
# ─── Tags ──────────────────────────────────────────────────
166+
167+
168+
# PUT handler: the tag list in the request body is passed whole to the service.
@router.put(
    "/{alert_id}/tags",
    summary="Set alert tags",
    response_model=AlertResponse,
)
async def set_tags(
    alert_id: uuid.UUID,
    body: AlertTagsUpdate,
    db: AsyncSession = Depends(get_db),
) -> AlertResponse:
    """Replace all tags on an alert."""
    # The service layer is handed the id as a string, not as a UUID object.
    retagged = await update_alert_tags(db, str(alert_id), body.tags)
    if retagged:
        return AlertResponse.model_validate(retagged)
    # A falsy result from the service is reported to the client as 404.
    raise HTTPException(status_code=404, detail="Alert not found")
183+
184+
185+
# POST handler: the tag to attach travels in the URL path, not in a body.
@router.post(
    "/{alert_id}/tags/{tag}",
    summary="Add a tag to an alert",
    response_model=AlertResponse,
)
async def add_tag(
    alert_id: uuid.UUID,
    tag: str,
    db: AsyncSession = Depends(get_db),
) -> AlertResponse:
    """Add a single tag to an alert."""
    # The tag is forwarded to the service exactly as received from the path.
    tagged = await add_alert_tag(db, str(alert_id), tag)
    if tagged:
        return AlertResponse.model_validate(tagged)
    # A falsy result from the service is reported to the client as 404.
    raise HTTPException(status_code=404, detail="Alert not found")
200+
201+
202+
# DELETE handler: unlike note deletion, this returns the alert in the response.
@router.delete(
    "/{alert_id}/tags/{tag}",
    summary="Remove a tag from an alert",
    response_model=AlertResponse,
)
async def delete_tag(
    alert_id: uuid.UUID,
    tag: str,
    db: AsyncSession = Depends(get_db),
) -> AlertResponse:
    """Remove a single tag from an alert."""
    # The tag is forwarded to the service exactly as received from the path.
    untagged = await remove_alert_tag(db, str(alert_id), tag)
    if untagged:
        return AlertResponse.model_validate(untagged)
    # A falsy result from the service is reported to the client as 404.
    raise HTTPException(status_code=404, detail="Alert not found")
217+
218+
219+
# ─── Notes ─────────────────────────────────────────────────
220+
221+
222+
# GET handler: returns every note the service yields for the alert, plus a count.
@router.get(
    "/{alert_id}/notes",
    summary="List alert notes",
    response_model=AlertNoteListResponse,
)
async def list_notes(
    alert_id: uuid.UUID,
    db: AsyncSession = Depends(get_db),
) -> AlertNoteListResponse:
    """Get all notes for an alert."""
    rows = await get_alert_notes(db, str(alert_id))
    serialized = [AlertNoteResponse.model_validate(row) for row in rows]
    # NOTE(review): there is no 404 branch here, unlike the sibling routes —
    # presumably an unknown alert just yields no notes; confirm against
    # get_alert_notes.
    return AlertNoteListResponse(notes=serialized, total=len(rows))
237+
238+
239+
# POST handler: creates a note from the body's text and author; replies 201.
@router.post(
    "/{alert_id}/notes",
    response_model=AlertNoteResponse,
    status_code=201,
    summary="Add a note to an alert",
)
async def add_note(
    alert_id: uuid.UUID,
    body: AlertNoteCreate,
    db: AsyncSession = Depends(get_db),
) -> AlertNoteResponse:
    """Add a timestamped note to an alert."""
    try:
        # The service layer is handed the id as a string, not as a UUID object.
        note = await create_alert_note(db, str(alert_id), body.text, body.author)
    except ValueError as exc:
        # Chain the original error so the underlying cause stays visible in
        # tracebacks and logs instead of being reported as a second, unrelated
        # failure raised while handling the first.
        # NOTE(review): every ValueError from the service is reported as a
        # missing alert — confirm that is the only reason it raises one.
        raise HTTPException(status_code=404, detail="Alert not found") from exc
    return AlertNoteResponse.model_validate(note)

0 commit comments

Comments
 (0)