From 7163588e29112ed3cbb196fbfbc0aba5c0e1b88f Mon Sep 17 00:00:00 2001 From: Ankit Juneja Date: Thu, 14 May 2026 22:10:44 +0530 Subject: [PATCH 01/11] feat(runbooks): runbook-aware diagnosis grounding (#1073 phase 2a) --- app/cli/commands/__init__.py | 2 + app/cli/commands/runbook.py | 54 +++++ .../command_registry/cli_parity.py | 10 + .../publish_findings/formatters/report.py | 16 ++ .../publish_findings/report_context.py | 15 ++ app/runbooks/__init__.py | 21 ++ app/runbooks/retrieval.py | 67 +++++++ app/runbooks/store.py | 185 ++++++++++++++++++ app/state/agent_state.py | 7 + docs/runbooks.mdx | 94 +++++++++ tests/cli/test_runbook_command.py | 98 ++++++++++ tests/delivery/test_report_provenance.py | 65 ++++++ .../plan_actions/test_runbook_retrieval.py | 100 ++++++++++ .../test_runbook_injection.py | 50 +++++ tests/runbooks/__init__.py | 0 tests/runbooks/test_retrieval.py | 107 ++++++++++ tests/runbooks/test_store.py | 122 ++++++++++++ .../runbooks/001-payments-oom/alert.json | 15 ++ .../runbooks/001-payments-oom/answer.yml | 7 + .../runbooks/001-payments-oom/runbook.md | 16 ++ .../runbooks/001-payments-oom/scenario.yml | 11 ++ tests/synthetic/runbooks/__init__.py | 0 .../synthetic/runbooks/test_runbook_suite.py | 94 +++++++++ 23 files changed, 1156 insertions(+) create mode 100644 app/cli/commands/runbook.py create mode 100644 app/runbooks/__init__.py create mode 100644 app/runbooks/retrieval.py create mode 100644 app/runbooks/store.py create mode 100644 docs/runbooks.mdx create mode 100644 tests/cli/test_runbook_command.py create mode 100644 tests/nodes/plan_actions/test_runbook_retrieval.py create mode 100644 tests/nodes/root_cause_diagnosis/test_runbook_injection.py create mode 100644 tests/runbooks/__init__.py create mode 100644 tests/runbooks/test_retrieval.py create mode 100644 tests/runbooks/test_store.py create mode 100644 tests/synthetic/runbooks/001-payments-oom/alert.json create mode 100644 tests/synthetic/runbooks/001-payments-oom/answer.yml create mode 100644 tests/synthetic/runbooks/001-payments-oom/runbook.md create mode 100644 tests/synthetic/runbooks/001-payments-oom/scenario.yml create mode 100644 tests/synthetic/runbooks/__init__.py create mode 100644 tests/synthetic/runbooks/test_runbook_suite.py diff --git a/app/cli/commands/__init__.py b/app/cli/commands/__init__.py index d6939a159..e1989d5d9 100644 --- a/app/cli/commands/__init__.py +++ b/app/cli/commands/__init__.py @@ -20,6 +20,7 @@ from app.cli.commands.messaging import messaging from app.cli.commands.onboard import onboard from app.cli.commands.remote import remote +from app.cli.commands.runbook import runbook from app.cli.commands.tests import tests from app.cli.commands.watchdog import watchdog_command @@ -35,6 +36,7 @@ messaging, hermes_command, watchdog_command, + runbook, health_command, doctor_command, update_command, diff --git a/app/cli/commands/runbook.py b/app/cli/commands/runbook.py new file mode 100644 index 000000000..c04a80c36 --- /dev/null +++ b/app/cli/commands/runbook.py @@ -0,0 +1,54 @@ +"""``opensre runbook`` CLI group — manage local diagnosis runbooks.""" + +from __future__ import annotations + +from pathlib import Path + +import click + +from app.runbooks.store import ( + RunbookValidationError, + _runbook_dir, + load_all, + remove, + save, +) + + +@click.group(name="runbook") +def runbook() -> None: + """Manage local runbooks that ground diagnosis remediation steps.""" + + +@runbook.command("add") +@click.argument("path", type=click.Path(exists=True, dir_okay=False, path_type=Path)) +def runbook_add(path: Path) -> None: + """Copy a markdown runbook into ~/.config/opensre/runbooks/.""" + try: + stored = save(path) + except RunbookValidationError as exc: + raise click.ClickException(str(exc)) from exc + click.echo(f"✓ Saved runbook '{stored.slug}' to {stored.path}") + + +@runbook.command("list") +def runbook_list() -> None: + """List runbooks currently in the local store.""" + runbooks = load_all() + if not runbooks: + click.echo(f"No runbooks found in {_runbook_dir()}") + return + for rb in runbooks: + service = rb.service or "-" + triggers = ", ".join(rb.triggers) + click.echo(f"{rb.slug} service={service} triggers=[{triggers}]") + + +@runbook.command("remove") +@click.argument("slug") +def runbook_remove(slug: str) -> None: + """Delete a runbook by slug (the filename without .md).""" + if remove(slug): + click.echo(f"✓ Removed runbook '{slug}'") + return + raise click.ClickException(f"no runbook with slug '{slug}' in {_runbook_dir()}") diff --git a/app/cli/interactive_shell/command_registry/cli_parity.py b/app/cli/interactive_shell/command_registry/cli_parity.py index 348e0968a..bf2444b19 100644 --- a/app/cli/interactive_shell/command_registry/cli_parity.py +++ b/app/cli/interactive_shell/command_registry/cli_parity.py @@ -234,6 +234,10 @@ def _cmd_watchdog(session: ReplSession, console: Console, args: list[str]) -> bo return run_cli_command(console, ["watchdog", *args]) +def _cmd_runbook(session: ReplSession, console: Console, args: list[str]) -> bool: # noqa: ARG001 + return run_cli_command(console, ["runbook", *args]) + + COMMANDS: list[SlashCommand] = [ SlashCommand( "/onboard", @@ -296,4 +300,10 @@ def _cmd_watchdog(session: ReplSession, console: Console, args: list[str]) -> bo _cmd_watchdog, execution_tier=ExecutionTier.SAFE, ), + SlashCommand( + "/runbook", + "manage local runbooks for diagnosis grounding ('/runbook add|list|remove')", + _cmd_runbook, + execution_tier=ExecutionTier.SAFE, + ), ] diff --git a/app/delivery/publish_findings/formatters/report.py b/app/delivery/publish_findings/formatters/report.py index c22a852cb..615d2dfca 100644 --- a/app/delivery/publish_findings/formatters/report.py +++ b/app/delivery/publish_findings/formatters/report.py @@ -502,6 +502,9 @@ def format_slack_message(ctx: ReportContext) -> str: + "\n".join(f"• {_sanitize_for_slack(s)}" for s in remediation_steps) + "\n" ) + runbook_provenance = ctx.get("runbook_provenance") + if runbook_provenance and runbook_provenance.get("slug"): + remediation_block += f"_Source: runbooks/{runbook_provenance['slug']}.md_\n" trace_steps = build_investigation_trace(ctx) trace_block = ( @@ -705,6 +708,19 @@ def _add(block: "dict[str, Any] | None") -> None: } ) _add(_mrkdwn_section("\n".join(f"• {_sanitize_for_slack(s)}" for s in remediation_steps))) + runbook_provenance = ctx.get("runbook_provenance") + if runbook_provenance and runbook_provenance.get("slug"): + blocks.append( + { + "type": "context", + "elements": [ + { + "type": "mrkdwn", + "text": f"_Source: runbooks/{runbook_provenance['slug']}.md_", + } + ], + } + ) # ── Investigation Trace ── trace_steps = build_investigation_trace(ctx) diff --git a/app/delivery/publish_findings/report_context.py b/app/delivery/publish_findings/report_context.py index da5c16335..93f4b958a 100644 --- a/app/delivery/publish_findings/report_context.py +++ b/app/delivery/publish_findings/report_context.py @@ -95,6 +95,10 @@ class ReportContext(TypedDict, total=False): # Alert severity (e.g. critical, high) for channel-specific formatting (Telegram, etc.) severity: str | None + # Runbook provenance: top-1 runbook used to ground remediation_steps + # (populated by pipeline → matched_runbook). + runbook_provenance: dict[str, str] | None + kube_pod_name: str | None kube_container_name: str | None kube_namespace: str | None @@ -897,6 +901,16 @@ def build_report_context(state: InvestigationState) -> ReportContext: """ ns = _NormalizedState(state) source_provenance = _build_source_provenance(ns.available_sources) + matched_runbook = state.get("matched_runbook") + runbook_provenance: dict[str, str] | None = ( + { + "slug": str(matched_runbook.get("slug", "")), + "title": str(matched_runbook.get("title") or matched_runbook.get("slug", "")), + "path": str(matched_runbook.get("path", "")), + } + if isinstance(matched_runbook, dict) and matched_runbook.get("slug") + else None + ) catalog, source_to_id = _build_evidence_catalog(ns) # Add provenance summaries to evidence entries when possible. for source_name, entry_id in source_to_id.items(): @@ -955,6 +969,7 @@ def build_report_context(state: InvestigationState) -> ReportContext: "datadog_site": ns.datadog_site, "source_provenance": source_provenance, "severity": (state.get("severity") or None), + "runbook_provenance": runbook_provenance, # Kubernetes pod details — from Datadog evidence first, then alert annotations "kube_pod_name": ( ns.evidence.get("datadog_pod_name") diff --git a/app/runbooks/__init__.py b/app/runbooks/__init__.py new file mode 100644 index 000000000..25fe51c42 --- /dev/null +++ b/app/runbooks/__init__.py @@ -0,0 +1,21 @@ +"""Local runbook store + retrieval used to ground diagnosis remediation steps.""" + +from app.runbooks.retrieval import retrieve_matching_runbook +from app.runbooks.store import ( + RUNBOOK_DIR, + Runbook, + RunbookValidationError, + load_all, + remove, + save, +) + +__all__ = [ + "RUNBOOK_DIR", + "Runbook", + "RunbookValidationError", + "load_all", + "remove", + "retrieve_matching_runbook", + "save", +] diff --git a/app/runbooks/retrieval.py b/app/runbooks/retrieval.py new file mode 100644 index 000000000..f27ba318d --- /dev/null +++ b/app/runbooks/retrieval.py @@ -0,0 +1,67 @@ +"""Deterministic top-1 runbook retrieval. + +Pure scoring — no disk I/O, no LLM calls. The caller is responsible for +loading the candidate runbooks (see ``app.runbooks.store.load_all``) and for +producing keyword/service inputs from the current alert state. +""" + +from __future__ import annotations + +from app.runbooks.store import Runbook + + +def _score( + runbook: Runbook, + keyword_set: frozenset[str], + service: str | None, + pipeline_name: str | None, +) -> int: + """Score a single runbook against the current alert. + + +2 when ``runbook.service`` matches the alert service or pipeline name. + +1 for each shared trigger keyword. + """ + service_score = 0 + if runbook.service: + rb_service = runbook.service.lower() + if (service and rb_service == service.lower()) or ( + pipeline_name and rb_service == pipeline_name.lower() + ): + service_score = 2 + + keyword_overlap = len(keyword_set & set(runbook.triggers)) + return service_score + keyword_overlap + + +def retrieve_matching_runbook( + runbooks: list[Runbook], + keywords: list[str], + service: str | None, + pipeline_name: str | None, +) -> Runbook | None: + """Return the top-1 runbook by score, or ``None`` when nothing matches. + + Ties broken by slug (sorted ascending) for deterministic output. + """ + if not runbooks: + return None + + keyword_set = frozenset(k.lower() for k in keywords if k) + best: tuple[int, str] | None = None + winner: Runbook | None = None + + for runbook in runbooks: + score = _score(runbook, keyword_set, service, pipeline_name) + if score <= 0: + continue + candidate = (score, runbook.slug) + # Higher score wins; on tie, lexicographically smaller slug wins. + if ( + best is None + or candidate[0] > best[0] + or (candidate[0] == best[0] and candidate[1] < best[1]) + ): + best = candidate + winner = runbook + + return winner diff --git a/app/runbooks/store.py b/app/runbooks/store.py new file mode 100644 index 000000000..2735cb5c1 --- /dev/null +++ b/app/runbooks/store.py @@ -0,0 +1,185 @@ +"""Disk-backed store for user-authored markdown runbooks. + +A runbook is a Markdown file with YAML frontmatter that declares the alert +shapes it applies to. Files live under ``~/.config/opensre/runbooks/`` and are +loaded lazily by the planner during an investigation. + +Required frontmatter keys: + triggers: list[str] # lowercase keyword fragments matched against the alert + +Optional frontmatter keys: + service: str # matched against pipeline_name / alert service + category: str # informational; surfaced in `runbook list` + title: str # falls back to slug when absent +""" + +from __future__ import annotations + +import re +import shutil +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +import yaml # type: ignore[import-untyped] + +from app.constants import OPENSRE_HOME_DIR + +RUNBOOK_DIR: Path = OPENSRE_HOME_DIR / "runbooks" + +_FRONTMATTER_RE = re.compile(r"\A---\s*\n(.*?)\n---\s*\n", re.DOTALL) + + +class RunbookValidationError(ValueError): + """Raised when a runbook file is missing required frontmatter.""" + + +@dataclass(frozen=True) +class Runbook: + """Parsed runbook ready for retrieval and prompt injection.""" + + slug: str + title: str + service: str | None + category: str | None + triggers: tuple[str, ...] + body: str + path: Path + extra: dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict[str, Any]: + """Serialize into a plain dict for the LangGraph state envelope.""" + return { + "slug": self.slug, + "title": self.title, + "service": self.service, + "category": self.category, + "triggers": list(self.triggers), + "body": self.body, + "path": str(self.path), + } + + +def _runbook_dir() -> Path: + """Return RUNBOOK_DIR, re-resolved against the current OPENSRE_HOME_DIR. + + Tests monkeypatch ``app.constants.OPENSRE_HOME_DIR`` to redirect the store + onto a tmp_path. The module-level ``RUNBOOK_DIR`` constant is captured at + import time, so we re-resolve through the constants module here to honor + the monkeypatch. + """ + from app import constants as _constants + + return _constants.OPENSRE_HOME_DIR / "runbooks" + + +def _parse_frontmatter(text: str) -> tuple[dict[str, Any], str]: + """Return (frontmatter_dict, body_without_frontmatter).""" + match = _FRONTMATTER_RE.match(text) + if not match: + return {}, text + raw = match.group(1) + parsed = yaml.safe_load(raw) + if not isinstance(parsed, dict): + parsed = {} + return parsed, text[match.end() :] + + +def _coerce_triggers(value: Any) -> tuple[str, ...]: + """Normalize a frontmatter ``triggers`` value into a tuple of lowercase strings.""" + if value is None: + return () + if isinstance(value, str): + return (value.strip().lower(),) if value.strip() else () + if isinstance(value, list | tuple): + return tuple(str(item).strip().lower() for item in value if str(item).strip()) + return () + + +def _parse_runbook_file(path: Path) -> Runbook: + """Read and parse a single runbook file. + + Raises: + RunbookValidationError: when required frontmatter is missing. + """ + text = path.read_text(encoding="utf-8") + frontmatter, body = _parse_frontmatter(text) + triggers = _coerce_triggers(frontmatter.get("triggers")) + if not triggers: + raise RunbookValidationError( + f"{path.name}: frontmatter must include a non-empty 'triggers' list" + ) + + slug = path.stem + title_value = frontmatter.get("title") + title = ( + str(title_value).strip() + if title_value and str(title_value).strip() + else slug.replace("-", " ").replace("_", " ").title() + ) + service_value = frontmatter.get("service") + service = str(service_value).strip() or None if service_value else None + category_value = frontmatter.get("category") + category = str(category_value).strip() or None if category_value else None + + extra = { + key: value + for key, value in frontmatter.items() + if key not in {"title", "service", "category", "triggers"} + } + + return Runbook( + slug=slug, + title=title, + service=service, + category=category, + triggers=triggers, + body=body.strip(), + path=path, + extra=extra, + ) + + +def load_all() -> list[Runbook]: + """Load every valid runbook under ``RUNBOOK_DIR``. + + Missing directory → ``[]``. Invalid files are skipped silently so a single + bad runbook can never break the planner during an investigation. + """ + directory = _runbook_dir() + if not directory.exists(): + return [] + runbooks: list[Runbook] = [] + for path in sorted(directory.glob("*.md")): + try: + runbooks.append(_parse_runbook_file(path)) + except (RunbookValidationError, OSError, yaml.YAMLError): + continue + return runbooks + + +def save(source: Path) -> Runbook: + """Validate ``source`` and copy it into the runbook store. + + Returns the parsed Runbook after copying. Slug = source.stem. + """ + if not source.exists() or not source.is_file(): + raise FileNotFoundError(f"runbook source not found: {source}") + + parsed = _parse_runbook_file(source) + + directory = _runbook_dir() + directory.mkdir(parents=True, exist_ok=True) + dest = directory / f"{parsed.slug}.md" + shutil.copyfile(source, dest) + + return _parse_runbook_file(dest) + + +def remove(slug: str) -> bool: + """Delete ``RUNBOOK_DIR/.md``. Return True if the file existed.""" + target = _runbook_dir() / f"{slug}.md" + if not target.exists(): + return False + target.unlink() + return True diff --git a/app/state/agent_state.py b/app/state/agent_state.py index 93d99003d..7582151a3 100644 --- a/app/state/agent_state.py +++ b/app/state/agent_state.py @@ -57,6 +57,12 @@ class AgentState(TypedDict, total=False): available_sources: dict[str, dict] available_action_names: list[str] + # Top-1 runbook matched by `retrieve_matching_runbook` during plan_actions. + # Populated only when a runbook in ~/.config/opensre/runbooks/ scores > 0 + # for the current alert. Consumed by the diagnosis prompt builder + # (grounds remediation_steps) and the publish_findings provenance line. + matched_runbook: dict[str, Any] | None + # Tool budget enforcement - caps the number of tools per investigation step tool_budget: int # Maximum tools to select per step (default: 10) @@ -167,6 +173,7 @@ class AgentStateModel(StrictConfigModel): retrieval_controls: RetrievalControlsMap | None = None available_sources: dict[str, dict[str, Any]] = Field(default_factory=dict) available_action_names: list[str] = Field(default_factory=list) + matched_runbook: dict[str, Any] | None = None tool_budget: int = Field( default=10, ge=1, le=50, description="Maximum tools to select per step" ) diff --git a/docs/runbooks.mdx b/docs/runbooks.mdx new file mode 100644 index 000000000..70038eb2a --- /dev/null +++ b/docs/runbooks.mdx @@ -0,0 +1,94 @@ +--- +title: "Runbooks" +--- + +OpenSRE can ground its remediation suggestions in your team's own runbooks. +Drop a Markdown file with YAML frontmatter into the local runbook store and +the diagnosis prompt will inject the matched runbook when the current alert +fires — so `Recommended Actions` mirrors your team's playbook instead of a +generic category template. + +### Where they live + +Runbooks live under `~/.config/opensre/runbooks/`. One file per playbook, one +slug per filename (the filename without `.md` is the slug). + +### File format + +```markdown +--- +service: payments-api # optional — matches pipeline / alert service +triggers: # required — lowercase keyword fragments + - oom + - memory + - exit code 137 +category: resource_exhaustion # optional — informational only +title: Payments API OOM playbook # optional — falls back to slug +--- +# Payments API OOM playbook + +When `payments-api` pods restart with exit code 137: + +1. Bump JVM `-Xmx` from 1.5G → 2G in the `payments-api` Helm values. +2. Page `#payments-oncall` before scaling — owner approval required. +3. Verify recovery via the Grafana dashboard `payments-api/jvm-memory`. +``` + +Only `triggers` is required. Files with missing/empty `triggers` are skipped +on load. + +### CLI + +```bash +# Copy a runbook into the local store +opensre runbook add ./payments-oom.md + +# Show every runbook currently in the store +opensre runbook list + +# Delete a runbook by slug +opensre runbook remove payments-oom +``` + +### How matching works + +During `plan_actions`, OpenSRE scores every runbook against the current alert +and picks the highest-scoring one (top-1). When the top score is 0, nothing +is matched and remediation falls back to the deterministic category template +shipped in Phase 1. + +``` +score = (2 if runbook.service matches the alert service or pipeline_name) + + |alert keywords ∩ runbook.triggers| +``` + +Keyword extraction is shared with the planner +(`app.nodes.plan_actions.extract_keywords`), so any term in +`KEYWORD_PATTERNS` that appears in the alert name or problem statement +contributes to the overlap. + +Ties on score are broken by slug (lexicographic), so output is deterministic. + +### Where it shows up + +When a runbook matches, the diagnosis prompt receives a `RELEVANT TEAM +RUNBOOK` block and is instructed to prefer actions described inside. +The final published report appends a provenance line under +`Recommended Actions`: + +``` +## Recommended Actions +• Bump JVM -Xmx from 1.5G → 2G in payments-api Helm values (per runbook) +• Page #payments-oncall channel before scaling — owner approval required +• Verify recovery via Grafana dashboard payments-api/jvm-memory +_Source: runbooks/payments-oom.md_ +``` + +The Slack Block Kit renderer emits an equivalent `context` block below +`Recommended Actions`. + +### Out of scope (deferred to Phase 2b) + +- Remote ingest from Notion / Google Docs / Confluence (`opensre runbook import …`) +- Vector retrieval — current matching is deterministic and auditable by design +- Multi-tenant scoping by `org_id` — single-tenant per home dir today diff --git a/tests/cli/test_runbook_command.py b/tests/cli/test_runbook_command.py new file mode 100644 index 000000000..a69ddc9ac --- /dev/null +++ b/tests/cli/test_runbook_command.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest +from click.testing import CliRunner + +from app.cli.__main__ import cli + + +def _patch_home(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Path: + home = tmp_path / "opensre_home" + monkeypatch.setattr("app.constants.OPENSRE_HOME_DIR", home) + return home + + +def _make_fixture(path: Path, *, with_triggers: bool = True) -> Path: + frontmatter = "service: payments-api\n" + if with_triggers: + frontmatter += "triggers:\n - oom\n - memory\n" + path.write_text(f"---\n{frontmatter}---\n# body\n", encoding="utf-8") + return path + + +def test_runbook_add_writes_to_store(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + home = _patch_home(monkeypatch, tmp_path) + source = _make_fixture(tmp_path / "payments-oom.md") + + runner = CliRunner() + result = runner.invoke(cli, ["runbook", "add", str(source)]) + + assert result.exit_code == 0, result.output + assert "Saved runbook 'payments-oom'" in result.output + assert (home / "runbooks" / "payments-oom.md").exists() + + +def test_runbook_add_rejects_invalid_frontmatter( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + _patch_home(monkeypatch, tmp_path) + source = _make_fixture(tmp_path / "bad.md", with_triggers=False) + + runner = CliRunner() + result = runner.invoke(cli, ["runbook", "add", str(source)]) + + assert result.exit_code != 0 + assert "triggers" in result.output + + +def test_runbook_list_shows_entries(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + home = _patch_home(monkeypatch, tmp_path) + target = home / "runbooks" / "payments-oom.md" + target.parent.mkdir(parents=True, exist_ok=True) + _make_fixture(target) + + runner = CliRunner() + result = runner.invoke(cli, ["runbook", "list"]) + + assert result.exit_code == 0 + assert "payments-oom" in result.output + assert "service=payments-api" in result.output + assert "oom" in result.output + + +def test_runbook_list_empty_message(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + _patch_home(monkeypatch, tmp_path) + + runner = CliRunner() + result = runner.invoke(cli, ["runbook", "list"]) + + assert result.exit_code == 0 + assert "No runbooks found" in result.output + + +def test_runbook_remove_deletes_file(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + home = _patch_home(monkeypatch, tmp_path) + target = home / "runbooks" / "payments-oom.md" + target.parent.mkdir(parents=True, exist_ok=True) + _make_fixture(target) + + runner = CliRunner() + result = runner.invoke(cli, ["runbook", "remove", "payments-oom"]) + + assert result.exit_code == 0 + assert "Removed runbook 'payments-oom'" in result.output + assert not target.exists() + + +def test_runbook_remove_unknown_slug_errors( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + _patch_home(monkeypatch, tmp_path) + + runner = CliRunner() + result = runner.invoke(cli, ["runbook", "remove", "missing"]) + + assert result.exit_code != 0 + assert "no runbook" in result.output diff --git a/tests/delivery/test_report_provenance.py b/tests/delivery/test_report_provenance.py index 4c9a53189..92068942c 100644 --- a/tests/delivery/test_report_provenance.py +++ b/tests/delivery/test_report_provenance.py @@ -244,3 +244,68 @@ def test_build_slack_blocks_omits_recommended_actions_when_empty() -> None: header_texts = [b.get("text", {}).get("text", "") for b in blocks if b.get("type") == "header"] assert not any("Recommended Actions" in t for t in header_texts) + + +def test_runbook_provenance_added_when_state_has_matched_runbook() -> None: + state = _make_state() + state["matched_runbook"] = { + "slug": "payments-oom", + "title": "Payments OOM", + "path": "/home/user/.config/opensre/runbooks/payments-oom.md", + "body": "Bump heap.", + } + + ctx = build_report_context(state) + + assert ctx["runbook_provenance"] == { + "slug": "payments-oom", + "title": "Payments OOM", + "path": "/home/user/.config/opensre/runbooks/payments-oom.md", + } + + +def test_runbook_provenance_none_when_state_missing_match() -> None: + ctx = build_report_context(_make_state()) + + assert ctx["runbook_provenance"] is None + + +def test_format_slack_message_renders_runbook_source_line() -> None: + state = _make_state() + state["remediation_steps"] = ["Bump heap to 2G."] + state["matched_runbook"] = {"slug": "payments-oom", "body": "x"} + ctx = build_report_context(state) + + message = format_slack_message(ctx) + + assert "## Recommended Actions" in message + assert "_Source: runbooks/payments-oom.md_" in message + + +def test_format_slack_message_omits_runbook_source_when_no_match() -> None: + state = _make_state() + state["remediation_steps"] = ["Bump heap to 2G."] + ctx = build_report_context(state) + + message = format_slack_message(ctx) + + assert "_Source: runbooks/" not in message + + +def test_build_slack_blocks_emits_runbook_context_block() -> None: + state = _make_state() + state["remediation_steps"] = ["Bump heap to 2G."] + state["matched_runbook"] = {"slug": "payments-oom", "body": "x"} + ctx = build_report_context(state) + + blocks = build_slack_blocks(ctx) + + context_blocks = [b for b in blocks if b.get("type") == "context"] + runbook_text = "_Source: runbooks/payments-oom.md_" + assert any( + any( + isinstance(el, dict) and runbook_text in el.get("text", "") + for el in block.get("elements", []) + ) + for block in context_blocks + ) diff --git a/tests/nodes/plan_actions/test_runbook_retrieval.py b/tests/nodes/plan_actions/test_runbook_retrieval.py new file mode 100644 index 000000000..196529313 --- /dev/null +++ b/tests/nodes/plan_actions/test_runbook_retrieval.py @@ -0,0 +1,100 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any + +import pytest + +from app.nodes.investigate.models import InvestigateInput +from app.nodes.plan_actions import node as node_module + + +class _TrackerStub: + def start(self, _name: str, _message: str) -> None: + return + + def complete(self, _name: str, *, fields_updated: list[str], message: str) -> None: + assert message + self.fields_updated = fields_updated + + +def _patch_home(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Path: + home = tmp_path / "opensre_home" + monkeypatch.setattr("app.constants.OPENSRE_HOME_DIR", home) + return home + + +def _write_runbook(home: Path, slug: str, frontmatter: str, body: str = "body") -> None: + target = home / "runbooks" / f"{slug}.md" + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(f"---\n{frontmatter}\n---\n{body}", encoding="utf-8") + + +def _stub_plan_actions(monkeypatch: pytest.MonkeyPatch) -> None: + plan = node_module.InvestigationPlan( + actions=["get_logs"], + rationale="Logs around failures.", + retrieval_controls=None, + ) + monkeypatch.setattr( + node_module.InvestigateInput, + "from_state", + lambda _state: InvestigateInput(raw_alert={}, context={}, tool_budget=10), + ) + monkeypatch.setattr( + node_module, + "build_plan_actions", + lambda **_kwargs: (plan, {"knowledge": {}}, ["get_logs"], [], False, "", []), + ) + monkeypatch.setattr(node_module, "get_tracker", lambda: _TrackerStub()) + + +def test_node_plan_actions_attaches_matched_runbook( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + home = _patch_home(monkeypatch, tmp_path) + _write_runbook( + home, + "payments-oom", + frontmatter="service: payments-api\ntriggers:\n - oom\n - memory", + ) + _stub_plan_actions(monkeypatch) + + state: dict[str, Any] = { + "raw_alert": {}, + "context": {}, + "resolved_integrations": {}, + "investigation_loop_count": 0, + "problem_md": "payments-api OOMKilled with memory exhaustion.", + "alert_name": "PaymentsOOM", + "pipeline_name": "payments-api", + "alert_json": {"commonLabels": {"service": "payments-api"}}, + } + + result = node_module.node_plan_actions(state) + + matched = result["matched_runbook"] + assert matched is not None + assert matched["slug"] == "payments-oom" + assert matched["service"] == "payments-api" + + +def test_node_plan_actions_matched_runbook_none_when_no_match( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + _patch_home(monkeypatch, tmp_path) + _stub_plan_actions(monkeypatch) + + result = node_module.node_plan_actions( + { + "raw_alert": {}, + "context": {}, + "resolved_integrations": {}, + "investigation_loop_count": 0, + "problem_md": "", + "alert_name": "SomethingElse", + "pipeline_name": "other", + } + ) + + assert result["matched_runbook"] is None diff --git a/tests/nodes/root_cause_diagnosis/test_runbook_injection.py b/tests/nodes/root_cause_diagnosis/test_runbook_injection.py new file mode 100644 index 000000000..d97a4ce79 --- /dev/null +++ b/tests/nodes/root_cause_diagnosis/test_runbook_injection.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from app.nodes.root_cause_diagnosis.directives import _build_runbook_section +from app.nodes.root_cause_diagnosis.prompt_builder import build_diagnosis_prompt + + +def _base_state() -> dict[str, object]: + return { + "problem_md": "payments-api OOMKilled.", + "alert_name": "PaymentsOOM", + "hypotheses": [], + "raw_alert": {}, + } + + +def test_build_runbook_section_returns_empty_when_no_match() -> None: + assert _build_runbook_section(None) == "" + assert _build_runbook_section({}) == "" + + +def test_build_runbook_section_includes_slug_and_body() -> None: + section = _build_runbook_section({"slug": "payments-oom", "body": "Bump heap to 2G."}) + + assert "RELEVANT TEAM RUNBOOK (payments-oom)" in section + assert "Bump heap to 2G." in section + assert "Cite the runbook slug" in section + + +def test_build_runbook_section_truncates_long_body() -> None: + big = "x" * 5000 + section = _build_runbook_section({"slug": "s", "body": big}) + + assert "x" * 2000 in section + assert "x" * 2001 not in section + + +def test_diagnosis_prompt_injects_matched_runbook() -> None: + state = _base_state() + state["matched_runbook"] = {"slug": "payments-oom", "body": "Page on-call."} + + prompt = build_diagnosis_prompt(state, evidence={}) + + assert "RELEVANT TEAM RUNBOOK (payments-oom)" in prompt + assert "Page on-call." in prompt + + +def test_diagnosis_prompt_omits_section_without_matched_runbook() -> None: + prompt = build_diagnosis_prompt(_base_state(), evidence={}) + + assert "RELEVANT TEAM RUNBOOK" not in prompt diff --git a/tests/runbooks/__init__.py b/tests/runbooks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/runbooks/test_retrieval.py b/tests/runbooks/test_retrieval.py new file mode 100644 index 000000000..9af7ae204 --- /dev/null +++ b/tests/runbooks/test_retrieval.py @@ -0,0 +1,107 @@ +from __future__ import annotations + +from pathlib import Path + +from app.runbooks.retrieval import retrieve_matching_runbook +from app.runbooks.store import Runbook + + +def _rb( + slug: str, + triggers: tuple[str, ...] = (), + service: str | None = None, +) -> Runbook: + return Runbook( + slug=slug, + title=slug, + service=service, + category=None, + triggers=triggers, + body="", + path=Path(f"/tmp/{slug}.md"), + ) + + +def test_returns_none_when_no_matches() -> None: + runbooks = [_rb("a", triggers=("oom",), service="foo")] + + result = retrieve_matching_runbook( + runbooks=runbooks, + keywords=["cpu"], + service="bar", + pipeline_name="baz", + ) + + assert result is None + + +def test_service_match_alone_wins() -> None: + runbooks = [_rb("payments-oom", triggers=("oom",), service="payments-api")] + + result = retrieve_matching_runbook( + runbooks=runbooks, + keywords=[], + service="payments-api", + pipeline_name=None, + ) + + assert result is not None + assert result.slug == "payments-oom" + + +def test_pipeline_name_also_counts_as_service_match() -> None: + runbooks = [_rb("p", triggers=("oom",), service="pipeline-x")] + + result = retrieve_matching_runbook( + runbooks=runbooks, + keywords=[], + service=None, + pipeline_name="pipeline-x", + ) + + assert result is not None + assert result.slug == "p" + + +def test_service_match_outranks_keyword_only_match() -> None: + keyword_only = _rb("keyword-only", triggers=("oom", "memory")) + service_match = _rb("service-match", triggers=("oom",), service="payments-api") + + result = retrieve_matching_runbook( + runbooks=[keyword_only, service_match], + keywords=["oom"], + service="payments-api", + pipeline_name=None, + ) + + assert result is not None + assert result.slug == "service-match" + + +def test_ties_broken_by_slug() -> None: + a = _rb("a-runbook", triggers=("oom",)) + b = _rb("b-runbook", triggers=("oom",)) + + result = retrieve_matching_runbook( + runbooks=[b, a], + keywords=["oom"], + service=None, + pipeline_name=None, + ) + + assert result is not None + assert result.slug == "a-runbook" + + +def test_keyword_case_insensitive() -> None: + runbooks = [_rb("x", triggers=("oom",))] + + result = retrieve_matching_runbook( + runbooks=runbooks, + keywords=["OOM"], + service=None, + pipeline_name=None, + ) + + assert result is not None + assert result.slug == "x" diff --git a/tests/runbooks/test_store.py b/tests/runbooks/test_store.py new file mode 100644 index 000000000..c37f369ad --- /dev/null +++ b/tests/runbooks/test_store.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + +from app.runbooks import store + + +def _patch_home(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Path: + home = tmp_path / "opensre_home" + monkeypatch.setattr("app.constants.OPENSRE_HOME_DIR", home) + return home + + +def _write_runbook(path: Path, frontmatter: str, body: str = "# Body") -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(f"---\n{frontmatter}\n---\n{body}", encoding="utf-8") + + +def test_load_all_returns_empty_when_dir_missing( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + _patch_home(monkeypatch, tmp_path) + + assert store.load_all() == [] + + +def test_load_all_parses_valid_runbook(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + home = _patch_home(monkeypatch, tmp_path) + _write_runbook( + home / "runbooks" / "payments-oom.md", + frontmatter=( + "service: payments-api\n" + "triggers:\n - oom\n - memory\n" + "category: resource_exhaustion\n" + "title: Payments OOM" + ), + body="Bump JVM heap to 2G.", + ) + + runbooks = store.load_all() + + assert len(runbooks) == 1 + rb = runbooks[0] + assert rb.slug == "payments-oom" + assert rb.title == "Payments OOM" + assert rb.service == "payments-api" + assert rb.category == "resource_exhaustion" + assert rb.triggers == ("oom", "memory") + assert "Bump JVM heap" in rb.body + + +def test_load_all_skips_runbook_without_triggers( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + home = _patch_home(monkeypatch, tmp_path) + _write_runbook( + home / "runbooks" / "invalid.md", + frontmatter="service: foo", + ) + _write_runbook( + home / "runbooks" / "valid.md", + frontmatter="triggers:\n - oom", + ) + + runbooks = store.load_all() + + assert [rb.slug for rb in runbooks] == ["valid"] + + +def test_save_validates_triggers(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + _patch_home(monkeypatch, tmp_path) + source = tmp_path / "input.md" + source.write_text("---\nservice: foo\n---\nbody", encoding="utf-8") + + with pytest.raises(store.RunbookValidationError): + store.save(source) + + +def test_save_copies_into_store(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + home = _patch_home(monkeypatch, tmp_path) + source = tmp_path / "payments-oom.md" + source.write_text( + "---\nservice: payments-api\ntriggers:\n - oom\n---\nbody", + encoding="utf-8", + ) + + saved = store.save(source) + + assert saved.slug == "payments-oom" + assert saved.path == home / "runbooks" / "payments-oom.md" + assert saved.path.exists() + + +def test_remove_returns_true_when_present_false_otherwise( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + home = _patch_home(monkeypatch, tmp_path) + _write_runbook(home / "runbooks" / "x.md", frontmatter="triggers:\n - a") + + assert store.remove("x") is True + assert not (home / "runbooks" / "x.md").exists() + assert store.remove("x") is False + + +def test_to_dict_round_trips_fields(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + home = _patch_home(monkeypatch, tmp_path) + _write_runbook( + home / "runbooks" / "x.md", + frontmatter="service: s\ntriggers:\n - t1", + body="content", + ) + + rb = store.load_all()[0] + payload = rb.to_dict() + + assert payload["slug"] == "x" + assert payload["service"] == "s" + assert payload["triggers"] == ["t1"] + assert payload["body"] == "content" + assert payload["path"] == str(home / "runbooks" / "x.md") diff --git a/tests/synthetic/runbooks/001-payments-oom/alert.json b/tests/synthetic/runbooks/001-payments-oom/alert.json new file mode 100644 index 000000000..571c981a8 --- /dev/null +++ b/tests/synthetic/runbooks/001-payments-oom/alert.json @@ -0,0 +1,15 @@ +{ + "title": "[synthetic-runbooks] PaymentsOOMKilled — payments-api", + "state": "firing", + "alert_source": "datadog", + "commonLabels": { + "alertname": "PaymentsOOMKilled", + "severity": "critical", + "pipeline_name": "payments-api", + "service": "payments-api" + }, + "commonAnnotations": { + "summary": "payments-api pods restarted with exit code 137 (OOMKilled).", + "description": "Memory consumption climbed steadily before container kill." + } +} diff --git a/tests/synthetic/runbooks/001-payments-oom/answer.yml b/tests/synthetic/runbooks/001-payments-oom/answer.yml new file mode 100644 index 000000000..8f744f6ab --- /dev/null +++ b/tests/synthetic/runbooks/001-payments-oom/answer.yml @@ -0,0 +1,7 @@ +expected_matched_slug: payments-oom +expected_service: payments-api +expected_triggers: + - oom + - memory + - failure +expected_provenance_line: "_Source: runbooks/payments-oom.md_" diff --git a/tests/synthetic/runbooks/001-payments-oom/runbook.md b/tests/synthetic/runbooks/001-payments-oom/runbook.md new file mode 100644 index 000000000..7d034494b --- /dev/null +++ b/tests/synthetic/runbooks/001-payments-oom/runbook.md @@ -0,0 +1,16 @@ +--- +service: payments-api +triggers: + - oom + - memory + - failure +category: resource_exhaustion +title: Payments API OOM playbook +--- +# Payments API OOM playbook + +When `payments-api` pods restart with exit code 137: + +1. Bump JVM `-Xmx` from 1.5G → 2G in the `payments-api` Helm values. +2. Page `#payments-oncall` before scaling — owner approval required. +3. Verify recovery via the Grafana dashboard `payments-api/jvm-memory`. diff --git a/tests/synthetic/runbooks/001-payments-oom/scenario.yml b/tests/synthetic/runbooks/001-payments-oom/scenario.yml new file mode 100644 index 000000000..15454102a --- /dev/null +++ b/tests/synthetic/runbooks/001-payments-oom/scenario.yml @@ -0,0 +1,11 @@ +schema_version: "1.0" +scenario_id: 001-payments-oom +engine: runbooks +failure_mode: oom +severity: critical +service: payments-api +pipeline_name: payments-api +description: | + Payments API pods crash-loop with exit code 137 (OOMKilled) after a JVM + heap leak. The matching runbook should be retrieved and its slug should be + surfaced as the source for remediation steps. diff --git a/tests/synthetic/runbooks/__init__.py b/tests/synthetic/runbooks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/synthetic/runbooks/test_runbook_suite.py b/tests/synthetic/runbooks/test_runbook_suite.py new file mode 100644 index 000000000..51e3261a6 --- /dev/null +++ b/tests/synthetic/runbooks/test_runbook_suite.py @@ -0,0 +1,94 @@ +"""Synthetic runbook suite — fixture-driven, no live infra. + +Each scenario directory under ``tests/synthetic/runbooks/`` contains: +- scenario.yml metadata (service, pipeline_name, failure_mode) +- alert.json the alert payload, mirrors shape used by extract_alert +- runbook.md the runbook expected to match +- answer.yml assertions about retrieval + provenance rendering + +The test: +1. Copies ``runbook.md`` into a tmp OPENSRE_HOME / runbooks dir. +2. Runs ``retrieve_matching_runbook`` with keywords/service derived from the alert. +3. Asserts the matched slug + provenance line match the answer key. +""" + +from __future__ import annotations + +import json +import shutil +from pathlib import Path + +import pytest +import yaml + +from app.nodes.plan_actions.extract_keywords import extract_keywords +from app.nodes.publish_findings.formatters.report import format_slack_message +from app.nodes.publish_findings.report_context import build_report_context +from app.runbooks.retrieval import retrieve_matching_runbook +from app.runbooks.store import load_all + +SUITE_DIR = Path(__file__).resolve().parent + + +def _scenarios() -> list[Path]: + return sorted(p for p in SUITE_DIR.iterdir() if p.is_dir() and p.name[0].isdigit()) + + +@pytest.mark.parametrize("scenario_dir", _scenarios(), ids=lambda p: p.name) +def test_runbook_suite_scenario( + scenario_dir: Path, monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + scenario = yaml.safe_load((scenario_dir / "scenario.yml").read_text(encoding="utf-8")) + alert = json.loads((scenario_dir / "alert.json").read_text(encoding="utf-8")) + answer = yaml.safe_load((scenario_dir / "answer.yml").read_text(encoding="utf-8")) + + home = tmp_path / "opensre_home" + monkeypatch.setattr("app.constants.OPENSRE_HOME_DIR", home) + target_runbooks = home / "runbooks" + target_runbooks.mkdir(parents=True, exist_ok=True) + shutil.copyfile( + scenario_dir / "runbook.md", + target_runbooks / f"{answer['expected_matched_slug']}.md", + ) + + common_labels = alert.get("commonLabels", {}) + common_annotations = alert.get("commonAnnotations", {}) + problem_md = ( + common_annotations.get("summary", "") + " " + common_annotations.get("description", "") + ) + alert_name = common_labels.get("alertname", "") + + keywords = extract_keywords(problem_md, alert_name) + matched = retrieve_matching_runbook( + runbooks=load_all(), + keywords=keywords, + service=common_labels.get("service"), + pipeline_name=scenario.get("pipeline_name"), + ) + + assert matched is not None + assert matched.slug == answer["expected_matched_slug"] + assert matched.service == answer["expected_service"] + assert set(matched.triggers) >= set(answer["expected_triggers"]) + + state = { + "pipeline_name": scenario.get("pipeline_name", ""), + "alert_name": alert_name, + "root_cause": "JVM heap exhaustion in payments-api.", + "root_cause_category": "resource_exhaustion", + "validated_claims": [], + "non_validated_claims": [], + "investigation_recommendations": [], + "remediation_steps": ["Bump JVM -Xmx from 1.5G to 2G (per runbook payments-oom)."], + "available_sources": {}, + "evidence": {}, + "raw_alert": alert, + "matched_runbook": matched.to_dict(), + } + + ctx = build_report_context(state) + assert ctx["runbook_provenance"] is not None + assert ctx["runbook_provenance"]["slug"] == answer["expected_matched_slug"] + + message = format_slack_message(ctx) + assert answer["expected_provenance_line"] in message From 6e0474ab35ca40d01310a5981c491f7ffdf0f29f Mon Sep 17 00:00:00 2001 From: Ankit Juneja Date: Thu, 14 May 2026 23:09:34 +0530 Subject: [PATCH 02/11] feat(runbooks): port phase 2a to ReAct agent architecture --- app/agent/prompt.py | 41 +++++-- app/pipeline/pipeline.py | 31 ++++++ tests/agent/test_runbook_injection.py | 62 +++++++++++ tests/agent/test_runbook_retrieval.py | 70 ++++++++++++ .../plan_actions/test_runbook_retrieval.py | 100 ------------------ .../test_runbook_injection.py | 50 --------- .../synthetic/runbooks/test_runbook_suite.py | 8 +- 7 files changed, 199 insertions(+), 163 deletions(-) create mode 100644 tests/agent/test_runbook_injection.py create mode 100644 tests/agent/test_runbook_retrieval.py delete mode 100644 tests/nodes/plan_actions/test_runbook_retrieval.py delete mode 100644 tests/nodes/root_cause_diagnosis/test_runbook_injection.py diff --git a/app/agent/prompt.py b/app/agent/prompt.py index c8aeb75a5..111acffd9 100644 --- a/app/agent/prompt.py +++ b/app/agent/prompt.py @@ -93,6 +93,24 @@ _SECONDARY_SOURCES = {"knowledge", "openclaw", "google_docs"} +def _build_runbook_section(matched_runbook: dict[str, Any] | None) -> str: + """Inject a team-authored runbook excerpt into the alert context. + + Phase 2a runbook-aware reasoning: the agent prefers actions described in + the matched runbook when proposing remediation steps. + """ + if not matched_runbook: + return "" + body = str(matched_runbook.get("body", ""))[:2000] + slug = str(matched_runbook.get("slug", "unknown")) + return ( + f"\n## Relevant team runbook ({slug})\n\n" + f"{body}\n\n" + "When proposing remediation steps, prefer actions described in this runbook. " + f"Cite [{slug}] in any step you draw from it.\n" + ) + + def build_system_prompt(_state: dict[str, Any]) -> str: return _INVESTIGATION_SYSTEM @@ -120,15 +138,20 @@ def format_alert_context(state: dict[str, Any]) -> str: start_guidance = _build_start_guidance(alert_source, alert_name, tools_by_source) tools_section = _format_tools_by_source(tools_by_source) - return _ALERT_CONTEXT_TEMPLATE.format( - alert_name=alert_name, - alert_source=alert_source or "unknown", - pipeline_name=pipeline_name, - severity=severity, - extra=extra, - connected_integrations=connected_integrations, - start_guidance=start_guidance, - tools_by_source=tools_section, + runbook_section = _build_runbook_section(state.get("matched_runbook")) + + return ( + _ALERT_CONTEXT_TEMPLATE.format( + alert_name=alert_name, + alert_source=alert_source or "unknown", + pipeline_name=pipeline_name, + severity=severity, + extra=extra, + connected_integrations=connected_integrations, + start_guidance=start_guidance, + tools_by_source=tools_section, + ) + + runbook_section ) diff --git a/app/pipeline/pipeline.py b/app/pipeline/pipeline.py index 445fa3c89..572315100 100644 --- a/app/pipeline/pipeline.py +++ b/app/pipeline/pipeline.py @@ -155,6 +155,8 @@ def run_connected_investigation(state: AgentState) -> AgentState: if state_any.get("is_noise"): return cast(AgentState, state_any) + _merge(state_any, {"matched_runbook": _retrieve_runbook(state_any)}) + _merge(state_any, ConnectedInvestigationAgent().run(state_any)) _merge( state_any, @@ -192,6 +194,35 @@ def run_chat(state: AgentState) -> AgentState: return cast(AgentState, state_any) +def _retrieve_runbook(state: dict[str, Any]) -> dict[str, Any] | None: + """Top-1 runbook match for the current alert, or None. + + Pure read-only; never raises — a bad runbook store must not block investigation. + """ + from app.runbooks.retrieval import retrieve_matching_runbook + from app.runbooks.store import load_all as load_runbooks + + try: + alert_name = state.get("alert_name", "") or "" + problem_md = state.get("problem_md", "") or "" + raw = (alert_name + " " + problem_md).lower() + keywords = [w for w in raw.split() if len(w) > 3] + alert_json = state.get("alert_json") or {} + common_labels = alert_json.get("commonLabels", {}) if isinstance(alert_json, dict) else {} + service = common_labels.get("service") or ( + alert_json.get("service") if isinstance(alert_json, dict) else None + ) + matched = retrieve_matching_runbook( + runbooks=load_runbooks(), + keywords=keywords, + service=str(service) if service else None, + pipeline_name=state.get("pipeline_name"), + ) + return matched.to_dict() if matched else None + except Exception: # noqa: BLE001 + return None + + def _merge(state: dict[str, Any], updates: dict[str, Any]) -> None: if not updates: return diff --git a/tests/agent/test_runbook_injection.py b/tests/agent/test_runbook_injection.py new file mode 100644 index 000000000..bfa704466 --- /dev/null +++ b/tests/agent/test_runbook_injection.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from app.agent.prompt import _build_runbook_section, format_alert_context + + +def test_build_runbook_section_returns_empty_when_no_match() -> None: + assert _build_runbook_section(None) == "" + assert _build_runbook_section({}) == "" + + +def test_build_runbook_section_includes_slug_and_body() -> None: + section = _build_runbook_section({"slug": "payments-oom", "body": "Bump heap to 2G."}) + + assert "payments-oom" in section + assert "Bump heap to 2G." in section + assert "payments-oom" in section + + +def test_build_runbook_section_truncates_long_body() -> None: + big = "x" * 5000 + section = _build_runbook_section({"slug": "s", "body": big}) + + assert "x" * 2000 in section + assert "x" * 2001 not in section + + +def test_format_alert_context_injects_matched_runbook() -> None: + state = { + "alert_name": "PaymentsOOM", + "pipeline_name": "payments-api", + "severity": "critical", + "resolved_integrations": {}, + "matched_runbook": {"slug": "payments-oom", "body": "Page on-call."}, + } + + mock_tool = MagicMock() + mock_tool.source = "grafana" + mock_tool.name = "query_grafana_logs" + mock_tool.description = "Query logs" + mock_tool.is_available.return_value = False + + with patch("app.tools.registry.get_registered_tools", return_value=[]): + result = format_alert_context(state) + + assert "payments-oom" in result + assert "Page on-call." in result + + +def test_format_alert_context_omits_section_without_matched_runbook() -> None: + state = { + "alert_name": "SomeAlert", + "pipeline_name": "some-service", + "severity": "warning", + "resolved_integrations": {}, + } + + with patch("app.tools.registry.get_registered_tools", return_value=[]): + result = format_alert_context(state) + + assert "Relevant team runbook" not in result diff --git a/tests/agent/test_runbook_retrieval.py b/tests/agent/test_runbook_retrieval.py new file mode 100644 index 000000000..af6092b0b --- /dev/null +++ b/tests/agent/test_runbook_retrieval.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any + +import pytest + +from app.pipeline.pipeline import _retrieve_runbook + + +def _patch_home(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Path: + home = tmp_path / "opensre_home" + monkeypatch.setattr("app.constants.OPENSRE_HOME_DIR", home) + return home + + +def _write_runbook(home: Path, slug: str, frontmatter: str, body: str = "body") -> None: + target = home / "runbooks" / f"{slug}.md" + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(f"---\n{frontmatter}\n---\n{body}", encoding="utf-8") + + +def test_retrieve_runbook_matches_by_service_and_keywords( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + home = _patch_home(monkeypatch, tmp_path) + _write_runbook( + home, + "payments-oom", + frontmatter="service: payments-api\ntriggers:\n - oom\n - memory", + ) + + state: dict[str, Any] = { + "problem_md": "payments-api OOMKilled with memory exhaustion.", + "alert_name": "PaymentsOOM", + "pipeline_name": "payments-api", + "alert_json": {"commonLabels": {"service": "payments-api"}}, + } + + matched = _retrieve_runbook(state) + + assert matched is not None + assert matched["slug"] == "payments-oom" + assert matched["service"] == "payments-api" + + +def test_retrieve_runbook_returns_none_when_no_match( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + _patch_home(monkeypatch, tmp_path) + + matched = _retrieve_runbook( + { + "problem_md": "", + "alert_name": "SomethingElse", + "pipeline_name": "other", + } + ) + + assert matched is None + + +def test_retrieve_runbook_returns_none_on_empty_store( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + _patch_home(monkeypatch, tmp_path) + + matched = _retrieve_runbook({"alert_name": "Any", "pipeline_name": "any"}) + + assert matched is None diff --git a/tests/nodes/plan_actions/test_runbook_retrieval.py b/tests/nodes/plan_actions/test_runbook_retrieval.py deleted file mode 100644 index 196529313..000000000 --- a/tests/nodes/plan_actions/test_runbook_retrieval.py +++ /dev/null @@ -1,100 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from typing import Any - -import pytest - -from app.nodes.investigate.models import InvestigateInput -from app.nodes.plan_actions import node as node_module - - -class _TrackerStub: - def start(self, _name: str, _message: str) -> None: - return - - def complete(self, _name: str, *, fields_updated: list[str], message: str) -> None: - assert message - self.fields_updated = fields_updated - - -def _patch_home(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Path: - home = tmp_path / "opensre_home" - monkeypatch.setattr("app.constants.OPENSRE_HOME_DIR", home) - return home - - -def _write_runbook(home: Path, slug: str, frontmatter: str, body: str = "body") -> None: - target = home / "runbooks" / f"{slug}.md" - target.parent.mkdir(parents=True, exist_ok=True) - target.write_text(f"---\n{frontmatter}\n---\n{body}", encoding="utf-8") - - -def _stub_plan_actions(monkeypatch: pytest.MonkeyPatch) -> None: - plan = node_module.InvestigationPlan( - actions=["get_logs"], - rationale="Logs around failures.", - retrieval_controls=None, - ) - monkeypatch.setattr( - node_module.InvestigateInput, - "from_state", - lambda _state: InvestigateInput(raw_alert={}, context={}, tool_budget=10), - ) - monkeypatch.setattr( - node_module, - "build_plan_actions", - lambda **_kwargs: (plan, {"knowledge": {}}, ["get_logs"], [], False, "", []), - ) - monkeypatch.setattr(node_module, "get_tracker", lambda: _TrackerStub()) - - -def test_node_plan_actions_attaches_matched_runbook( - monkeypatch: pytest.MonkeyPatch, tmp_path: Path -) -> None: - home = _patch_home(monkeypatch, tmp_path) - _write_runbook( - home, - "payments-oom", - frontmatter="service: payments-api\ntriggers:\n - oom\n - memory", - ) - _stub_plan_actions(monkeypatch) - - state: dict[str, Any] = { - "raw_alert": {}, - "context": {}, - "resolved_integrations": {}, - "investigation_loop_count": 0, - "problem_md": "payments-api OOMKilled with memory exhaustion.", - "alert_name": "PaymentsOOM", - "pipeline_name": "payments-api", - "alert_json": {"commonLabels": {"service": "payments-api"}}, - } - - result = node_module.node_plan_actions(state) - - matched = result["matched_runbook"] - assert matched is not None - assert matched["slug"] == "payments-oom" - assert matched["service"] == "payments-api" - - -def test_node_plan_actions_matched_runbook_none_when_no_match( - monkeypatch: pytest.MonkeyPatch, tmp_path: Path -) -> None: - _patch_home(monkeypatch, tmp_path) - _stub_plan_actions(monkeypatch) - - result = node_module.node_plan_actions( - { - "raw_alert": {}, - "context": {}, - "resolved_integrations": {}, - "investigation_loop_count": 0, - "problem_md": "", - "alert_name": "SomethingElse", - "pipeline_name": "other", - } - ) - - assert result["matched_runbook"] is None diff --git a/tests/nodes/root_cause_diagnosis/test_runbook_injection.py b/tests/nodes/root_cause_diagnosis/test_runbook_injection.py deleted file mode 100644 index d97a4ce79..000000000 --- a/tests/nodes/root_cause_diagnosis/test_runbook_injection.py +++ /dev/null @@ -1,50 +0,0 @@ -from __future__ import annotations - -from app.nodes.root_cause_diagnosis.directives import _build_runbook_section -from app.nodes.root_cause_diagnosis.prompt_builder import build_diagnosis_prompt - - -def _base_state() -> dict[str, object]: - return { - "problem_md": "payments-api OOMKilled.", - "alert_name": "PaymentsOOM", - "hypotheses": [], - "raw_alert": {}, - } - - -def test_build_runbook_section_returns_empty_when_no_match() -> None: - assert _build_runbook_section(None) == "" - assert _build_runbook_section({}) == "" - - -def test_build_runbook_section_includes_slug_and_body() -> None: - section = _build_runbook_section({"slug": "payments-oom", "body": "Bump heap to 2G."}) - - assert "RELEVANT TEAM RUNBOOK (payments-oom)" in section - assert "Bump heap to 2G." in section - assert "Cite the runbook slug" in section - - -def test_build_runbook_section_truncates_long_body() -> None: - big = "x" * 5000 - section = _build_runbook_section({"slug": "s", "body": big}) - - assert "x" * 2000 in section - assert "x" * 2001 not in section - - -def test_diagnosis_prompt_injects_matched_runbook() -> None: - state = _base_state() - state["matched_runbook"] = {"slug": "payments-oom", "body": "Page on-call."} - - prompt = build_diagnosis_prompt(state, evidence={}) - - assert "RELEVANT TEAM RUNBOOK (payments-oom)" in prompt - assert "Page on-call." in prompt - - -def test_diagnosis_prompt_omits_section_without_matched_runbook() -> None: - prompt = build_diagnosis_prompt(_base_state(), evidence={}) - - assert "RELEVANT TEAM RUNBOOK" not in prompt diff --git a/tests/synthetic/runbooks/test_runbook_suite.py b/tests/synthetic/runbooks/test_runbook_suite.py index 51e3261a6..017c13cf1 100644 --- a/tests/synthetic/runbooks/test_runbook_suite.py +++ b/tests/synthetic/runbooks/test_runbook_suite.py @@ -21,9 +21,8 @@ import pytest import yaml -from app.nodes.plan_actions.extract_keywords import extract_keywords -from app.nodes.publish_findings.formatters.report import format_slack_message -from app.nodes.publish_findings.report_context import build_report_context +from app.delivery.publish_findings.formatters.report import format_slack_message +from app.delivery.publish_findings.report_context import build_report_context from app.runbooks.retrieval import retrieve_matching_runbook from app.runbooks.store import load_all @@ -58,7 +57,8 @@ def test_runbook_suite_scenario( ) alert_name = common_labels.get("alertname", "") - keywords = extract_keywords(problem_md, alert_name) + raw = (alert_name + " " + problem_md).lower() + keywords = [w for w in raw.split() if len(w) > 3] matched = retrieve_matching_runbook( runbooks=load_all(), keywords=keywords, From 14d9508e1bbe1adc806344771d570ea15f08f153 Mon Sep 17 00:00:00 2001 From: Ankit Juneja Date: Thu, 14 May 2026 23:56:26 +0530 Subject: [PATCH 03/11] fix(runbooks): drop banned docstring ref, fix keyword filter, use public RUNBOOK_DIR, fix docs --- app/cli/commands/runbook.py | 6 +++--- app/pipeline/pipeline.py | 2 +- app/runbooks/store.py | 2 +- docs/runbooks.mdx | 7 +++---- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/app/cli/commands/runbook.py b/app/cli/commands/runbook.py index c04a80c36..257167bd1 100644 --- a/app/cli/commands/runbook.py +++ b/app/cli/commands/runbook.py @@ -7,8 +7,8 @@ import click from app.runbooks.store import ( + RUNBOOK_DIR, RunbookValidationError, - _runbook_dir, load_all, remove, save, @@ -36,7 +36,7 @@ def runbook_list() -> None: """List runbooks currently in the local store.""" runbooks = load_all() if not runbooks: - click.echo(f"No runbooks found in {_runbook_dir()}") + click.echo(f"No runbooks found in {RUNBOOK_DIR}") return for rb in runbooks: service = rb.service or "-" @@ -51,4 +51,4 @@ def runbook_remove(slug: str) -> None: if remove(slug): click.echo(f"✓ Removed runbook '{slug}'") return - raise click.ClickException(f"no runbook with slug '{slug}' in {_runbook_dir()}") + raise click.ClickException(f"no runbook with slug '{slug}' in {RUNBOOK_DIR}") diff --git a/app/pipeline/pipeline.py b/app/pipeline/pipeline.py index 572315100..3d55a7a4d 100644 --- a/app/pipeline/pipeline.py +++ b/app/pipeline/pipeline.py @@ -206,7 +206,7 @@ def _retrieve_runbook(state: dict[str, Any]) -> dict[str, Any] | None: alert_name = state.get("alert_name", "") or "" problem_md = state.get("problem_md", "") or "" raw = (alert_name + " " + problem_md).lower() - keywords = [w for w in raw.split() if len(w) > 3] + keywords = [w for w in raw.split() if len(w) >= 3] alert_json = state.get("alert_json") or {} common_labels = alert_json.get("commonLabels", {}) if isinstance(alert_json, dict) else {} service = common_labels.get("service") or ( diff --git a/app/runbooks/store.py b/app/runbooks/store.py index 2735cb5c1..df3704dbb 100644 --- a/app/runbooks/store.py +++ b/app/runbooks/store.py @@ -48,7 +48,7 @@ class Runbook: extra: dict[str, Any] = field(default_factory=dict) def to_dict(self) -> dict[str, Any]: - """Serialize into a plain dict for the LangGraph state envelope.""" + """Serialize into a plain dict for the agent state envelope.""" return { "slug": self.slug, "title": self.title, diff --git a/docs/runbooks.mdx b/docs/runbooks.mdx index 70038eb2a..18b4d7647 100644 --- a/docs/runbooks.mdx +++ b/docs/runbooks.mdx @@ -62,10 +62,9 @@ score = (2 if runbook.service matches the alert service or pipeline_name) + |alert keywords ∩ runbook.triggers| ``` -Keyword extraction is shared with the planner -(`app.nodes.plan_actions.extract_keywords`), so any term in -`KEYWORD_PATTERNS` that appears in the alert name or problem statement -contributes to the overlap. +Keywords are extracted from the alert name and problem statement using +whitespace tokenisation (words of 3+ characters, lowercased). Any token +that appears in a runbook's `triggers:` list contributes +1 to the score. Ties on score are broken by slug (lexicographic), so output is deterministic. From e55f5e65ad076de6a00ecddaf37d1aab1af3fd17 Mon Sep 17 00:00:00 2001 From: Ankit Juneja Date: Fri, 15 May 2026 00:03:53 +0530 Subject: [PATCH 04/11] fix(tests): avoid assert side-effects flagged by CodeQL --- tests/runbooks/test_store.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/runbooks/test_store.py b/tests/runbooks/test_store.py index c37f369ad..6d614b4e1 100644 --- a/tests/runbooks/test_store.py +++ b/tests/runbooks/test_store.py @@ -99,9 +99,11 @@ def test_remove_returns_true_when_present_false_otherwise( home = _patch_home(monkeypatch, tmp_path) _write_runbook(home / "runbooks" / "x.md", frontmatter="triggers:\n - a") - assert store.remove("x") is True + result_first = store.remove("x") + assert result_first is True assert not (home / "runbooks" / "x.md").exists() - assert store.remove("x") is False + result_second = store.remove("x") + assert result_second is False def test_to_dict_round_trips_fields(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: From 21ed076666eddebdcb944a6105667aa731b109f8 Mon Sep 17 00:00:00 2001 From: Ankit Juneja <55021449+devankitjuneja@users.noreply.github.com> Date: Fri, 15 May 2026 00:06:40 +0530 Subject: [PATCH 05/11] Update tests/synthetic/runbooks/test_runbook_suite.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- tests/synthetic/runbooks/test_runbook_suite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/synthetic/runbooks/test_runbook_suite.py b/tests/synthetic/runbooks/test_runbook_suite.py index 017c13cf1..294aa3f31 100644 --- a/tests/synthetic/runbooks/test_runbook_suite.py +++ b/tests/synthetic/runbooks/test_runbook_suite.py @@ -58,7 +58,7 @@ def test_runbook_suite_scenario( alert_name = common_labels.get("alertname", "") raw = (alert_name + " " + problem_md).lower() - keywords = [w for w in raw.split() if len(w) > 3] + keywords = [w for w in raw.split() if len(w) >= 3] matched = retrieve_matching_runbook( runbooks=load_all(), keywords=keywords, From 6ab2a186c4f4d7e4f6cebe436702c74f771bcfb4 Mon Sep 17 00:00:00 2001 From: Ankit Juneja Date: Fri, 15 May 2026 14:20:56 +0530 Subject: [PATCH 06/11] fix(runbooks): match multi-word triggers by checking all tokens present --- app/runbooks/retrieval.py | 6 +++++- docs/runbooks.mdx | 8 +++++--- tests/runbooks/test_retrieval.py | 27 +++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/app/runbooks/retrieval.py b/app/runbooks/retrieval.py index f27ba318d..42721d147 100644 --- a/app/runbooks/retrieval.py +++ b/app/runbooks/retrieval.py @@ -29,7 +29,11 @@ def _score( ): service_score = 2 - keyword_overlap = len(keyword_set & set(runbook.triggers)) + keyword_overlap = sum( + 1 + for trigger in runbook.triggers + if (parts := trigger.lower().split()) and all(p in keyword_set for p in parts) + ) return service_score + keyword_overlap diff --git a/docs/runbooks.mdx b/docs/runbooks.mdx index 18b4d7647..db9d813a0 100644 --- a/docs/runbooks.mdx +++ b/docs/runbooks.mdx @@ -59,12 +59,14 @@ shipped in Phase 1. ``` score = (2 if runbook.service matches the alert service or pipeline_name) - + |alert keywords ∩ runbook.triggers| + + Σ matched_triggers ``` Keywords are extracted from the alert name and problem statement using -whitespace tokenisation (words of 3+ characters, lowercased). Any token -that appears in a runbook's `triggers:` list contributes +1 to the score. +whitespace tokenisation (words of 3+ characters, lowercased). Each trigger +contributes +1 when **all** of its tokens appear in the alert keyword set. +Single-word triggers (`oom`) need one token present; multi-word triggers +(`exit code 137`) need every token present. Ties on score are broken by slug (lexicographic), so output is deterministic. diff --git a/tests/runbooks/test_retrieval.py b/tests/runbooks/test_retrieval.py index 9af7ae204..10bab2804 100644 --- a/tests/runbooks/test_retrieval.py +++ b/tests/runbooks/test_retrieval.py @@ -93,6 +93,33 @@ def test_ties_broken_by_slug() -> None: assert result.slug == "a-runbook" +def test_multi_word_trigger_matches_when_all_tokens_present() -> None: + runbooks = [_rb("x", triggers=("exit code 137",))] + + result = retrieve_matching_runbook( + runbooks=runbooks, + keywords=["exit", "code", "137"], + service=None, + pipeline_name=None, + ) + + assert result is not None + assert result.slug == "x" + + +def test_multi_word_trigger_no_match_when_partial_tokens() -> None: + runbooks = [_rb("x", triggers=("exit code 137",))] + + result = retrieve_matching_runbook( + runbooks=runbooks, + keywords=["exit", "code"], + service=None, + pipeline_name=None, + ) + + assert result is None + + def test_keyword_case_insensitive() -> None: runbooks = [_rb("x", triggers=("oom",))] From c7f3d8e52f37b28bd296c593ec4acd8019de32af Mon Sep 17 00:00:00 2001 From: Ankit Juneja Date: Fri, 15 May 2026 15:16:27 +0530 Subject: [PATCH 07/11] fix(runbooks): sanitize slug in remove(), fix frontmatter regex, avoid double parse in save() --- app/runbooks/store.py | 11 ++++++++--- tests/runbooks/test_store.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/app/runbooks/store.py b/app/runbooks/store.py index df3704dbb..afea36879 100644 --- a/app/runbooks/store.py +++ b/app/runbooks/store.py @@ -17,7 +17,7 @@ import re import shutil -from dataclasses import dataclass, field +from dataclasses import dataclass, field, replace from pathlib import Path from typing import Any @@ -27,7 +27,8 @@ RUNBOOK_DIR: Path = OPENSRE_HOME_DIR / "runbooks" -_FRONTMATTER_RE = re.compile(r"\A---\s*\n(.*?)\n---\s*\n", re.DOTALL) +_FRONTMATTER_RE = re.compile(r"\A---\s*\n(.*?)\n---\s*(?:\n|\Z)", re.DOTALL) +_SAFE_SLUG_RE = re.compile(r"^[\w-]+$") class RunbookValidationError(ValueError): @@ -173,11 +174,15 @@ def save(source: Path) -> Runbook: dest = directory / f"{parsed.slug}.md" shutil.copyfile(source, dest) - return _parse_runbook_file(dest) + return replace(parsed, path=dest) def remove(slug: str) -> bool: """Delete ``RUNBOOK_DIR/.md``. Return True if the file existed.""" + if not _SAFE_SLUG_RE.match(slug): + raise ValueError( + f"invalid slug {slug!r} — only letters, digits, hyphens, and underscores allowed" + ) target = _runbook_dir() / f"{slug}.md" if not target.exists(): return False diff --git a/tests/runbooks/test_store.py b/tests/runbooks/test_store.py index 6d614b4e1..3a0a04811 100644 --- a/tests/runbooks/test_store.py +++ b/tests/runbooks/test_store.py @@ -106,6 +106,36 @@ def test_remove_returns_true_when_present_false_otherwise( assert result_second is False +def test_remove_rejects_path_traversal_slug( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + _patch_home(monkeypatch, tmp_path) + + with pytest.raises(ValueError, match="invalid slug"): + store.remove("../opensre.conf") + + +def test_remove_rejects_slug_with_slash(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + _patch_home(monkeypatch, tmp_path) + + with pytest.raises(ValueError, match="invalid slug"): + store.remove("foo/bar") + + +def test_load_all_parses_runbook_without_trailing_newline( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + home = _patch_home(monkeypatch, tmp_path) + rb_path = home / "runbooks" / "no-newline.md" + rb_path.parent.mkdir(parents=True, exist_ok=True) + rb_path.write_bytes(b"---\ntriggers:\n - oom\n---\nbody text") + + runbooks = store.load_all() + + assert len(runbooks) == 1 + assert runbooks[0].slug == "no-newline" + + def test_to_dict_round_trips_fields(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: home = _patch_home(monkeypatch, tmp_path) _write_runbook( From 66fdb02eaf1def3ba2e6c349f13bcba53ab5278d Mon Sep 17 00:00:00 2001 From: Ankit Juneja Date: Fri, 15 May 2026 15:43:10 +0530 Subject: [PATCH 08/11] fix(runbooks): validate slug in save(), clean truncation boundary, guard missing slug in prompt --- app/agent/prompt.py | 11 +++++++++-- app/runbooks/store.py | 6 ++++++ tests/agent/test_runbook_injection.py | 19 +++++++++++++++++-- tests/runbooks/test_store.py | 9 +++++++++ 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/app/agent/prompt.py b/app/agent/prompt.py index 111acffd9..b4d18e0e1 100644 --- a/app/agent/prompt.py +++ b/app/agent/prompt.py @@ -101,8 +101,15 @@ def _build_runbook_section(matched_runbook: dict[str, Any] | None) -> str: """ if not matched_runbook: return "" - body = str(matched_runbook.get("body", ""))[:2000] - slug = str(matched_runbook.get("slug", "unknown")) + slug = str(matched_runbook.get("slug", "")).strip() + if not slug: + return "" + raw = str(matched_runbook.get("body", "")) + if len(raw) > 2000: + cut = raw[:2000].rfind("\n") + body = raw[: cut if cut > 0 else 2000] + "\n…(truncated)" + else: + body = raw return ( f"\n## Relevant team runbook ({slug})\n\n" f"{body}\n\n" diff --git a/app/runbooks/store.py b/app/runbooks/store.py index afea36879..5d405d45f 100644 --- a/app/runbooks/store.py +++ b/app/runbooks/store.py @@ -169,6 +169,12 @@ def save(source: Path) -> Runbook: parsed = _parse_runbook_file(source) + if not _SAFE_SLUG_RE.match(parsed.slug): + raise RunbookValidationError( + f"filename stem {parsed.slug!r} is not a valid slug" + " — use only letters, digits, hyphens, and underscores" + ) + directory = _runbook_dir() directory.mkdir(parents=True, exist_ok=True) dest = directory / f"{parsed.slug}.md" diff --git a/tests/agent/test_runbook_injection.py b/tests/agent/test_runbook_injection.py index bfa704466..2f62c77d6 100644 --- a/tests/agent/test_runbook_injection.py +++ b/tests/agent/test_runbook_injection.py @@ -18,11 +18,26 @@ def test_build_runbook_section_includes_slug_and_body() -> None: assert "payments-oom" in section -def test_build_runbook_section_truncates_long_body() -> None: +def test_build_runbook_section_returns_empty_when_slug_missing() -> None: + assert _build_runbook_section({"body": "some body"}) == "" + assert _build_runbook_section({"slug": "", "body": "some body"}) == "" + + +def test_build_runbook_section_truncates_at_newline_boundary() -> None: + body = ("line\n" * 500)[:2100] # contains newlines, exceeds 2000 + section = _build_runbook_section({"slug": "s", "body": body}) + + assert "…(truncated)" in section + # must not split mid-line — text before truncation marker ends at \n + marker_idx = section.index("…(truncated)") + assert section[marker_idx - 1] == "\n" + + +def test_build_runbook_section_truncates_long_body_no_newline() -> None: big = "x" * 5000 section = _build_runbook_section({"slug": "s", "body": big}) - assert "x" * 2000 in section + assert "…(truncated)" in section assert "x" * 2001 not in section diff --git a/tests/runbooks/test_store.py b/tests/runbooks/test_store.py index 3a0a04811..13f24654b 100644 --- a/tests/runbooks/test_store.py +++ b/tests/runbooks/test_store.py @@ -106,6 +106,15 @@ def test_remove_returns_true_when_present_false_otherwise( assert result_second is False +def test_save_rejects_invalid_stem(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + _patch_home(monkeypatch, tmp_path) + source = tmp_path / "my runbook.md" + source.write_text("---\ntriggers:\n - oom\n---\nbody", encoding="utf-8") + + with pytest.raises(store.RunbookValidationError, match="not a valid slug"): + store.save(source) + + def test_remove_rejects_path_traversal_slug( monkeypatch: pytest.MonkeyPatch, tmp_path: Path ) -> None: From e000f9f230966df95229c3e4e6c3b33f7f407eb1 Mon Sep 17 00:00:00 2001 From: Ankit Juneja Date: Fri, 15 May 2026 16:17:03 +0530 Subject: [PATCH 09/11] fix(runbooks): catch ValueError in remove command, show clean error instead of traceback --- app/cli/commands/runbook.py | 6 +++++- tests/cli/test_runbook_command.py | 13 +++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/app/cli/commands/runbook.py b/app/cli/commands/runbook.py index 257167bd1..32408f830 100644 --- a/app/cli/commands/runbook.py +++ b/app/cli/commands/runbook.py @@ -48,7 +48,11 @@ def runbook_list() -> None: @click.argument("slug") def runbook_remove(slug: str) -> None: """Delete a runbook by slug (the filename without .md).""" - if remove(slug): + try: + found = remove(slug) + except (ValueError, RunbookValidationError) as exc: + raise click.ClickException(str(exc)) from exc + if found: click.echo(f"✓ Removed runbook '{slug}'") return raise click.ClickException(f"no runbook with slug '{slug}' in {RUNBOOK_DIR}") diff --git a/tests/cli/test_runbook_command.py b/tests/cli/test_runbook_command.py index a69ddc9ac..1377f4bff 100644 --- a/tests/cli/test_runbook_command.py +++ b/tests/cli/test_runbook_command.py @@ -96,3 +96,16 @@ def test_runbook_remove_unknown_slug_errors( assert result.exit_code != 0 assert "no runbook" in result.output + + +def test_runbook_remove_invalid_slug_shows_clean_error( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + _patch_home(monkeypatch, tmp_path) + + runner = CliRunner() + result = runner.invoke(cli, ["runbook", "remove", "../opensre.conf"]) + + assert result.exit_code != 0 + assert "invalid slug" in result.output + assert "Traceback" not in result.output From 5505ea2f2d541dbc3315d12b67bb7625ae0da159 Mon Sep 17 00:00:00 2001 From: Ankit Juneja Date: Fri, 15 May 2026 17:02:15 +0530 Subject: [PATCH 10/11] fix(runbooks): handle null commonLabels in alert_json to preserve keyword matches --- app/pipeline/pipeline.py | 4 +++- tests/agent/test_runbook_retrieval.py | 19 +++++++++++++++++++ .../synthetic/runbooks/test_runbook_suite.py | 4 ++-- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/app/pipeline/pipeline.py b/app/pipeline/pipeline.py index 3d55a7a4d..0de30a6f4 100644 --- a/app/pipeline/pipeline.py +++ b/app/pipeline/pipeline.py @@ -208,7 +208,9 @@ def _retrieve_runbook(state: dict[str, Any]) -> dict[str, Any] | None: raw = (alert_name + " " + problem_md).lower() keywords = [w for w in raw.split() if len(w) >= 3] alert_json = state.get("alert_json") or {} - common_labels = alert_json.get("commonLabels", {}) if isinstance(alert_json, dict) else {} + common_labels = ( + (alert_json.get("commonLabels") or {}) if isinstance(alert_json, dict) else {} + ) service = common_labels.get("service") or ( alert_json.get("service") if isinstance(alert_json, dict) else None ) diff --git a/tests/agent/test_runbook_retrieval.py b/tests/agent/test_runbook_retrieval.py index af6092b0b..b4bcafd56 100644 --- a/tests/agent/test_runbook_retrieval.py +++ b/tests/agent/test_runbook_retrieval.py @@ -60,6 +60,25 @@ def test_retrieve_runbook_returns_none_when_no_match( assert matched is None +def test_retrieve_runbook_matches_keywords_when_common_labels_null( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + home = _patch_home(monkeypatch, tmp_path) + _write_runbook(home, "oom-runbook", frontmatter="triggers:\n - oom\n - memory") + + state: dict[str, Any] = { + "alert_name": "OOMKilled", + "problem_md": "pod killed due to memory oom", + "pipeline_name": "other", + "alert_json": {"commonLabels": None}, # null in JSON + } + + matched = _retrieve_runbook(state) + + assert matched is not None + assert matched["slug"] == "oom-runbook" + + def test_retrieve_runbook_returns_none_on_empty_store( monkeypatch: pytest.MonkeyPatch, tmp_path: Path ) -> None: diff --git a/tests/synthetic/runbooks/test_runbook_suite.py b/tests/synthetic/runbooks/test_runbook_suite.py index 294aa3f31..19d8cf068 100644 --- a/tests/synthetic/runbooks/test_runbook_suite.py +++ b/tests/synthetic/runbooks/test_runbook_suite.py @@ -50,8 +50,8 @@ def test_runbook_suite_scenario( target_runbooks / f"{answer['expected_matched_slug']}.md", ) - common_labels = alert.get("commonLabels", {}) - common_annotations = alert.get("commonAnnotations", {}) + common_labels = alert.get("commonLabels") or {} + common_annotations = alert.get("commonAnnotations") or {} problem_md = ( common_annotations.get("summary", "") + " " + common_annotations.get("description", "") ) From 8202df4d70e2cadc65f0da02afa349e0f11e6864 Mon Sep 17 00:00:00 2001 From: Ankit Juneja Date: Fri, 29 May 2026 13:50:07 +0530 Subject: [PATCH 11/11] fix(runbooks): add /runbook to slash catalog, fix description parity --- app/cli/interactive_shell/command_registry/cli_parity.py | 3 ++- app/cli/interactive_shell/command_registry/slash_catalog.py | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/app/cli/interactive_shell/command_registry/cli_parity.py b/app/cli/interactive_shell/command_registry/cli_parity.py index e013b2954..719629504 100644 --- a/app/cli/interactive_shell/command_registry/cli_parity.py +++ b/app/cli/interactive_shell/command_registry/cli_parity.py @@ -382,8 +382,9 @@ def _cmd_debug(session: ReplSession, console: Console, args: list[str]) -> bool: ), SlashCommand( "/runbook", - "manage local runbooks for diagnosis grounding ('/runbook add|list|remove')", + "Manage local runbooks that ground diagnosis remediation steps.", _cmd_runbook, + usage=("/runbook add ", "/runbook list", "/runbook remove "), execution_tier=ExecutionTier.SAFE, ), ] diff --git a/app/cli/interactive_shell/command_registry/slash_catalog.py b/app/cli/interactive_shell/command_registry/slash_catalog.py index 577827eb5..6397be1ea 100644 --- a/app/cli/interactive_shell/command_registry/slash_catalog.py +++ b/app/cli/interactive_shell/command_registry/slash_catalog.py @@ -284,6 +284,11 @@ def _mcp( "User asks to run a debug check or diagnostic", anti_examples=("User asks a general debugging or troubleshooting question",), ), + "/runbook": _mcp( + "Manage local markdown runbooks that ground investigation remediation steps.", + "User wants to add, list, or remove a local runbook", + "User asks which runbooks are stored locally", + ), }