Tracer-Cloud · X1Vi · May 25, 2026 · May 25, 2026 · May 26, 2026 · May 26, 2026
diff --git a/app/agents/tail.py b/app/agents/tail.py
@@ -192,7 +192,7 @@ def _resolve_macos_target(pid: int) -> _ResolvedTarget:
     return _ResolvedTarget(pid=pid, path=_check_regular_file(Path(fd_name), what="stdout"))
 
 
-def _resolve_target(pid: int) -> _ResolvedTarget:
+def resolve_target(pid: int) -> _ResolvedTarget:
     if sys.platform == "win32":
         raise AttachUnsupported("Windows is not supported")
     # Guard non-positive ids before probing: ``psutil.pid_exists(0)`` can
@@ -427,7 +427,7 @@ def attach(
     vanished, open failed). Caller is responsible for closing the
     session — preferably via ``with attach(pid) as sess: …``.
     """
-    target = _resolve_target(pid)
+    target = resolve_target(pid)
     return AttachSession(
         target,
         buffer_bytes=buffer_bytes,
@@ -446,4 +446,5 @@ def attach(
     "AttachUnsupported",
     "TailBuffer",
     "attach",
+    "resolve_target",
 ]
diff --git a/app/tools/LocalProcessIntrospectTool/__init__.py b/app/tools/LocalProcessIntrospectTool/__init__.py
@@ -0,0 +1,101 @@
+"""Tool for introspecting a local process during incident response.
+
+Returns a psutil snapshot, the last 50 stdout lines and error signals for a PID.
+The investigation planner calls this to diagnose stuck or misbehaving
+local agents from the OpenSRE interactive shell.
+"""
+
+from __future__ import annotations
+
+import os
+from datetime import UTC
+from typing import Any
+
+from app.agents.error_signals import ErrorSignals
+from app.agents.probe import ProcessSnapshot, probe
+from app.agents.tail import DEFAULT_MAX_BYTES, AttachUnsupported, resolve_target
+from app.tools.tool_decorator import tool
+
+
+def _snapshot_to_dict(snapshot: ProcessSnapshot) -> dict[str, Any]:
+    return {
+        "pid": snapshot.pid,
+        "cpu_percent": snapshot.cpu_percent,
+        "rss_mb": snapshot.rss_mb,
+        "num_fds": snapshot.num_fds,
+        "num_connections": snapshot.num_connections,
+        "status": snapshot.status,
+        "started_at": snapshot.started_at.astimezone(UTC).isoformat(),
+    }
+
+
+def _read_stdout_tail(pid: int, max_lines: int = 50) -> str | None:
+    """Read the last ``max_lines`` lines from the process's stdout.
+
+    Linux: resolves ``/proc/<pid>/fd/1``.
+    macOS: resolves fd 1 via ``lsof``.
+    Returns ``None`` when the pid doesn't exist, stdout is a pipe/socket/tty,
+    or we lack permission — the planner treats ``None`` as "unavailable".
+    """
+    try:
+        target = resolve_target(pid)
+    except (AttachUnsupported, OSError):
+        return None
+    try:
+        with open(target.path, "rb") as f:
+            offset = max(0, os.fstat(f.fileno()).st_size - DEFAULT_MAX_BYTES)
+            if offset > 0:
+                f.seek(offset)
+            data = f.read()
+    except (OSError, PermissionError, FileNotFoundError):
+        return None
+    lines = data.decode("utf-8", errors="replace").splitlines()
+    return "\n".join(lines[-max_lines:])
+
+
+@tool(
+    name="local_process_introspect",
+    source="knowledge",
+    description=(
+        "Introspect a local process: return a psutil resource snapshot "
+        "(CPU%, RSS MB, fd count, connection count, status, start time) "
+        "and the last 50 lines of stdout. Use this when the planner needs "
+        "to diagnose a stuck, high-cpu, or misbehaving local agent during "
+        "incident response."
+    ),
+    input_schema={
+        "type": "object",
+        "properties": {
+            "pid": {
+                "type": "integer",
+                "description": "Process ID to introspect.",
+            },
+        },
+        "required": ["pid"],
+    },
+    use_cases=[
+        "Diagnosing a stuck or high-cpu local agent during incident response",
+        "Checking whether a process is alive and making forward progress",
+        "Reading recent stdout output from a local process",
+        "Verifying resource usage of a monitored agent",
+    ],
+    outputs={
+        "snapshot": "psutil ProcessSnapshot dict, or null if the PID is inaccessible",
+        "stdout_tail": "last 50 stdout lines as a string, or null if stdout cannot be read",
+        "error_counts": "error/retry counts per category from recent stdout",
+    },
+    surfaces=("investigation",),
+)
+def local_process_introspect(pid: int) -> dict[str, Any]:
+    snapshot = probe(pid)
+    stdout_tail = _read_stdout_tail(pid)
+    error_counts: dict[str, float] = {}
+    if stdout_tail:
+        signals = ErrorSignals()
+        signals.observe(stdout_tail)
+        error_counts = signals.rate_per_minute()
+    return {
+        "snapshot": _snapshot_to_dict(snapshot) if snapshot else None,
+        "stdout_tail": stdout_tail,
+        "error_counts": error_counts,
+    }
diff --git a/docs/agents.mdx b/docs/agents.mdx
@@ -292,6 +292,43 @@ is required so a stray keypress doesn't abort an in-flight response.
   buggy or hostile agents can emit control sequences affecting the viewer.
   Only trace processes you trust; there is no sandboxing step.
 
+## `local_process_introspect` — point-in-time process health check
+
+The `local_process_introspect` tool is used by the investigation planner to
+diagnose local agent health during incident response. It returns a process
+resource snapshot (CPU, memory, file descriptors, connections, status, start
+time) and the last 50 lines of stdout for a given PID.
+
+Internally it reuses the same machinery as `/agents trace`:
+`resolve_target` for stdout resolution, `probe` for the psutil snapshot, and
+`ErrorSignals` to classify error/retry patterns from recent output.
+
+### When the planner calls it
+
+- Diagnosing a stuck or high-CPU local agent during incident investigation
+- Checking whether a process is alive and making forward progress
+- Reading recent stdout output from a local process
+- Verifying resource usage of a monitored agent
+
+### Output fields
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `snapshot` | `dict` or `null` | psutil process snapshot: `pid`, `cpu_percent`, `rss_mb`, `num_fds`, `num_connections`, `status`, `started_at` |
+| `stdout_tail` | `str` or `null` | Last 50 lines of stdout; `null` when unavailable (dead PID, pipe/socket/tty, or permission denied) |
+| `error_counts` | `dict[str, float]` | Error/retry rate per minute per category (`traceback`, `rate_limit`, `http_5xx`, `tool_failure`) from recent stdout; `{}` when stdout is unavailable |
+
+### Example
+
+```python
+local_process_introspect(pid=1234)
+# {
+#   "snapshot": {"pid": 1234, "cpu_percent": 4.2, "rss_mb": 256.0, ...},
+#   "stdout_tail": "line 50\nline 51\n...\nline 99",
+#   "error_counts": {"traceback": 2.0, "rate_limit": 1.0, "http_5xx": 0.0, "tool_failure": 0.0}
+# }
+```
+
 ## `/agents bus` — shared context channel
 
 The bus is an opt-in, local-only pub/sub channel that carries findings between

diff --git a/tests/agents/test_tail.py b/tests/agents/test_tail.py
@@ -33,9 +33,9 @@
     _parse_lsof_fd1,
     _resolve_linux_target,
     _resolve_macos_target,
-    _resolve_target,
     _ResolvedTarget,
     attach,
+    resolve_target,
 )
 
 
@@ -341,20 +341,20 @@ class TestResolveTargetDispatch:
     def test_windows_rejected(self, monkeypatch: pytest.MonkeyPatch) -> None:
         monkeypatch.setattr(tail_mod.sys, "platform", "win32")
         with pytest.raises(AttachUnsupported, match="Windows"):
-            _resolve_target(1234)
+            resolve_target(1234)
 
     def test_no_such_pid_rejected(self, monkeypatch: pytest.MonkeyPatch) -> None:
         # platform is not "win32" so we hit the pid_exists check
         monkeypatch.setattr(tail_mod.sys, "platform", "linux")
         monkeypatch.setattr(tail_mod, "pid_exists", lambda _pid: False)
         with pytest.raises(AttachUnsupported, match="no such pid"):
-            _resolve_target(99_999_999)
+            resolve_target(99_999_999)
 
     def test_unknown_platform_rejected(self, monkeypatch: pytest.MonkeyPatch) -> None:
         monkeypatch.setattr(tail_mod.sys, "platform", "freebsd13")
         monkeypatch.setattr(tail_mod, "pid_exists", lambda _pid: True)
         with pytest.raises(AttachUnsupported, match="freebsd13"):
-            _resolve_target(1234)
+            resolve_target(1234)
 
     @pytest.mark.parametrize("invalid_pid", [0, -1, -99])
     def test_non_positive_pid_rejected_before_pid_exists(
@@ -363,15 +363,15 @@ def test_non_positive_pid_rejected_before_pid_exists(
         # Regression guard: ``psutil.pid_exists(0)`` can raise
         # ``PermissionError`` on macOS. The slash handler only catches
         # ``AttachUnsupported``, so an unguarded probe would crash the
-        # REPL. ``_resolve_target`` must reject non-positive ids before
+        # REPL. ``resolve_target`` must reject non-positive ids before
         # touching ``pid_exists`` at all.
         def _boom(_pid: int) -> bool:
             raise PermissionError(1, "operation not permitted")
 
         monkeypatch.setattr(tail_mod.sys, "platform", "darwin")
         monkeypatch.setattr(tail_mod, "pid_exists", _boom)
         with pytest.raises(AttachUnsupported, match="must be positive"):
-            _resolve_target(invalid_pid)
+            resolve_target(invalid_pid)
 
 
 class TestAttachEagerValidation:
@@ -384,7 +384,7 @@ def test_attach_raises_immediately_on_unsupported(
         def _fail(_pid: int) -> _ResolvedTarget:
             raise AttachUnsupported("planned failure")
 
-        monkeypatch.setattr(tail_mod, "_resolve_target", _fail)
+        monkeypatch.setattr(tail_mod, "resolve_target", _fail)
         with pytest.raises(AttachUnsupported, match="planned failure"):
             attach(1234)
 
@@ -549,7 +549,7 @@ def test_attach_eagerly_opens_real_file(self, tmp_path: Path) -> None:
         log.write_text("")
         with patch.object(
             tail_mod,
-            "_resolve_target",
+            "resolve_target",
             return_value=_ResolvedTarget(pid=os.getpid(), path=log),
         ):
             sess = attach(os.getpid(), poll_interval_s=0.01)