From 2d2c719a0fcba29d9671afefcfc9fcc2bdff06f7 Mon Sep 17 00:00:00 2001 From: Daniel Ellison Date: Fri, 27 Mar 2026 14:05:57 -0400 Subject: [PATCH 1/3] Extract self-sudo skip into shared resolve_claude_user() utility PR #192 added a self-sudo check to claude.py but missed the identical sudo pattern in triage.py and review.py. In single-user deployments where claude_user matches the bot process user, triage and review subprocesses attempted self-sudo and silently failed. Move the check into config.resolve_claude_user() and apply it in all three locations. Both the spawn path (sudo -u prefix) and cleanup path (killpg vs proc.kill) use the resolved value so they stay in sync. Fixes #194 --- src/kai/claude.py | 26 ++++----------------- src/kai/config.py | 36 +++++++++++++++++++++++++++++ src/kai/review.py | 20 +++++++++------- src/kai/triage.py | 20 +++++++++------- tests/test_claude.py | 14 ++++++----- tests/test_config.py | 37 ++++++++++++++++++++++++++--- tests/test_review.py | 55 ++++++++++++++++++++++++++++++++++++++++++-- tests/test_triage.py | 52 +++++++++++++++++++++++++++++++++++++++-- 8 files changed, 209 insertions(+), 51 deletions(-) diff --git a/src/kai/claude.py b/src/kai/claude.py index aed47d3..3a8bbbb 100644 --- a/src/kai/claude.py +++ b/src/kai/claude.py @@ -29,14 +29,13 @@ import json import logging import os -import pwd import signal import time from collections.abc import AsyncIterator from dataclasses import dataclass from pathlib import Path -from kai.config import DATA_DIR, WorkspaceConfig, parse_env_file +from kai.config import DATA_DIR, WorkspaceConfig, parse_env_file, resolve_claude_user from kai.history import get_recent_history log = logging.getLogger(__name__) @@ -208,26 +207,9 @@ async def _ensure_started(self) -> None: str(self.max_budget_usd), ] - # When running as a different user, spawn via sudo -u. - # The subprocess runs with the target user's UID, home directory, - # and environment - completely isolated from the bot user. - # Skip sudo when the target user is the same as the bot user - - # self-sudo fails (sudoers disallows it) and serves no purpose. - effective_claude_user = self.claude_user - if effective_claude_user: - # getpwuid raises KeyError when the UID has no passwd entry - # (e.g., containers with --user ). Treat that as "unknown - # user" and fall through to the sudo path. - try: - current_user = pwd.getpwuid(os.getuid()).pw_name - except KeyError: - current_user = None - if effective_claude_user == current_user: - log.warning( - "claude_user %r matches the bot process user; skipping sudo (no isolation)", - effective_claude_user, - ) - effective_claude_user = None + # Resolve self-sudo: skip sudo when claude_user matches the bot + # process user. The shared utility logs a warning when skipping. + effective_claude_user = resolve_claude_user(self.claude_user) if effective_claude_user: cmd = ["sudo", "-u", effective_claude_user, "--"] + claude_cmd diff --git a/src/kai/config.py b/src/kai/config.py index 6968728..d41c85e 100644 --- a/src/kai/config.py +++ b/src/kai/config.py @@ -14,6 +14,7 @@ import logging import os +import pwd import subprocess from dataclasses import dataclass, field from pathlib import Path @@ -47,6 +48,41 @@ IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp"} +# ── Self-sudo resolution ───────────────────────────────────────────── + +log = logging.getLogger(__name__) + + +def resolve_claude_user(claude_user: str | None) -> str | None: + """ + Return None if claude_user matches the current process user. + + When the bot process runs as the same OS user specified in + claude_user, sudo -u is both unnecessary and likely to fail + (sudoers typically disallows self-sudo). This function detects + that case and returns None so callers skip the sudo wrapper. + + Returns claude_user unchanged when it differs from the current + user, or when the current user cannot be determined (e.g., + containers with unmapped UIDs). + """ + if not claude_user: + return None + try: + current_user = pwd.getpwuid(os.getuid()).pw_name + except KeyError: + # UID has no passwd entry (e.g., containers with --user ). + # Can't determine if it's a self-sudo, so fall through to sudo. + return claude_user + if claude_user == current_user: + log.warning( + "claude_user %r matches bot process user; skipping sudo", + claude_user, + ) + return None + return claude_user + + # ── Per-workspace configuration ────────────────────────────────────── diff --git a/src/kai/review.py b/src/kai/review.py index 20c39fd..038647f 100644 --- a/src/kai/review.py +++ b/src/kai/review.py @@ -34,6 +34,7 @@ import aiohttp +from kai.config import resolve_claude_user from kai.prompt_utils import make_boundary log = logging.getLogger(__name__) @@ -640,12 +641,15 @@ async def run_review( str(_REVIEW_BUDGET_USD), ] - # When running as a different user, spawn via sudo -u. - # Same isolation pattern as PersistentClaude._ensure_started(). - if claude_user: - cmd = ["sudo", "-u", claude_user, "--"] + cmd + # Resolve self-sudo: skip sudo when claude_user matches the bot + # process user. Uses the resolved value for both the spawn command + # and the cleanup path below. + effective_user = resolve_claude_user(claude_user) - # When claude_user is set, start in a new process group so the + if effective_user: + cmd = ["sudo", "-u", effective_user, "--"] + cmd + + # When spawned via sudo, start in a new process group so the # entire tree (sudo + claude) can be killed via os.killpg(). # Without this, killing sudo orphans the claude Node.js process. proc = await asyncio.create_subprocess_exec( @@ -653,7 +657,7 @@ async def run_review( stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, - start_new_session=bool(claude_user), + start_new_session=bool(effective_user), ) try: @@ -663,10 +667,10 @@ async def run_review( ) except TimeoutError: # Kill the subprocess tree if it exceeds the timeout. - # When claude_user is set, start_new_session=True puts the + # When effective_user is set, start_new_session=True puts the # process in a new group (PGID == PID). Kill the group so # both sudo and its claude child die, preventing orphans. - if claude_user: + if effective_user: try: os.killpg(proc.pid, signal.SIGKILL) except ProcessLookupError: diff --git a/src/kai/triage.py b/src/kai/triage.py index 23f6bf3..61406a9 100644 --- a/src/kai/triage.py +++ b/src/kai/triage.py @@ -34,6 +34,7 @@ import aiohttp +from kai.config import resolve_claude_user from kai.prompt_utils import make_boundary log = logging.getLogger(__name__) @@ -370,12 +371,15 @@ async def run_triage( str(_TRIAGE_BUDGET_USD), ] - # When running as a different user, spawn via sudo -u. - # Same isolation pattern as PersistentClaude._ensure_started(). - if claude_user: - cmd = ["sudo", "-u", claude_user, "--"] + cmd + # Resolve self-sudo: skip sudo when claude_user matches the bot + # process user. Uses the resolved value for both the spawn command + # and the cleanup path below. + effective_user = resolve_claude_user(claude_user) - # When claude_user is set, start in a new process group so the + if effective_user: + cmd = ["sudo", "-u", effective_user, "--"] + cmd + + # When spawned via sudo, start in a new process group so the # entire tree (sudo + claude) can be killed via os.killpg(). # Without this, killing sudo orphans the claude Node.js process. proc = await asyncio.create_subprocess_exec( @@ -383,7 +387,7 @@ async def run_triage( stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, - start_new_session=bool(claude_user), + start_new_session=bool(effective_user), ) try: @@ -393,10 +397,10 @@ async def run_triage( ) except TimeoutError: # Kill the subprocess tree if it exceeds the timeout. - # When claude_user is set, start_new_session=True puts the + # When effective_user is set, start_new_session=True puts the # process in a new group (PGID == PID). Kill the group so # both sudo and its claude child die, preventing orphans. - if claude_user: + if effective_user: try: os.killpg(proc.pid, signal.SIGKILL) except ProcessLookupError: diff --git a/tests/test_claude.py b/tests/test_claude.py index f76e743..1e7f10f 100644 --- a/tests/test_claude.py +++ b/tests/test_claude.py @@ -188,7 +188,7 @@ async def test_self_sudo_skipped(self, caplog): mock_proc.stderr = AsyncMock() mock_exec.return_value = mock_proc - with caplog.at_level("WARNING", logger="kai.claude"): + with caplog.at_level("WARNING", logger="kai.config"): await claude._ensure_started() args = mock_exec.call_args @@ -199,7 +199,7 @@ async def test_self_sudo_skipped(self, caplog): # No process group isolation assert args[1].get("start_new_session") is False assert claude._pgid is None - # Warning was logged + # Warning was logged by resolve_claude_user() assert "skipping sudo" in caplog.text @pytest.mark.asyncio @@ -207,12 +207,13 @@ async def test_different_user_still_uses_sudo(self): """When claude_user is a different user, sudo is still used.""" claude = _make_claude(claude_user="some_other_user") - # Patch pwd.getpwuid to return a fixed value so the test doesn't - # depend on the real system user being different from "some_other_user". + # Patch pwd.getpwuid in config.py (where resolve_claude_user lives) + # to return a fixed value so the test doesn't depend on the real + # system user being different from "some_other_user". mock_pw = MagicMock(pw_name="kai") with ( patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec, - patch("kai.claude.pwd.getpwuid", return_value=mock_pw), + patch("kai.config.pwd.getpwuid", return_value=mock_pw), ): mock_proc = MagicMock() mock_proc.returncode = None @@ -233,9 +234,10 @@ async def test_pwd_keyerror_falls_through_to_sudo(self): claude = _make_claude(claude_user="container_user") # Simulate a container environment where the UID has no passwd entry. + # Patch in config.py where resolve_claude_user() calls pwd.getpwuid. with ( patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec, - patch("kai.claude.pwd.getpwuid", side_effect=KeyError("uid not found")), + patch("kai.config.pwd.getpwuid", side_effect=KeyError("uid not found")), ): mock_proc = MagicMock() mock_proc.returncode = None diff --git a/tests/test_config.py b/tests/test_config.py index 92e98f7..e6d61f3 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,13 +1,14 @@ -"""Tests for config.py load_config(), DATA_DIR, and _read_protected_file().""" +"""Tests for config.py load_config(), DATA_DIR, _read_protected_file(), and resolve_claude_user().""" import os +import pwd import subprocess from pathlib import Path -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest -from kai.config import _read_protected_file, load_config +from kai.config import _read_protected_file, load_config, resolve_claude_user # All env vars that load_config reads _CONFIG_ENV_VARS = [ @@ -528,3 +529,33 @@ def test_enabled(self, monkeypatch): monkeypatch.setenv("ISSUE_TRIAGE_ENABLED", "true") config = load_config() assert config.issue_triage_enabled is True + + +# ── resolve_claude_user() ──────────────────────────────────────────── + + +class TestResolveClaudeUser: + def test_none_returns_none(self): + """None input returns None.""" + assert resolve_claude_user(None) is None + + def test_empty_string_returns_none(self): + """Empty string is falsy, returns None.""" + assert resolve_claude_user("") is None + + def test_self_sudo_returns_none(self): + """When claude_user matches current process user, returns None.""" + try: + current_user = pwd.getpwuid(os.getuid()).pw_name + except KeyError: + pytest.skip("UID has no passwd entry") + assert resolve_claude_user(current_user) is None + + def test_different_user_returns_unchanged(self): + """When claude_user differs from current user, returns it unchanged.""" + assert resolve_claude_user("some_nonexistent_user_xyz") == "some_nonexistent_user_xyz" + + def test_unknown_uid_returns_unchanged(self, monkeypatch): + """When pwd.getpwuid raises KeyError, returns claude_user unchanged.""" + monkeypatch.setattr("kai.config.pwd.getpwuid", MagicMock(side_effect=KeyError("no entry"))) + assert resolve_claude_user("container_user") == "container_user" diff --git a/tests/test_review.py b/tests/test_review.py index ddeaacc..cadaa1b 100644 --- a/tests/test_review.py +++ b/tests/test_review.py @@ -1,6 +1,8 @@ """Tests for review.py PR review agent - metadata, prompts, subprocess, and output.""" import json +import os +import pwd import re import signal from unittest.mock import AsyncMock, MagicMock, patch @@ -556,6 +558,8 @@ async def test_timeout_with_claude_user_kills_group(self): patch("kai.review.asyncio.create_subprocess_exec", return_value=mock_proc), patch("kai.review.asyncio.wait_for", side_effect=TimeoutError()), patch("kai.review.os.killpg") as mock_killpg, + # Ensure resolve_claude_user doesn't skip sudo on this machine + patch("kai.config.pwd.getpwuid", return_value=MagicMock(pw_name="notkai")), pytest.raises(RuntimeError, match="timed out"), ): await run_review("review this code", claude_user="kai") @@ -569,7 +573,10 @@ async def test_claude_user_starts_new_session(self): """claude_user spawns with start_new_session=True for group kill.""" mock_proc = _mock_process(stdout=b"review output") - with patch("kai.review.asyncio.create_subprocess_exec", return_value=mock_proc) as mock_exec: + with ( + patch("kai.review.asyncio.create_subprocess_exec", return_value=mock_proc) as mock_exec, + patch("kai.config.pwd.getpwuid", return_value=MagicMock(pw_name="notkai")), + ): await run_review("prompt", claude_user="kai") kwargs = mock_exec.call_args[1] @@ -591,7 +598,10 @@ async def test_with_claude_user(self): """When claude_user is set, command is prefixed with sudo -u.""" mock_proc = _mock_process(stdout=b"review output") - with patch("kai.review.asyncio.create_subprocess_exec", return_value=mock_proc) as mock_exec: + with ( + patch("kai.review.asyncio.create_subprocess_exec", return_value=mock_proc) as mock_exec, + patch("kai.config.pwd.getpwuid", return_value=MagicMock(pw_name="notkai")), + ): await run_review("prompt", claude_user="kai") cmd = mock_exec.call_args[0] @@ -611,6 +621,47 @@ async def test_without_claude_user(self): assert cmd[0] == "claude" assert "sudo" not in cmd + @pytest.mark.asyncio + async def test_self_sudo_skipped(self): + """When claude_user matches bot user, review skips sudo.""" + try: + current_user = pwd.getpwuid(os.getuid()).pw_name + except KeyError: + pytest.skip("UID has no passwd entry") + mock_proc = _mock_process(stdout=b"review output") + + with patch("kai.review.asyncio.create_subprocess_exec", return_value=mock_proc) as mock_exec: + await run_review("prompt", claude_user=current_user) + + cmd = mock_exec.call_args[0] + assert cmd[0] != "sudo", "Self-sudo should be skipped" + assert cmd[0] == "claude" + kwargs = mock_exec.call_args[1] + assert kwargs["start_new_session"] is False + + @pytest.mark.asyncio + async def test_self_sudo_timeout_kills_directly(self): + """When self-sudo is skipped, timeout kills proc directly (not killpg).""" + try: + current_user = pwd.getpwuid(os.getuid()).pw_name + except KeyError: + pytest.skip("UID has no passwd entry") + mock_proc = AsyncMock() + mock_proc.pid = 12345 + mock_proc.communicate = AsyncMock(side_effect=TimeoutError) + mock_proc.wait = AsyncMock() + + with ( + patch("kai.review.asyncio.create_subprocess_exec", return_value=mock_proc), + patch("os.killpg") as mock_killpg, + pytest.raises(RuntimeError, match="timed out"), + ): + await run_review("prompt", claude_user=current_user) + + # Should kill the process directly, not the process group + mock_proc.kill.assert_called_once() + mock_killpg.assert_not_called() + @pytest.mark.asyncio async def test_prompt_sent_via_stdin(self): """The review prompt is sent to the subprocess via stdin, not as an argument.""" diff --git a/tests/test_triage.py b/tests/test_triage.py index 3adbc4b..ea1b635 100644 --- a/tests/test_triage.py +++ b/tests/test_triage.py @@ -1,9 +1,11 @@ """Tests for triage.py issue triage pipeline.""" import json +import os +import pwd import re import signal -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -338,6 +340,8 @@ async def test_timeout_with_claude_user_kills_group(self): with ( patch("kai.triage.asyncio.create_subprocess_exec", return_value=mock_proc), patch("kai.triage.os.killpg") as mock_killpg, + # Ensure resolve_claude_user doesn't skip sudo on this machine + patch("kai.config.pwd.getpwuid", return_value=MagicMock(pw_name="notkai")), pytest.raises(RuntimeError, match="timed out"), ): await run_triage("test prompt", claude_user="kai") @@ -350,7 +354,10 @@ async def test_claude_user_starts_new_session(self): """claude_user spawns with start_new_session=True.""" mock_proc = _mock_subprocess(returncode=0, stdout='{"labels": []}') - with patch("kai.triage.asyncio.create_subprocess_exec", return_value=mock_proc) as mock_exec: + with ( + patch("kai.triage.asyncio.create_subprocess_exec", return_value=mock_proc) as mock_exec, + patch("kai.config.pwd.getpwuid", return_value=MagicMock(pw_name="notkai")), + ): await run_triage("prompt", claude_user="kai") kwargs = mock_exec.call_args[1] @@ -367,6 +374,47 @@ async def test_no_claude_user_no_new_session(self): kwargs = mock_exec.call_args[1] assert kwargs.get("start_new_session") is False + @pytest.mark.asyncio + async def test_self_sudo_skipped(self): + """When claude_user matches bot user, triage skips sudo.""" + try: + current_user = pwd.getpwuid(os.getuid()).pw_name + except KeyError: + pytest.skip("UID has no passwd entry") + mock_proc = _mock_subprocess(returncode=0, stdout='{"labels": [], "priority": "low", "summary": "test"}') + + with patch("kai.triage.asyncio.create_subprocess_exec", return_value=mock_proc) as mock_exec: + await run_triage("prompt", claude_user=current_user) + + cmd = mock_exec.call_args[0] + assert cmd[0] != "sudo", "Self-sudo should be skipped" + assert cmd[0] == "claude" + kwargs = mock_exec.call_args[1] + assert kwargs["start_new_session"] is False + + @pytest.mark.asyncio + async def test_self_sudo_timeout_kills_directly(self): + """When self-sudo is skipped, timeout kills proc directly (not killpg).""" + try: + current_user = pwd.getpwuid(os.getuid()).pw_name + except KeyError: + pytest.skip("UID has no passwd entry") + mock_proc = AsyncMock() + mock_proc.pid = 12345 + mock_proc.communicate = AsyncMock(side_effect=TimeoutError) + mock_proc.wait = AsyncMock() + + with ( + patch("kai.triage.asyncio.create_subprocess_exec", return_value=mock_proc), + patch("os.killpg") as mock_killpg, + pytest.raises(RuntimeError, match="timed out"), + ): + await run_triage("prompt", claude_user=current_user) + + # Should kill the process directly, not the process group + mock_proc.kill.assert_called_once() + mock_killpg.assert_not_called() + @pytest.mark.asyncio async def test_nonzero_exit(self): """Non-zero exit code raises RuntimeError.""" From aff7d77465d21c341f592ee20bfb22fdb69e7b36 Mon Sep 17 00:00:00 2001 From: Daniel Ellison Date: Fri, 27 Mar 2026 14:09:50 -0400 Subject: [PATCH 2/3] Fix review: remove duplicate log, use module-scoped patches, match timeout pattern --- src/kai/config.py | 2 -- tests/test_review.py | 4 ++-- tests/test_triage.py | 4 ++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/kai/config.py b/src/kai/config.py index d41c85e..b302410 100644 --- a/src/kai/config.py +++ b/src/kai/config.py @@ -50,8 +50,6 @@ # ── Self-sudo resolution ───────────────────────────────────────────── -log = logging.getLogger(__name__) - def resolve_claude_user(claude_user: str | None) -> str | None: """ diff --git a/tests/test_review.py b/tests/test_review.py index cadaa1b..4131b6c 100644 --- a/tests/test_review.py +++ b/tests/test_review.py @@ -648,12 +648,12 @@ async def test_self_sudo_timeout_kills_directly(self): pytest.skip("UID has no passwd entry") mock_proc = AsyncMock() mock_proc.pid = 12345 - mock_proc.communicate = AsyncMock(side_effect=TimeoutError) mock_proc.wait = AsyncMock() with ( patch("kai.review.asyncio.create_subprocess_exec", return_value=mock_proc), - patch("os.killpg") as mock_killpg, + patch("kai.review.asyncio.wait_for", side_effect=TimeoutError()), + patch("kai.review.os.killpg") as mock_killpg, pytest.raises(RuntimeError, match="timed out"), ): await run_review("prompt", claude_user=current_user) diff --git a/tests/test_triage.py b/tests/test_triage.py index ea1b635..c409da4 100644 --- a/tests/test_triage.py +++ b/tests/test_triage.py @@ -401,12 +401,12 @@ async def test_self_sudo_timeout_kills_directly(self): pytest.skip("UID has no passwd entry") mock_proc = AsyncMock() mock_proc.pid = 12345 - mock_proc.communicate = AsyncMock(side_effect=TimeoutError) mock_proc.wait = AsyncMock() with ( patch("kai.triage.asyncio.create_subprocess_exec", return_value=mock_proc), - patch("os.killpg") as mock_killpg, + patch("kai.triage.asyncio.wait_for", side_effect=TimeoutError()), + patch("kai.triage.os.killpg") as mock_killpg, pytest.raises(RuntimeError, match="timed out"), ): await run_triage("prompt", claude_user=current_user) From e7d88fdbb440e82937cb4b859fc72167767ac115 Mon Sep 17 00:00:00 2001 From: Daniel Ellison Date: Fri, 27 Mar 2026 14:17:33 -0400 Subject: [PATCH 3/3] Make test_different_user_returns_unchanged hermetic with monkeypatch --- tests/test_config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index e6d61f3..d40efed 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -551,9 +551,10 @@ def test_self_sudo_returns_none(self): pytest.skip("UID has no passwd entry") assert resolve_claude_user(current_user) is None - def test_different_user_returns_unchanged(self): + def test_different_user_returns_unchanged(self, monkeypatch): """When claude_user differs from current user, returns it unchanged.""" - assert resolve_claude_user("some_nonexistent_user_xyz") == "some_nonexistent_user_xyz" + monkeypatch.setattr("kai.config.pwd.getpwuid", MagicMock(return_value=MagicMock(pw_name="localuser"))) + assert resolve_claude_user("some_other_user") == "some_other_user" def test_unknown_uid_returns_unchanged(self, monkeypatch): """When pwd.getpwuid raises KeyError, returns claude_user unchanged."""