From d63b85872eadd20bd3aea75e2e0e688930cd050a Mon Sep 17 00:00:00 2001 From: jp Date: Tue, 2 Jun 2026 13:37:55 -0300 Subject: [PATCH 1/4] Harden macOS OrbStack runtime recovery --- .github/workflows/fresh-host-core.yml | 35 ++- tests/suites/unit/ci/test_hosted_docker.py | 199 +++++++++++- .../helpers/_hosted_docker/diagnostics.py | 118 +++++-- tests/utils/helpers/_hosted_docker/models.py | 30 +- tests/utils/helpers/_hosted_docker/runtime.py | 287 +++++++++++++++++- tests/utils/helpers/hosted_docker.py | 2 + 6 files changed, 624 insertions(+), 47 deletions(-) diff --git a/.github/workflows/fresh-host-core.yml b/.github/workflows/fresh-host-core.yml index 7d683c0..b16a67a 100644 --- a/.github/workflows/fresh-host-core.yml +++ b/.github/workflows/fresh-host-core.yml @@ -130,19 +130,28 @@ jobs: - name: Set up Docker via OrbStack # v1.6.0+ panics on GHA macos-15-intel (Skylake CPU check); v1.5.1 is safe. # Placed first to overlap OrbStack startup with toolchain setup (~90s saved). - run: >- - test -f /tmp/orbstack.dmg || - curl -fsSL - "https://cdn-updates.orbstack.dev/amd64/OrbStack_v1.5.1_16857_amd64.dmg" - -o /tmp/orbstack.dmg && - hdiutil attach -quiet -nobrowse -mountpoint /tmp/orbstack_mnt /tmp/orbstack.dmg && - cp -R /tmp/orbstack_mnt/OrbStack.app /Applications/ && - hdiutil detach -quiet /tmp/orbstack_mnt && - sudo ln -sf /Applications/OrbStack.app/Contents/MacOS/bin/orb /usr/local/bin/orb && - HOMEBREW_NO_AUTO_UPDATE=1 HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 - brew install --quiet docker docker-compose && - mkdir -p ~/.docker/cli-plugins && - ln -sfn "$(brew --prefix)/bin/docker-compose" ~/.docker/cli-plugins/docker-compose && + env: + ORBSTACK_DMG_PATH: /tmp/orbstack.dmg + ORBSTACK_DMG_SHA256: fb95108ded54a27603b68184a13f7e666e0e758167652c0b65cd4dc5eff94617 + ORBSTACK_DMG_URL: https://cdn-updates.orbstack.dev/amd64/OrbStack_v1.5.1_16857_amd64.dmg + run: | + set -euo pipefail + if [[ ! -f "${ORBSTACK_DMG_PATH}" ]]; then + curl -fsSL "${ORBSTACK_DMG_URL}" -o "${ORBSTACK_DMG_PATH}" + fi + actual_sha="$(shasum -a 256 "${ORBSTACK_DMG_PATH}" | awk '{print $1}')" + if [[ "${actual_sha}" != "${ORBSTACK_DMG_SHA256}" ]]; then + echo "OrbStack DMG checksum mismatch: expected ${ORBSTACK_DMG_SHA256}, got ${actual_sha}" >&2 + rm -f "${ORBSTACK_DMG_PATH}" + exit 1 + fi + hdiutil attach -quiet -nobrowse -mountpoint /tmp/orbstack_mnt "${ORBSTACK_DMG_PATH}" + cp -R /tmp/orbstack_mnt/OrbStack.app /Applications/ + hdiutil detach -quiet /tmp/orbstack_mnt + sudo ln -sf /Applications/OrbStack.app/Contents/MacOS/bin/orb /usr/local/bin/orb + HOMEBREW_NO_AUTO_UPDATE=1 HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 brew install --quiet docker docker-compose + mkdir -p ~/.docker/cli-plugins + ln -sfn "$(brew --prefix)/bin/docker-compose" ~/.docker/cli-plugins/docker-compose nohup orb start > /tmp/orb-start.log 2>&1 & - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: diff --git a/tests/suites/unit/ci/test_hosted_docker.py b/tests/suites/unit/ci/test_hosted_docker.py index 2fc5a2d..8ca6e70 100644 --- a/tests/suites/unit/ci/test_hosted_docker.py +++ b/tests/suites/unit/ci/test_hosted_docker.py @@ -4,6 +4,7 @@ import json import subprocess +import sys from pathlib import Path from typing import Any, Protocol, cast @@ -35,6 +36,10 @@ def __call__(self, *, cwd: Path, env: dict[str, str], max_attempts: int = 60) -> _WaitForDockerReady, cast(Any, hosted_docker)._wait_for_docker_ready, ) +_ORBSTACK_ONLY = pytest.mark.skipif( + sys.platform != "darwin", + reason="OrbStack is only available on macOS.", +) def _sleep(_: float) -> None: @@ -581,6 +586,7 @@ def fake_run_command( assert attempts["count"] == 3 +@_ORBSTACK_ONLY def test_collect_runtime_diagnostics_uses_compose_probe_env( tmp_path: Path, test_context: TestContext, @@ -591,6 +597,7 @@ def test_collect_runtime_diagnostics_uses_compose_probe_env( workspace = tmp_path / "workspace" workspace.mkdir() test_context.env.set("GITHUB_EVENT_NAME", "push") + test_context.env.set("DOCKER_HOST", "unix:///tmp/orbstack-test.sock") context = fresh_host.prepare_context( scenario_id="macos-sidecars", @@ -637,12 +644,21 @@ def fake_sysctl_int(name: str) -> int | None: hosted_docker.collect_runtime_diagnostics(Path(context.context_path)) + command_names = {" ".join(command) for command, _env in commands} + assert "docker context ls" in command_names + assert "orb status" in command_names + assert "orb logs" in command_names compose_commands = [ env for command, env in commands if command[:3] == ["docker", "compose", "-f"] ] assert compose_commands assert all(env["NEO4J_PASSWORD"] == "runtime-secret" for env in compose_commands) assert all("COMPOSE_PROJECT_NAME" in env for env in compose_commands) + socket_state = ( + Path(context.diagnostics_dir) / "docker-socket-state.txt" + ).read_text(encoding="utf-8") + assert "DOCKER_HOST=unix:///tmp/orbstack-test.sock" in socket_state + assert "path=/tmp/orbstack-test.sock" in socket_state def test_wait_runtime_ready_rejects_non_macos_context( @@ -668,6 +684,7 @@ def test_wait_runtime_ready_rejects_non_macos_context( hosted_docker_runtime.wait_runtime_ready(Path(context.context_path)) +@_ORBSTACK_ONLY def test_wait_runtime_ready_sets_orbstack_socket_when_docker_host_unset( tmp_path: Path, test_context: TestContext, @@ -693,7 +710,9 @@ def test_wait_runtime_ready_sets_orbstack_socket_when_docker_host_unset( def fake_wait_for_docker_ready( *, cwd: Path, env: dict[str, str], max_attempts: int = 60 ) -> None: - captured.append({"DOCKER_HOST": env.get("DOCKER_HOST", ""), "max_attempts": str(max_attempts)}) # type: ignore[dict-item] + captured.append( + {"DOCKER_HOST": env.get("DOCKER_HOST", ""), "max_attempts": str(max_attempts)} + ) def fake_run_checked( command: list[str], @@ -736,6 +755,184 @@ def fake_sysctl_int(name: str) -> int | None: assert written_env.get("DOCKER_HOST") == expected_docker_host +@_ORBSTACK_ONLY +def test_wait_runtime_ready_recovers_orbstack_with_stop_start_fallback( + tmp_path: Path, + test_context: TestContext, +) -> None: + """wait_runtime_ready should recover OrbStack when Docker reports socket EOF.""" + github_env = tmp_path / "github.env" + runner_temp = tmp_path / "runner-temp" + workspace = tmp_path / "workspace" + workspace.mkdir() + test_context.env.set("GITHUB_EVENT_NAME", "push") + test_context.env.set("DOCKER_HOST", "unix:///tmp/orbstack-test.sock") + + context = fresh_host.prepare_context( + scenario_id="macos-sidecars", + repo_root=workspace, + runner_temp=runner_temp, + workspace=workspace, + github_env_file=github_env, + ) + wait_calls = 0 + recovery_commands: list[list[str]] = [] + + def fake_wait_for_docker_ready( + *, cwd: Path, env: dict[str, str], max_attempts: int = 60 + ) -> None: + nonlocal wait_calls + del cwd, env + wait_calls += 1 + if wait_calls == 1: + raise fresh_host.FreshHostError("error during connect: EOF") + assert max_attempts == 90 + + def fake_runtime_run_command( + command: list[str], + *, + cwd: Path, + env: dict[str, str], + timeout_seconds: int = 3600, + capture_output: bool = False, + ) -> subprocess.CompletedProcess[str]: + del cwd, env, timeout_seconds, capture_output + recovery_commands.append(command) + if command == ["orb", "restart", "docker"]: + return subprocess.CompletedProcess(command, 1, stdout="", stderr="restart failed") + return subprocess.CompletedProcess(command, 0, stdout="ok", stderr="") + + def fake_diagnostics_run_command( + command: list[str], + *, + cwd: Path, + env: dict[str, str], + timeout_seconds: int = 3600, + capture_output: bool = False, + ) -> subprocess.CompletedProcess[str]: + del cwd, env, timeout_seconds, capture_output + return subprocess.CompletedProcess(command, 0, stdout="ok", stderr="") + + def fake_run_checked( + command: list[str], + *, + cwd: Path, + env: dict[str, str], + timeout_seconds: int = 3600, + capture_output: bool = False, + ) -> subprocess.CompletedProcess[str]: + del cwd, env, timeout_seconds, capture_output + return subprocess.CompletedProcess(command, 0, stdout="ok", stderr="") + + def fake_sysctl_int(name: str) -> int | None: + return 4 if name == "hw.ncpu" else 8 * 1073741824 + + test_context.patch.patch_object( + hosted_docker_runtime, "wait_for_docker_ready", new=fake_wait_for_docker_ready + ) + test_context.patch.patch_object( + hosted_docker_runtime, "run_command", new=fake_runtime_run_command + ) + test_context.patch.patch_object(hosted_docker_runtime, "run_checked", new=fake_run_checked) + test_context.patch.patch_object(hosted_docker_runtime.time, "sleep", new=_sleep) + test_context.patch.patch_object(hosted_docker_runtime, "sysctl_int", new=fake_sysctl_int) + test_context.patch.patch_object( + hosted_docker_diagnostics, "run_command", new=fake_diagnostics_run_command + ) + test_context.patch.patch_object( + hosted_docker_diagnostics, "sysctl_int", new=fake_sysctl_int + ) + + report = hosted_docker_runtime.wait_runtime_ready(Path(context.context_path)) + + assert report.failure_reason is None + assert report.failure_phase is None + assert report.recovery_attempt_count == 1 + assert report.recovery_attempts[0].status == "success" + assert report.recovery_attempts[0].trigger_reason == "docker_socket_eof" + assert recovery_commands == [ + ["orb", "restart", "docker"], + ["orb", "stop"], + ["orb", "start"], + ] + assert (Path(context.diagnostics_dir) / "runtime-recovery/attempt-01/before").is_dir() + assert (Path(context.diagnostics_dir) / "runtime-recovery/attempt-01/after").is_dir() + payload = json.loads(Path(context.runtime_report_path or "").read_text(encoding="utf-8")) + assert payload["recovery_attempt_count"] == 1 + assert payload["recovery_attempts"][0]["recovery_exit_code"] == 0 + + +@_ORBSTACK_ONLY +def test_wait_runtime_ready_reports_exhausted_orbstack_recovery( + tmp_path: Path, + test_context: TestContext, +) -> None: + """wait_runtime_ready should fail with structured recovery metadata.""" + github_env = tmp_path / "github.env" + runner_temp = tmp_path / "runner-temp" + workspace = tmp_path / "workspace" + workspace.mkdir() + test_context.env.set("GITHUB_EVENT_NAME", "push") + test_context.env.set("DOCKER_HOST", "unix:///tmp/orbstack-test.sock") + + context = fresh_host.prepare_context( + scenario_id="macos-sidecars", + repo_root=workspace, + runner_temp=runner_temp, + workspace=workspace, + github_env_file=github_env, + ) + + def fake_wait_for_docker_ready( + *, cwd: Path, env: dict[str, str], max_attempts: int = 60 + ) -> None: + del cwd, env, max_attempts + raise fresh_host.FreshHostError("error during connect: EOF") + + def fake_run_command( + command: list[str], + *, + cwd: Path, + env: dict[str, str], + timeout_seconds: int = 3600, + capture_output: bool = False, + ) -> subprocess.CompletedProcess[str]: + del cwd, env, timeout_seconds, capture_output + return subprocess.CompletedProcess(command, 0, stdout="ok", stderr="") + + def fake_sysctl_int(name: str) -> int | None: + return 4 if name == "hw.ncpu" else 8 * 1073741824 + + test_context.patch.patch_object( + hosted_docker_runtime, "wait_for_docker_ready", new=fake_wait_for_docker_ready + ) + test_context.patch.patch_object(hosted_docker_runtime, "run_command", new=fake_run_command) + test_context.patch.patch_object(hosted_docker_runtime.time, "sleep", new=_sleep) + test_context.patch.patch_object(hosted_docker_runtime, "sysctl_int", new=fake_sysctl_int) + test_context.patch.patch_object( + hosted_docker_diagnostics, "run_command", new=fake_run_command + ) + test_context.patch.patch_object( + hosted_docker_diagnostics, "sysctl_int", new=fake_sysctl_int + ) + + with pytest.raises( + fresh_host.FreshHostError, + match="orbstack_recovery_failed: docker_socket_eof", + ): + hosted_docker_runtime.wait_runtime_ready(Path(context.context_path)) + + payload = json.loads(Path(context.runtime_report_path or "").read_text(encoding="utf-8")) + assert payload["failure_reason"] == "orbstack_recovery_failed" + assert payload["failure_phase"] == "post_recovery_probe" + assert payload["recovery_attempt_count"] == 2 + assert [attempt["status"] for attempt in payload["recovery_attempts"]] == [ + "failure", + "failure", + ] + assert payload["recovery_attempts"][-1]["readiness_reason"] == "docker_socket_eof" + + def test_run_checked_handles_none_outputs_when_capture_output_is_false( tmp_path: Path, test_context: TestContext, diff --git a/tests/utils/helpers/_hosted_docker/diagnostics.py b/tests/utils/helpers/_hosted_docker/diagnostics.py index 6c1468f..799823d 100644 --- a/tests/utils/helpers/_hosted_docker/diagnostics.py +++ b/tests/utils/helpers/_hosted_docker/diagnostics.py @@ -6,35 +6,88 @@ import subprocess from pathlib import Path +from tests.utils.helpers._fresh_host.models import FreshHostContext from tests.utils.helpers._fresh_host.shell import compose_probe_env, phase_env from tests.utils.helpers._fresh_host.storage import load_context from tests.utils.helpers._hosted_docker.shell import run_command, sysctl_int -def collect_runtime_diagnostics(context_path: Path) -> None: - """Collect best-effort runtime diagnostics for hosted macOS.""" - context = load_context(context_path) +def _socket_state(env: dict[str, str]) -> str: + """Return diagnostic state for Docker socket paths.""" + docker_host = env.get("DOCKER_HOST") or os.environ.get("DOCKER_HOST", "") + socket_paths: list[Path] = [] + if docker_host.startswith("unix://"): + socket_paths.append(Path(docker_host.removeprefix("unix://"))) + canonical_orbstack_socket = Path.home() / ".orbstack" / "run" / "docker.sock" + if canonical_orbstack_socket not in socket_paths: + socket_paths.append(canonical_orbstack_socket) + + lines = [f"DOCKER_HOST={docker_host or '(not set)'}"] + for socket_path in socket_paths: + lines.append(f"path={socket_path}") + lines.append(f"exists={socket_path.exists()}") + lines.append(f"is_socket={socket_path.is_socket()}") + return "\n".join(lines) + "\n" + + +def _diagnostic_command_env( + context: FreshHostContext, + *, + command: list[str], + base_env: dict[str, str], +) -> dict[str, str]: + """Return the environment for one runtime diagnostic command.""" + if len(command) < 4 or command[:3] != ["docker", "compose", "-f"]: + return base_env + + command_env = compose_probe_env( + context, + compose_file=Path(command[3]), + repo_local_state=True, + ) + for key in ("DOCKER_CONFIG", "DOCKER_HOST", "PATH"): + if key in base_env: + command_env[key] = base_env[key] + return command_env + + +def collect_runtime_diagnostics_for_context( + context: FreshHostContext, + diagnostics_dir: Path, + *, + env: dict[str, str] | None = None, +) -> None: + """Collect best-effort runtime diagnostics for one hosted macOS context.""" if context.platform != "macos": return - diagnostics_dir = Path(context.diagnostics_dir).resolve() - diagnostics_dir.mkdir(parents=True, exist_ok=True) + resolved_diagnostics_dir = diagnostics_dir.resolve() + resolved_diagnostics_dir.mkdir(parents=True, exist_ok=True) repo_root = Path(context.repo_root).resolve() - env = phase_env(context) + base_env = dict(env) if env is not None else phase_env(context) commands = { - diagnostics_dir / "docker-info.txt": ["docker", "info"], - diagnostics_dir / "docker-system-df.txt": ["docker", "system", "df"], - diagnostics_dir / "docker-images.jsonl": ["docker", "images", "--format", "{{json .}}"], + resolved_diagnostics_dir / "docker-info.txt": ["docker", "info"], + resolved_diagnostics_dir / "docker-context-ls.txt": ["docker", "context", "ls"], + resolved_diagnostics_dir / "docker-system-df.txt": ["docker", "system", "df"], + resolved_diagnostics_dir / "docker-ps-all.txt": ["docker", "ps", "-a"], + resolved_diagnostics_dir / "docker-images.jsonl": [ + "docker", + "images", + "--format", + "{{json .}}", + ], + resolved_diagnostics_dir / "orb-status.txt": ["orb", "status"], + resolved_diagnostics_dir / "orb-logs.txt": ["orb", "logs"], } if context.compose_files: primary_compose_file = context.compose_files[0] - commands[diagnostics_dir / "compose-ps.txt"] = [ + commands[resolved_diagnostics_dir / "compose-ps.txt"] = [ "docker", "compose", "-f", primary_compose_file, "ps", ] - commands[diagnostics_dir / "compose-logs.txt"] = [ + commands[resolved_diagnostics_dir / "compose-logs.txt"] = [ "docker", "compose", "-f", @@ -43,14 +96,10 @@ def collect_runtime_diagnostics(context_path: Path) -> None: "--no-color", ] for output_path, command in commands.items(): - command_env = ( - compose_probe_env( - context, - compose_file=Path(command[3]), - repo_local_state=True, - ) - if len(command) >= 4 and command[:3] == ["docker", "compose", "-f"] - else env + command_env = _diagnostic_command_env( + context, + command=command, + base_env=base_env, ) try: completed = run_command( @@ -71,10 +120,37 @@ def collect_runtime_diagnostics(context_path: Path) -> None: encoding="utf-8", ) for output_path, content in { - diagnostics_dir / "host-cpu-count.txt": str(sysctl_int("hw.ncpu") or ""), - diagnostics_dir / "host-memory-bytes.txt": str(sysctl_int("hw.memsize") or ""), + resolved_diagnostics_dir / "host-cpu-count.txt": str(sysctl_int("hw.ncpu") or ""), + resolved_diagnostics_dir / "host-memory-bytes.txt": str( + sysctl_int("hw.memsize") or "" + ), }.items(): output_path.write_text(f"{content}\n", encoding="utf-8") + (resolved_diagnostics_dir / "docker-socket-state.txt").write_text( + _socket_state(base_env), + encoding="utf-8", + ) + orb_start_log = Path("/tmp/orb-start.log") + if orb_start_log.is_file(): + try: + orb_start_content = orb_start_log.read_text(encoding="utf-8", errors="replace") + except OSError as exc: + orb_start_content = f"failed to read {orb_start_log}: {exc}\n" + (resolved_diagnostics_dir / "orb-start.log").write_text( + orb_start_content, + encoding="utf-8", + ) + + +def collect_runtime_diagnostics(context_path: Path) -> None: + """Collect best-effort runtime diagnostics for hosted macOS.""" + context = load_context(context_path) + diagnostics_dir = Path(context.diagnostics_dir).resolve() + collect_runtime_diagnostics_for_context(context, diagnostics_dir) + if context.platform != "macos": + return + repo_root = Path(context.repo_root).resolve() + env = phase_env(context) cache_root = os.environ.get("FRESH_HOST_CACHE_ROOT", "") if cache_root: output_path = diagnostics_dir / "workflow-cache-usage.txt" diff --git a/tests/utils/helpers/_hosted_docker/models.py b/tests/utils/helpers/_hosted_docker/models.py index d2086cb..3848588 100644 --- a/tests/utils/helpers/_hosted_docker/models.py +++ b/tests/utils/helpers/_hosted_docker/models.py @@ -2,7 +2,7 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Final LOG_PREFIX: Final[str] = "[hosted-docker]" @@ -21,6 +21,29 @@ class PullReport: retried_images: list[str] +@dataclass(slots=True) +class RuntimeRecoveryAttemptReport: + """Structured report describing one runtime recovery attempt.""" + + attempt: int + trigger_reason: str + status: str + recovery_commands: list[list[str]] + recovery_exit_code: int | None + recovery_timed_out: bool + readiness_reason: str | None + before_diagnostics_dir: str + after_diagnostics_dir: str + recovery_output_path: str + started_at: str + finished_at: str + + +def _empty_recovery_attempts() -> list[RuntimeRecoveryAttemptReport]: + """Return a typed empty recovery-attempt list.""" + return [] + + @dataclass(slots=True) class RuntimeInstallReport: """Structured report describing runtime installation.""" @@ -37,6 +60,11 @@ class RuntimeInstallReport: finished_at: str duration_seconds: float created_at: str + failure_phase: str | None = None + recovery_attempt_count: int = 0 + recovery_attempts: list[RuntimeRecoveryAttemptReport] = field( + default_factory=_empty_recovery_attempts + ) @dataclass(slots=True) diff --git a/tests/utils/helpers/_hosted_docker/runtime.py b/tests/utils/helpers/_hosted_docker/runtime.py index c1cdc0e..3ac30db 100644 --- a/tests/utils/helpers/_hosted_docker/runtime.py +++ b/tests/utils/helpers/_hosted_docker/runtime.py @@ -3,21 +3,264 @@ from __future__ import annotations import os +import subprocess import time from dataclasses import asdict from pathlib import Path -from tests.utils.helpers._fresh_host.models import FreshHostError +from tests.utils.helpers._fresh_host.models import FreshHostContext, FreshHostError from tests.utils.helpers._fresh_host.storage import load_context, load_report, write_report +from tests.utils.helpers._hosted_docker.diagnostics import collect_runtime_diagnostics_for_context from tests.utils.helpers._hosted_docker.io import log, now_iso, write_github_env, write_json -from tests.utils.helpers._hosted_docker.models import RuntimeInstallReport +from tests.utils.helpers._hosted_docker.models import ( + RuntimeInstallReport, + RuntimeRecoveryAttemptReport, +) from tests.utils.helpers._hosted_docker.shell import ( macos_env, run_checked, + run_command, sysctl_int, wait_for_docker_ready, ) +_INITIAL_DOCKER_READY_ATTEMPTS = 300 +_POST_RECOVERY_DOCKER_READY_ATTEMPTS = 90 +_RECOVERY_BACKOFF_SECONDS = (5, 15) +_RECOVERY_COMMAND_TIMEOUT_SECONDS = 120 +_READINESS_PROBE_TIMEOUT_SECONDS = 15 +_RECOVERABLE_RUNTIME_REASONS = { + "docker_not_ready", + "docker_probe_timeout", + "docker_socket_eof", + "orbstack_socket_missing", +} +_RUNTIME_VALIDATION_COMMANDS = ( + ["docker", "version"], + ["docker", "compose", "version"], + ["docker", "info"], +) + + +def _docker_socket_path(docker_host: str) -> Path | None: + """Return the local Unix socket path from DOCKER_HOST when present.""" + if not docker_host.startswith("unix://"): + return None + return Path(docker_host.removeprefix("unix://")) + + +def _classify_failure_text(text: str) -> str | None: + """Map Docker/OrbStack command output to one structured failure reason.""" + lowered = text.lower() + if not lowered.strip(): + return None + if "eof" in lowered: + return "docker_socket_eof" + if "timed out" in lowered or "timeout" in lowered: + return "docker_probe_timeout" + if "docker did not become ready" in lowered: + return "docker_not_ready" + if "cannot connect" in lowered or "connection refused" in lowered: + return "docker_not_ready" + if "daemon" in lowered and ("unavailable" in lowered or "not running" in lowered): + return "docker_not_ready" + if "no such file or directory" in lowered and "docker" in lowered: + return "docker_binary_missing" + return None + + +def _classify_runtime_failure(*, cwd: Path, env: dict[str, str], exc: BaseException) -> str: + """Return a structured runtime failure reason.""" + if isinstance(exc, subprocess.TimeoutExpired): + return "docker_probe_timeout" + if isinstance(exc, FileNotFoundError): + return "docker_binary_missing" + + exception_reason = _classify_failure_text(str(exc)) + if exception_reason is not None and exception_reason != "docker_not_ready": + return exception_reason + + docker_host = env.get("DOCKER_HOST") or os.environ.get("DOCKER_HOST", "") + socket_path = _docker_socket_path(docker_host) + if socket_path is not None and (not socket_path.exists() or not socket_path.is_socket()): + return "orbstack_socket_missing" + + try: + completed = run_command( + ["docker", "info"], + cwd=cwd, + env=env, + timeout_seconds=_READINESS_PROBE_TIMEOUT_SECONDS, + capture_output=True, + ) + except subprocess.TimeoutExpired: + return "docker_probe_timeout" + except FileNotFoundError: + return "docker_binary_missing" + except OSError as probe_exc: + return _classify_failure_text(str(probe_exc)) or "docker_not_ready" + + if completed.returncode == 0: + return exception_reason or "runtime_tool_validation_failed" + probe_text = "\n".join( + chunk for chunk in (completed.stdout.strip(), completed.stderr.strip()) if chunk + ) + return _classify_failure_text(probe_text) or exception_reason or "docker_not_ready" + + +def _validate_runtime_ready( + *, + cwd: Path, + env: dict[str, str], + max_attempts: int, +) -> None: + """Verify Docker readiness and required CLI tooling.""" + wait_for_docker_ready(cwd=cwd, env=env, max_attempts=max_attempts) + for command in _RUNTIME_VALIDATION_COMMANDS: + run_checked(command, cwd=cwd, env=env, timeout_seconds=120) + + +def _write_recovery_command_output( + *, + output_path: Path, + command: list[str], + completed: subprocess.CompletedProcess[str] | None = None, + exc: BaseException | None = None, +) -> None: + """Append one recovery command result to the attempt command artifact.""" + output_path.parent.mkdir(parents=True, exist_ok=True) + lines = [f"$ {' '.join(command)}"] + if completed is not None: + lines.append(f"exit_code={completed.returncode}") + stdout = (completed.stdout or "").strip() + stderr = (completed.stderr or "").strip() + if stdout: + lines.extend(("stdout:", stdout)) + if stderr: + lines.extend(("stderr:", stderr)) + if exc is not None: + lines.append(f"error={exc}") + with output_path.open("a", encoding="utf-8") as stream: + stream.write("\n".join(lines) + "\n\n") + + +def _run_recovery_commands( + *, + cwd: Path, + env: dict[str, str], + trigger_reason: str, + output_path: Path, +) -> tuple[list[list[str]], int | None, bool]: + """Run bounded OrbStack recovery commands and capture their output.""" + primary_command = ( + ["orb", "start"] + if trigger_reason == "orbstack_socket_missing" + else ["orb", "restart", "docker"] + ) + commands = [primary_command] + exit_code: int | None = None + timed_out = False + + for command in commands: + try: + completed = run_command( + command, + cwd=cwd, + env=env, + timeout_seconds=_RECOVERY_COMMAND_TIMEOUT_SECONDS, + capture_output=True, + ) + except subprocess.TimeoutExpired as exc: + timed_out = True + _write_recovery_command_output(output_path=output_path, command=command, exc=exc) + exit_code = None + break + except OSError as exc: + _write_recovery_command_output(output_path=output_path, command=command, exc=exc) + exit_code = None + break + + exit_code = completed.returncode + _write_recovery_command_output( + output_path=output_path, + command=command, + completed=completed, + ) + if completed.returncode == 0 and command != ["orb", "stop"]: + return commands, exit_code, timed_out + if command == ["orb", "restart", "docker"]: + fallback_commands = [["orb", "stop"], ["orb", "start"]] + commands.extend(fallback_commands) + + return commands, exit_code, timed_out + + +def _recover_orbstack_runtime( + *, + context: FreshHostContext, + repo_root: Path, + env: dict[str, str], + trigger_reason: str, +) -> tuple[bool, list[RuntimeRecoveryAttemptReport], str | None]: + """Attempt bounded OrbStack recovery and return structured attempt reports.""" + attempts: list[RuntimeRecoveryAttemptReport] = [] + last_readiness_reason: str | None = trigger_reason + recovery_root = Path(context.diagnostics_dir).resolve() / "runtime-recovery" + + for attempt_number, backoff_seconds in enumerate(_RECOVERY_BACKOFF_SECONDS, start=1): + attempt_root = recovery_root / f"attempt-{attempt_number:02d}" + before_dir = attempt_root / "before" + after_dir = attempt_root / "after" + output_path = attempt_root / "recovery-command.txt" + started_at = now_iso() + log( + "OrbStack recovery attempt " + f"{attempt_number}/{len(_RECOVERY_BACKOFF_SECONDS)} " + f"triggered by {last_readiness_reason or trigger_reason}." + ) + collect_runtime_diagnostics_for_context(context, before_dir, env=env) + commands, exit_code, timed_out = _run_recovery_commands( + cwd=repo_root, + env=env, + trigger_reason=last_readiness_reason or trigger_reason, + output_path=output_path, + ) + time.sleep(backoff_seconds) + try: + _validate_runtime_ready( + cwd=repo_root, + env=env, + max_attempts=_POST_RECOVERY_DOCKER_READY_ATTEMPTS, + ) + except Exception as exc: # noqa: BLE001 + last_readiness_reason = _classify_runtime_failure(cwd=repo_root, env=env, exc=exc) + status = "failure" + else: + last_readiness_reason = None + status = "success" + collect_runtime_diagnostics_for_context(context, after_dir, env=env) + finished_at = now_iso() + attempts.append( + RuntimeRecoveryAttemptReport( + attempt=attempt_number, + trigger_reason=trigger_reason, + status=status, + recovery_commands=commands, + recovery_exit_code=exit_code, + recovery_timed_out=timed_out, + readiness_reason=last_readiness_reason, + before_diagnostics_dir=str(before_dir), + after_diagnostics_dir=str(after_dir), + recovery_output_path=str(output_path), + started_at=started_at, + finished_at=finished_at, + ) + ) + if status == "success": + return True, attempts, None + + return False, attempts, last_readiness_reason + def wait_runtime_ready( context_path: Path, *, github_env_file: Path | None = None @@ -49,7 +292,9 @@ def wait_runtime_ready( if docker_host: env["DOCKER_HOST"] = docker_host + failure_phase: str | None = None failure_reason: str | None = None + recovery_attempts: list[RuntimeRecoveryAttemptReport] = [] started_at = now_iso() started = time.monotonic() @@ -63,16 +308,29 @@ def wait_runtime_ready( log(f"wait_runtime_ready: DOCKER_HOST={docker_host!r} DOCKER_CONFIG={docker_config!r}") try: - # OrbStack starts in the background on GHA; poll for up to 10 minutes. - wait_for_docker_ready(cwd=repo_root, env=env, max_attempts=300) - for command in ( - ["docker", "version"], - ["docker", "compose", "version"], - ["docker", "info"], - ): - run_checked(command, cwd=repo_root, env=env, timeout_seconds=120) + _validate_runtime_ready( + cwd=repo_root, + env=env, + max_attempts=_INITIAL_DOCKER_READY_ATTEMPTS, + ) except Exception as exc: # noqa: BLE001 - failure_reason = str(exc) + initial_reason = _classify_runtime_failure(cwd=repo_root, env=env, exc=exc) + log(f"Initial hosted Docker runtime health gate failed: {initial_reason}.") + if runtime_provider == "orbstack" and initial_reason in _RECOVERABLE_RUNTIME_REASONS: + recovered, recovery_attempts, last_readiness_reason = _recover_orbstack_runtime( + context=context, + repo_root=repo_root, + env=env, + trigger_reason=initial_reason, + ) + if not recovered: + failure_reason = "orbstack_recovery_failed" + failure_phase = "post_recovery_probe" + if last_readiness_reason: + log(f"OrbStack recovery exhausted; last reason: {last_readiness_reason}.") + else: + failure_reason = initial_reason + failure_phase = "initial_probe" finished_at = now_iso() duration_seconds = round(time.monotonic() - started, 3) @@ -90,6 +348,9 @@ def wait_runtime_ready( finished_at=finished_at, duration_seconds=duration_seconds, created_at=finished_at, + failure_phase=failure_phase, + recovery_attempt_count=len(recovery_attempts), + recovery_attempts=recovery_attempts, ) write_json(asdict(report), report_path) main_report = load_report(Path(context.report_path).resolve()) @@ -109,5 +370,9 @@ def wait_runtime_ready( github_env_vars["DOCKER_HOST"] = docker_host write_github_env(github_env_vars, github_env_file) if failure_reason is not None: + if recovery_attempts and recovery_attempts[-1].readiness_reason: + raise FreshHostError( + f"{failure_reason}: {recovery_attempts[-1].readiness_reason}" + ) raise FreshHostError(failure_reason) return report diff --git a/tests/utils/helpers/hosted_docker.py b/tests/utils/helpers/hosted_docker.py index f617c20..5d9c384 100644 --- a/tests/utils/helpers/hosted_docker.py +++ b/tests/utils/helpers/hosted_docker.py @@ -15,6 +15,7 @@ PULL_HEARTBEAT_SECONDS, ImageEnsureReport, PullReport, + RuntimeRecoveryAttemptReport, RuntimeInstallReport, ) from tests.utils.helpers._hosted_docker.runtime import ( @@ -29,6 +30,7 @@ "ImageEnsureReport", "PULL_HEARTBEAT_SECONDS", "PullReport", + "RuntimeRecoveryAttemptReport", "RuntimeInstallReport", "_pull_one_image", "_run_checked", From 4ed93c0c172f69cc62bdd8fbdc7201afd0275097 Mon Sep 17 00:00:00 2001 From: jp Date: Tue, 2 Jun 2026 13:48:49 -0300 Subject: [PATCH 2/4] Move OrbStack setup into hosted Docker helper --- .github/workflows/fresh-host-core.yml | 24 +--- tests/scripts/hosted_docker.py | 16 +++ tests/suites/unit/ci/test_hosted_docker.py | 19 +++ tests/utils/helpers/_hosted_docker/runtime.py | 118 ++++++++++++++++++ tests/utils/helpers/hosted_docker.py | 2 + 5 files changed, 160 insertions(+), 19 deletions(-) diff --git a/.github/workflows/fresh-host-core.yml b/.github/workflows/fresh-host-core.yml index b16a67a..d081fca 100644 --- a/.github/workflows/fresh-host-core.yml +++ b/.github/workflows/fresh-host-core.yml @@ -134,25 +134,11 @@ jobs: ORBSTACK_DMG_PATH: /tmp/orbstack.dmg ORBSTACK_DMG_SHA256: fb95108ded54a27603b68184a13f7e666e0e758167652c0b65cd4dc5eff94617 ORBSTACK_DMG_URL: https://cdn-updates.orbstack.dev/amd64/OrbStack_v1.5.1_16857_amd64.dmg - run: | - set -euo pipefail - if [[ ! -f "${ORBSTACK_DMG_PATH}" ]]; then - curl -fsSL "${ORBSTACK_DMG_URL}" -o "${ORBSTACK_DMG_PATH}" - fi - actual_sha="$(shasum -a 256 "${ORBSTACK_DMG_PATH}" | awk '{print $1}')" - if [[ "${actual_sha}" != "${ORBSTACK_DMG_SHA256}" ]]; then - echo "OrbStack DMG checksum mismatch: expected ${ORBSTACK_DMG_SHA256}, got ${actual_sha}" >&2 - rm -f "${ORBSTACK_DMG_PATH}" - exit 1 - fi - hdiutil attach -quiet -nobrowse -mountpoint /tmp/orbstack_mnt "${ORBSTACK_DMG_PATH}" - cp -R /tmp/orbstack_mnt/OrbStack.app /Applications/ - hdiutil detach -quiet /tmp/orbstack_mnt - sudo ln -sf /Applications/OrbStack.app/Contents/MacOS/bin/orb /usr/local/bin/orb - HOMEBREW_NO_AUTO_UPDATE=1 HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 brew install --quiet docker docker-compose - mkdir -p ~/.docker/cli-plugins - ln -sfn "$(brew --prefix)/bin/docker-compose" ~/.docker/cli-plugins/docker-compose - nohup orb start > /tmp/orb-start.log 2>&1 & + run: >- + ./tests/scripts/hosted_docker.py setup-orbstack + --dmg-path "${ORBSTACK_DMG_PATH}" + --url "${ORBSTACK_DMG_URL}" + --sha256 "${ORBSTACK_DMG_SHA256}" - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: python-version: "3.12" diff --git a/tests/scripts/hosted_docker.py b/tests/scripts/hosted_docker.py index 3d07328..2549b35 100755 --- a/tests/scripts/hosted_docker.py +++ b/tests/scripts/hosted_docker.py @@ -17,6 +17,7 @@ from tests.utils.helpers.hosted_docker import ( # noqa: E402 collect_runtime_diagnostics, ensure_images, + setup_orbstack, wait_runtime_ready, ) @@ -26,6 +27,14 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: parser = argparse.ArgumentParser(description=__doc__) subparsers = parser.add_subparsers(dest="command", required=True) + setup_parser = subparsers.add_parser( + "setup-orbstack", + help="Install OrbStack and Docker tooling for hosted macOS runtime checks.", + ) + setup_parser.add_argument("--dmg-path", type=Path, required=True) + setup_parser.add_argument("--url", required=True) + setup_parser.add_argument("--sha256", required=True) + wait_parser = subparsers.add_parser( "wait-runtime-ready", help="Poll until the OrbStack Docker runtime responds and write the install report.", @@ -51,6 +60,13 @@ def main(argv: list[str] | None = None) -> int: """Run the requested hosted-docker subcommand.""" args = _parse_args(argv) try: + if args.command == "setup-orbstack": + setup_orbstack( + dmg_path=Path(args.dmg_path).expanduser().resolve(), + dmg_url=str(args.url), + expected_sha256=str(args.sha256), + ) + return 0 if args.command == "wait-runtime-ready": wait_runtime_ready( Path(args.context).expanduser().resolve(), diff --git a/tests/suites/unit/ci/test_hosted_docker.py b/tests/suites/unit/ci/test_hosted_docker.py index 8ca6e70..1450b43 100644 --- a/tests/suites/unit/ci/test_hosted_docker.py +++ b/tests/suites/unit/ci/test_hosted_docker.py @@ -684,6 +684,25 @@ def test_wait_runtime_ready_rejects_non_macos_context( hosted_docker_runtime.wait_runtime_ready(Path(context.context_path)) +def test_setup_orbstack_rejects_checksum_mismatch( + tmp_path: Path, + test_context: TestContext, +) -> None: + """setup_orbstack should reject and remove a cached DMG with the wrong digest.""" + dmg_path = tmp_path / "orbstack.dmg" + dmg_path.write_bytes(b"not the expected dmg") + test_context.patch.patch_object(hosted_docker_runtime.sys, "platform", new="darwin") + + with pytest.raises(fresh_host.FreshHostError, match="checksum mismatch"): + hosted_docker_runtime.setup_orbstack( + dmg_path=dmg_path, + dmg_url="https://example.invalid/orbstack.dmg", + expected_sha256="0" * 64, + ) + + assert not dmg_path.exists() + + @_ORBSTACK_ONLY def test_wait_runtime_ready_sets_orbstack_socket_when_docker_host_unset( tmp_path: Path, diff --git a/tests/utils/helpers/_hosted_docker/runtime.py b/tests/utils/helpers/_hosted_docker/runtime.py index 3ac30db..ab7a130 100644 --- a/tests/utils/helpers/_hosted_docker/runtime.py +++ b/tests/utils/helpers/_hosted_docker/runtime.py @@ -2,8 +2,10 @@ from __future__ import annotations +import hashlib import os import subprocess +import sys import time from dataclasses import asdict from pathlib import Path @@ -40,6 +42,122 @@ ["docker", "compose", "version"], ["docker", "info"], ) +_ORBSTACK_MOUNTPOINT = Path("/tmp/orbstack_mnt") +_ORB_START_LOG = Path("/tmp/orb-start.log") + + +def _sha256(path: Path) -> str: + """Return the SHA-256 hex digest for one file.""" + digest = hashlib.sha256() + with path.open("rb") as stream: + for chunk in iter(lambda: stream.read(1024 * 1024), b""): + digest.update(chunk) + return digest.hexdigest() + + +def setup_orbstack(*, dmg_path: Path, dmg_url: str, expected_sha256: str) -> None: + """Install OrbStack and Docker tooling for hosted macOS runtime checks.""" + if sys.platform != "darwin": + raise FreshHostError("OrbStack setup is only supported on macOS") + + cwd = Path.cwd() + env = macos_env() + resolved_dmg_path = dmg_path.resolve() + if not resolved_dmg_path.is_file(): + run_checked( + ["curl", "-fsSL", dmg_url, "-o", str(resolved_dmg_path)], + cwd=cwd, + env=env, + timeout_seconds=600, + ) + + actual_sha256 = _sha256(resolved_dmg_path) + if actual_sha256 != expected_sha256: + resolved_dmg_path.unlink(missing_ok=True) + raise FreshHostError( + "OrbStack DMG checksum mismatch: " + f"expected {expected_sha256}, got {actual_sha256}" + ) + + attached = False + try: + run_checked( + [ + "hdiutil", + "attach", + "-quiet", + "-nobrowse", + "-mountpoint", + str(_ORBSTACK_MOUNTPOINT), + str(resolved_dmg_path), + ], + cwd=cwd, + env=env, + timeout_seconds=300, + ) + attached = True + run_checked( + ["cp", "-R", str(_ORBSTACK_MOUNTPOINT / "OrbStack.app"), "/Applications/"], + cwd=cwd, + env=env, + timeout_seconds=300, + ) + finally: + if attached: + run_checked( + ["hdiutil", "detach", "-quiet", str(_ORBSTACK_MOUNTPOINT)], + cwd=cwd, + env=env, + timeout_seconds=120, + ) + + run_checked( + [ + "sudo", + "ln", + "-sf", + "/Applications/OrbStack.app/Contents/MacOS/bin/orb", + "/usr/local/bin/orb", + ], + cwd=cwd, + env=env, + timeout_seconds=120, + ) + run_checked( + ["brew", "install", "--quiet", "docker", "docker-compose"], + cwd=cwd, + env=env, + timeout_seconds=600, + ) + brew_prefix = run_checked( + ["brew", "--prefix"], + cwd=cwd, + env=env, + timeout_seconds=120, + capture_output=True, + ).stdout.strip() + cli_plugins_dir = Path.home() / ".docker" / "cli-plugins" + cli_plugins_dir.mkdir(parents=True, exist_ok=True) + run_checked( + [ + "ln", + "-sfn", + str(Path(brew_prefix) / "bin" / "docker-compose"), + str(cli_plugins_dir / "docker-compose"), + ], + cwd=cwd, + env=env, + timeout_seconds=120, + ) + with _ORB_START_LOG.open("wb") as orb_start_log: + subprocess.Popen( + ["orb", "start"], + cwd=cwd, + env=env, + stdout=orb_start_log, + stderr=subprocess.STDOUT, + start_new_session=True, + ) def _docker_socket_path(docker_host: str) -> Path | None: diff --git a/tests/utils/helpers/hosted_docker.py b/tests/utils/helpers/hosted_docker.py index 5d9c384..1d49fcd 100644 --- a/tests/utils/helpers/hosted_docker.py +++ b/tests/utils/helpers/hosted_docker.py @@ -19,6 +19,7 @@ RuntimeInstallReport, ) from tests.utils.helpers._hosted_docker.runtime import ( + setup_orbstack, wait_runtime_ready, ) from tests.utils.helpers._hosted_docker.shell import run_checked as _run_checked @@ -39,6 +40,7 @@ "collect_runtime_diagnostics", "ensure_images", "list_local_images", + "setup_orbstack", "wait_runtime_ready", "pull_images", "resolve_compose_images", From 3e751a85d8ac8d116ebb100d67475fa53eace989 Mon Sep 17 00:00:00 2001 From: jp Date: Tue, 2 Jun 2026 14:27:49 -0300 Subject: [PATCH 3/4] Retry npm tool bootstrap after cache miss --- src/clawops/strongclaw_bootstrap.py | 25 +++++++++++++++- .../unit/clawops/test_strongclaw_bootstrap.py | 29 +++++++++++++++++++ tests/utils/helpers/hosted_docker.py | 4 +-- 3 files changed, 55 insertions(+), 3 deletions(-) diff --git a/src/clawops/strongclaw_bootstrap.py b/src/clawops/strongclaw_bootstrap.py index 77e70e8..54893d5 100644 --- a/src/clawops/strongclaw_bootstrap.py +++ b/src/clawops/strongclaw_bootstrap.py @@ -55,6 +55,8 @@ DEFAULT_QMD_PACKAGE = f"@tobilu/qmd@{DEFAULT_QMD_VERSION}" _UV_SYNC_MAX_ATTEMPTS = 3 _UV_SYNC_RETRY_DELAY_SECONDS = 5 +_NPM_GLOBAL_INSTALL_MAX_ATTEMPTS = 2 +_NPM_GLOBAL_INSTALL_RETRY_DELAY_SECONDS = 15 def _stream_checked( @@ -75,6 +77,27 @@ def _stream_checked( raise CommandError(f"command failed with exit code {returncode}: {' '.join(command)}") +def install_global_node_tools(command: list[str]) -> None: + """Install pinned global npm tools with one registry/cache recovery attempt.""" + for attempt in range(1, _NPM_GLOBAL_INSTALL_MAX_ATTEMPTS + 1): + try: + _stream_checked(command, timeout_seconds=3600) + return + except CommandError as err: + if attempt == _NPM_GLOBAL_INSTALL_MAX_ATTEMPTS: + raise + print( + "npm global tool install failed; cleaning npm cache before retry: " + f"{err}", + file=sys.stderr, + ) + cache_clean_command = ["npm", "cache", "clean", "--force"] + if command and command[0] == "sudo": + cache_clean_command.insert(0, "sudo") + _stream_checked(cache_clean_command, timeout_seconds=300) + time.sleep(_NPM_GLOBAL_INSTALL_RETRY_DELAY_SECONDS) + + def _ensure_brew_formula(formula_name: str) -> None: """Install a Homebrew formula when required.""" _stream_checked(["brew", "install", formula_name], timeout_seconds=1800) @@ -570,7 +593,7 @@ def bootstrap_host( ] if normalized_host_os == "Linux": npm_install_command.insert(0, "sudo") - _stream_checked(npm_install_command, timeout_seconds=3600) + install_global_node_tools(npm_install_command) ensure_common_state_roots(home_dir=home_dir) _render_post_bootstrap_config(repo_root, profile=profile, home_dir=home_dir) diff --git a/tests/suites/unit/clawops/test_strongclaw_bootstrap.py b/tests/suites/unit/clawops/test_strongclaw_bootstrap.py index ba0e8f2..06c32b5 100644 --- a/tests/suites/unit/clawops/test_strongclaw_bootstrap.py +++ b/tests/suites/unit/clawops/test_strongclaw_bootstrap.py @@ -180,6 +180,35 @@ def fake_stream_checked(command: list[str], **kwargs: object) -> None: assert seen_sleeps == [5, 10] +def test_install_global_node_tools_retries_after_cache_clean(test_context: TestContext) -> None: + """Global npm tool install should recover once from transient registry/cache errors.""" + install_command = ["sudo", "npm", "install", "-g", "openclaw@2026.3.13", "acpx@0.3.0"] + seen_commands: list[list[str]] = [] + seen_sleeps: list[int] = [] + + def fake_stream_checked(command: list[str], **kwargs: object) -> None: + seen_commands.append(command) + assert kwargs["timeout_seconds"] in {300, 3600} + if command == install_command and seen_commands.count(install_command) == 1: + raise strongclaw_bootstrap.CommandError("temporary registry miss") + + test_context.patch.patch_object( + strongclaw_bootstrap, + "_stream_checked", + new=fake_stream_checked, + ) + test_context.patch.patch_object(strongclaw_bootstrap.time, "sleep", new=seen_sleeps.append) + + strongclaw_bootstrap.install_global_node_tools(install_command) + + assert seen_commands == [ + install_command, + ["sudo", "npm", "cache", "clean", "--force"], + install_command, + ] + assert seen_sleeps == [15] + + def test_resolve_node_command_falls_back_to_nodejs(test_context: TestContext) -> None: """Prefer `nodejs` when `node` is unavailable.""" diff --git a/tests/utils/helpers/hosted_docker.py b/tests/utils/helpers/hosted_docker.py index 1d49fcd..6992cde 100644 --- a/tests/utils/helpers/hosted_docker.py +++ b/tests/utils/helpers/hosted_docker.py @@ -15,8 +15,8 @@ PULL_HEARTBEAT_SECONDS, ImageEnsureReport, PullReport, - RuntimeRecoveryAttemptReport, RuntimeInstallReport, + RuntimeRecoveryAttemptReport, ) from tests.utils.helpers._hosted_docker.runtime import ( setup_orbstack, @@ -40,8 +40,8 @@ "collect_runtime_diagnostics", "ensure_images", "list_local_images", + "pull_images", "setup_orbstack", "wait_runtime_ready", - "pull_images", "resolve_compose_images", ] From 2a711b4bd88c1b3a20e76b434c3e95fb01941407 Mon Sep 17 00:00:00 2001 From: jp Date: Tue, 2 Jun 2026 15:06:09 -0300 Subject: [PATCH 4/4] Format runtime resilience changes --- src/clawops/strongclaw_bootstrap.py | 3 +-- tests/suites/unit/ci/test_hosted_docker.py | 18 ++++++------------ .../helpers/_hosted_docker/diagnostics.py | 7 +++---- tests/utils/helpers/_hosted_docker/runtime.py | 7 ++----- 4 files changed, 12 insertions(+), 23 deletions(-) diff --git a/src/clawops/strongclaw_bootstrap.py b/src/clawops/strongclaw_bootstrap.py index 54893d5..49b5b66 100644 --- a/src/clawops/strongclaw_bootstrap.py +++ b/src/clawops/strongclaw_bootstrap.py @@ -87,8 +87,7 @@ def install_global_node_tools(command: list[str]) -> None: if attempt == _NPM_GLOBAL_INSTALL_MAX_ATTEMPTS: raise print( - "npm global tool install failed; cleaning npm cache before retry: " - f"{err}", + f"npm global tool install failed; cleaning npm cache before retry: {err}", file=sys.stderr, ) cache_clean_command = ["npm", "cache", "clean", "--force"] diff --git a/tests/suites/unit/ci/test_hosted_docker.py b/tests/suites/unit/ci/test_hosted_docker.py index 1450b43..dafd36a 100644 --- a/tests/suites/unit/ci/test_hosted_docker.py +++ b/tests/suites/unit/ci/test_hosted_docker.py @@ -654,9 +654,9 @@ def fake_sysctl_int(name: str) -> int | None: assert compose_commands assert all(env["NEO4J_PASSWORD"] == "runtime-secret" for env in compose_commands) assert all("COMPOSE_PROJECT_NAME" in env for env in compose_commands) - socket_state = ( - Path(context.diagnostics_dir) / "docker-socket-state.txt" - ).read_text(encoding="utf-8") + socket_state = (Path(context.diagnostics_dir) / "docker-socket-state.txt").read_text( + encoding="utf-8" + ) assert "DOCKER_HOST=unix:///tmp/orbstack-test.sock" in socket_state assert "path=/tmp/orbstack-test.sock" in socket_state @@ -858,9 +858,7 @@ def fake_sysctl_int(name: str) -> int | None: test_context.patch.patch_object( hosted_docker_diagnostics, "run_command", new=fake_diagnostics_run_command ) - test_context.patch.patch_object( - hosted_docker_diagnostics, "sysctl_int", new=fake_sysctl_int - ) + test_context.patch.patch_object(hosted_docker_diagnostics, "sysctl_int", new=fake_sysctl_int) report = hosted_docker_runtime.wait_runtime_ready(Path(context.context_path)) @@ -928,12 +926,8 @@ def fake_sysctl_int(name: str) -> int | None: test_context.patch.patch_object(hosted_docker_runtime, "run_command", new=fake_run_command) test_context.patch.patch_object(hosted_docker_runtime.time, "sleep", new=_sleep) test_context.patch.patch_object(hosted_docker_runtime, "sysctl_int", new=fake_sysctl_int) - test_context.patch.patch_object( - hosted_docker_diagnostics, "run_command", new=fake_run_command - ) - test_context.patch.patch_object( - hosted_docker_diagnostics, "sysctl_int", new=fake_sysctl_int - ) + test_context.patch.patch_object(hosted_docker_diagnostics, "run_command", new=fake_run_command) + test_context.patch.patch_object(hosted_docker_diagnostics, "sysctl_int", new=fake_sysctl_int) with pytest.raises( fresh_host.FreshHostError, diff --git a/tests/utils/helpers/_hosted_docker/diagnostics.py b/tests/utils/helpers/_hosted_docker/diagnostics.py index 799823d..3530628 100644 --- a/tests/utils/helpers/_hosted_docker/diagnostics.py +++ b/tests/utils/helpers/_hosted_docker/diagnostics.py @@ -69,7 +69,8 @@ def collect_runtime_diagnostics_for_context( resolved_diagnostics_dir / "docker-context-ls.txt": ["docker", "context", "ls"], resolved_diagnostics_dir / "docker-system-df.txt": ["docker", "system", "df"], resolved_diagnostics_dir / "docker-ps-all.txt": ["docker", "ps", "-a"], - resolved_diagnostics_dir / "docker-images.jsonl": [ + resolved_diagnostics_dir + / "docker-images.jsonl": [ "docker", "images", "--format", @@ -121,9 +122,7 @@ def collect_runtime_diagnostics_for_context( ) for output_path, content in { resolved_diagnostics_dir / "host-cpu-count.txt": str(sysctl_int("hw.ncpu") or ""), - resolved_diagnostics_dir / "host-memory-bytes.txt": str( - sysctl_int("hw.memsize") or "" - ), + resolved_diagnostics_dir / "host-memory-bytes.txt": str(sysctl_int("hw.memsize") or ""), }.items(): output_path.write_text(f"{content}\n", encoding="utf-8") (resolved_diagnostics_dir / "docker-socket-state.txt").write_text( diff --git a/tests/utils/helpers/_hosted_docker/runtime.py b/tests/utils/helpers/_hosted_docker/runtime.py index ab7a130..c23b690 100644 --- a/tests/utils/helpers/_hosted_docker/runtime.py +++ b/tests/utils/helpers/_hosted_docker/runtime.py @@ -75,8 +75,7 @@ def setup_orbstack(*, dmg_path: Path, dmg_url: str, expected_sha256: str) -> Non if actual_sha256 != expected_sha256: resolved_dmg_path.unlink(missing_ok=True) raise FreshHostError( - "OrbStack DMG checksum mismatch: " - f"expected {expected_sha256}, got {actual_sha256}" + f"OrbStack DMG checksum mismatch: expected {expected_sha256}, got {actual_sha256}" ) attached = False @@ -489,8 +488,6 @@ def wait_runtime_ready( write_github_env(github_env_vars, github_env_file) if failure_reason is not None: if recovery_attempts and recovery_attempts[-1].readiness_reason: - raise FreshHostError( - f"{failure_reason}: {recovery_attempts[-1].readiness_reason}" - ) + raise FreshHostError(f"{failure_reason}: {recovery_attempts[-1].readiness_reason}") raise FreshHostError(failure_reason) return report