diff --git a/CHANGELOG.md b/CHANGELOG.md index d443a8d..c252010 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,40 @@ # Changelog +## 0.19.0 — 2026-04-13 + +- **Codex CLI driver.** New `codex-cli` driver + (`lib/drivers/codex-cli.sh`) implements the full interface for + OpenAI's Codex CLI: headless mode with `codex exec --json`, + JSONL stats extraction (with cached-token deduplication for + accurate cost), activity parsing, fatal/retriable error + detection, and reasoning effort support. +- **ChatGPT subscription auth.** Codex agents can authenticate + via `"auth": "chatgpt"` (mounts `~/.codex/auth.json`) or + `"auth": "apikey"` (uses `OPENAI_API_KEY`). Auto-detection + when both are present. Usage-limit errors from ChatGPT + subscriptions are retriable. +- **Bridge .claude/ conventions to Codex.** When `AGENTS.md` is + absent, copies `.claude/CLAUDE.md` (or root `CLAUDE.md`) so + Codex picks up project instructions. When `.agents/skills/` + is absent, symlinks `.claude/skills/` so Codex discovers + existing skills. Both are git-excluded to avoid committing + bridged files. +- **Context stripping hooks.** Git hooks (`post-merge`, + `post-checkout`, `post-rewrite`) re-strip `.claude/` after + `git pull --rebase`, preventing agents from seeing files + removed by `context: slim` or `context: none`. +- **Stale rebase cleanup.** Push safety net cleans up stale + `.git/rebase-merge` and `.git/rebase-apply` before retries, + preventing repeated `git pull --rebase` failures. +- **Inline system prompt for Codex.** System instructions are + prepended directly to the prompt text rather than relying on + project-level instruction files, ensuring rules are always + applied under `--skip-git-repo-check`. +- **Document per-driver effort values.** USAGE.md now lists + valid effort values for each driver (Claude Code: + low/medium/high/max; Codex CLI: none/minimal/low/medium/high/xhigh). + Gemini CLI ignores the field. 
+ ## 0.18.4 — 2026-04-11 - **Tag and driver in setup wizard.** `setup.sh` now prompts for diff --git a/Dockerfile b/Dockerfile index 0b0cc08..b8e0e3d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,13 +30,24 @@ RUN if echo ",$SWARM_AGENTS," | grep -q ",claude-code,"; then \ fi ENV PATH="/home/agent/.local/bin:${PATH}" -# --- Gemini CLI (requires Node.js) --- +# --- Node.js (shared by Gemini CLI and Codex CLI) --- USER root -RUN if echo ",$SWARM_AGENTS," | grep -q ",gemini-cli,"; then \ +RUN if echo ",$SWARM_AGENTS," | grep -qE ",(gemini-cli|codex-cli),"; then \ curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \ && apt-get install -y --no-install-recommends nodejs \ - && rm -rf /var/lib/apt/lists/* \ - && npm install -g @google/gemini-cli; \ + && rm -rf /var/lib/apt/lists/*; \ + fi + +# --- Gemini CLI --- +RUN if echo ",$SWARM_AGENTS," | grep -q ",gemini-cli,"; then \ + npm install -g @google/gemini-cli; \ + fi + +# --- Codex CLI --- +RUN if echo ",$SWARM_AGENTS," | grep -q ",codex-cli,"; then \ + npm install -g @openai/codex \ + && mkdir -p /home/agent/.codex \ + && chown agent:agent /home/agent/.codex; \ fi USER agent diff --git a/README.md b/README.md index a118fd0..abdb4c8 100644 --- a/README.md +++ b/README.md @@ -130,5 +130,6 @@ Each driver implements a fixed role interface: | `agent_activity_jq` | jq filter for activity display | Built-in drivers: `claude-code` (default), `gemini-cli`, -`fake` (test double). See [USAGE.md](USAGE.md#writing-a-new-driver) -for the full interface and guide to writing a new driver. +`codex-cli`, `fake` (test double). See +[USAGE.md](USAGE.md#writing-a-new-driver) for the full interface +and guide to writing a new driver. diff --git a/USAGE.md b/USAGE.md index 9155f8b..a03311f 100644 --- a/USAGE.md +++ b/USAGE.md @@ -32,6 +32,9 @@ Credentials stay as env vars (not in shell history). |----------|---------|-------------| | `ANTHROPIC_API_KEY` | | API key (or use `CLAUDE_CODE_OAUTH_TOKEN`). 
| | `CLAUDE_CODE_OAUTH_TOKEN` | | OAuth token via `claude setup-token`. | +| `OPENAI_API_KEY` | | OpenAI API key (for Codex CLI driver). | +| `CODEX_AUTH_JSON` | `~/.codex/auth.json` | Path to Codex auth file (ChatGPT subscription). | +| `GEMINI_API_KEY` | | Google API key (for Gemini CLI driver). | | `SWARM_CONFIG` | | Path to swarmfile (or place `swarm.json` in repo root). | | `SWARM_TITLE` | | Dashboard title override. | | `SWARM_SKIP_DEP_CHECK` | | Set to `1` to silence dependency version warnings. | @@ -48,16 +51,22 @@ Per-group fields in `swarm.json` `agents` array: |-------|--------|-------| | `model` | model name | Required. | | `count` | integer | Number of agents in this group. | -| `effort` | `low`, `medium`, `high` | Adaptive reasoning depth. | +| `effort` | string | Reasoning depth (see below). | | `context` | `full`, `slim`, `none` | How much of `.claude/` to keep (default: `full`). | | `prompt` | file path | Per-group prompt override (default: top-level). | -| `auth` | `apikey`, `oauth`, omit | Which host credential to inject (see [Auth modes](#auth-modes)). | +| `auth` | `apikey`, `oauth`, `chatgpt`, omit | Which host credential to inject (see [Auth modes](#auth-modes)). | | `api_key` | key or `$VAR` | Per-group API key for third-party endpoints. | | `auth_token` | key or `$VAR` | Per-group Bearer token (OpenRouter-style). | | `base_url` | URL | Per-group API endpoint. | | `tag` | string or `$VAR` | Label for grouping runs (default: top-level). | | `driver` | driver name | Agent driver override (default: top-level or `claude-code`). | +**Effort values** are driver-dependent: + +- Claude Code: `low`, `medium`, `high`, `max` (Opus only). +- Codex CLI: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`. +- Gemini CLI: ignored. 
+ Top-level fields: `prompt`, `setup`, `max_idle` (default: `3`), `max_retry_wait`, `driver`, `inject_git_rules`, `git_user` (`name`, `email`), `claude_code_version`, `title`, @@ -271,12 +280,9 @@ agent runs a different prompt to validate and normalize findings. Three credential mechanisms serve different purposes: -- **`auth`** — Controls which host credential - (`ANTHROPIC_API_KEY` vs `CLAUDE_CODE_OAUTH_TOKEN`) is - forwarded to the container. Use when both credentials are - set on the host and you want per-group billing control - (e.g. some agents on API, others on subscription). - Values: `apikey`, `oauth`, or omit (pass both). +- **`auth`** — Controls which host credential is forwarded to + the container. Values: `apikey`, `oauth`, `chatgpt`, or + omit (auto-detect). - **`api_key`** — Per-group API key for third-party endpoints (MiniMax, etc.). Passed as `ANTHROPIC_API_KEY` inside the @@ -287,17 +293,49 @@ Three credential mechanisms serve different purposes: `ANTHROPIC_API_KEY` so Claude Code enters third-party mode. Supports `$VAR` references. +### Claude Code + +| `auth` value | Credential injected | +|---|---| +| `apikey` | `ANTHROPIC_API_KEY` only | +| `oauth` | `CLAUDE_CODE_OAUTH_TOKEN` only | +| omit | Both (CLI decides) | + For subscription auth (Pro/Max/Teams/Enterprise), generate an OAuth token with `claude setup-token` and export `CLAUDE_CODE_OAUTH_TOKEN`. +### Codex CLI + +| `auth` value | Credential injected | +|---|---| +| `apikey` | `OPENAI_API_KEY` only | +| `chatgpt` | Mounts `~/.codex/auth.json` (ChatGPT subscription) | +| omit | API key if set + auth.json if found | + +For ChatGPT subscription auth (Plus/Pro/Team/Enterprise), +run `codex login` on the host to create `~/.codex/auth.json`, +then set `"auth": "chatgpt"` in your swarm config: + +```json +{ + "driver": "codex-cli", + "agents": [{ "model": "gpt-5.4", "auth": "chatgpt" }] +} +``` + +The auth file is bind-mounted read-only into containers. 
+Override the path with `CODEX_AUTH_JSON=/path/to/auth.json`. + +### General rules + Groups with `api_key` or `auth_token` ignore the `auth` field; their custom credential is always used. When neither is set, `auth` determines which host credential to inject. The dashboard **Auth** column reflects the actual credential -source: `key`, `oauth`, `token`, or `auto` (see Dashboard -columns). +source: `key`, `oauth`, `chatgpt`, `token`, or `auto` (see +Dashboard columns). ## Git coordination @@ -320,8 +358,9 @@ Stats collected per session inside each container Dashboard columns: - **Auth** — credential source: `key` (API key), `oauth` - (subscription token), `token` (Bearer / OpenRouter-style), - `auto` (both key + OAuth present, CLI decides). + (Claude subscription token), `chatgpt` (ChatGPT subscription), + `token` (Bearer / OpenRouter-style), + `auto` (multiple credentials present, CLI decides). - **Ctx** — context mode: `bare` (no `.claude/`), `slim` (only `CLAUDE.md`), or blank for full context. - **Cost** — cumulative API cost in USD. 
@@ -348,6 +387,7 @@ Built-in drivers: |--------|-----|---------| | `claude-code` | `claude` | Yes | | `gemini-cli` | `gemini` | | +| `codex-cli` | `codex` | | | `fake` | (none) | Test double for unit testing | Set the driver globally in `swarm.json`: diff --git a/VERSION b/VERSION index 0cc9884..1cf0537 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.18.4 +0.19.0 diff --git a/dashboard.sh b/dashboard.sh index d09c5c5..b736aa8 100755 --- a/dashboard.sh +++ b/dashboard.sh @@ -220,6 +220,7 @@ short_driver() { case "${1:-}" in claude-code) printf 'claude' ;; gemini-cli) printf 'gemini' ;; + codex-cli) printf 'codex' ;; *) printf '%s' "${1:-}" ;; esac } @@ -293,7 +294,7 @@ emit_row() { printf " ${open}%-3s %-${MODEL_COL_W}s" "$id_str" "$model_str" if $SHOW_DRIVER; then printf " %-${DRV_COL_W}s" "$driver_str"; fi - if $SHOW_AUTH; then printf " %-6s" "$auth_str"; fi + if $SHOW_AUTH; then printf " %-7s" "$auth_str"; fi printf " %b%-14s%b %7s" "$status_color" "$status_str" "$RESET" "$cost_str" if $SHOW_INOUT; then printf " %13s" "$inout_str"; fi if $SHOW_CACHE; then printf " %7s" "$cache_str"; fi @@ -307,7 +308,7 @@ emit_row() { emit_header() { printf " ${BOLD}%-3s %-${MODEL_COL_W}s" "#" "Model" if $SHOW_DRIVER; then printf " %-${DRV_COL_W}s" "Driver"; fi - if $SHOW_AUTH; then printf " %-6s" "Auth"; fi + if $SHOW_AUTH; then printf " %-7s" "Auth"; fi printf " %-14s %7s" "Status" "Cost" if $SHOW_INOUT; then printf " %13s" "In/Out"; fi if $SHOW_CACHE; then printf " %7s" "Cache"; fi @@ -361,7 +362,7 @@ draw() { local avail=$((TERM_COLS - base_w)) if $HAS_MULTI_DRIVERS && [ "$avail" -ge $((DRV_COL_W + 1)) ]; then SHOW_DRIVER=true; avail=$((avail - DRV_COL_W - 1)); fi if [ "$avail" -ge 14 ]; then SHOW_INOUT=true; avail=$((avail - 14)); fi - if [ "$avail" -ge 7 ]; then SHOW_AUTH=true; avail=$((avail - 7)); fi + if [ "$avail" -ge 9 ]; then SHOW_AUTH=true; avail=$((avail - 9)); fi if [ "$avail" -ge 7 ]; then SHOW_TURNS=true; avail=$((avail - 7)); fi if [ "$avail" -ge 7 ]; then 
SHOW_TPS=true; avail=$((avail - 7)); fi if [ "$avail" -ge 8 ]; then SHOW_CACHE=true; avail=$((avail - 8)); fi diff --git a/lib/drivers/codex-cli.sh b/lib/drivers/codex-cli.sh new file mode 100755 index 0000000..2b23b00 --- /dev/null +++ b/lib/drivers/codex-cli.sh @@ -0,0 +1,295 @@ +#!/bin/bash +# shellcheck disable=SC2034 +# Agent driver: OpenAI Codex CLI +# Implements the role interface for OpenAI's Codex CLI. + +# shellcheck source=_common.sh +source "$(dirname "${BASH_SOURCE[0]}")/_common.sh" + +agent_default_model() { echo "gpt-5.4"; } +agent_name() { echo "Codex CLI"; } +agent_cmd() { echo "codex"; } + +agent_version() { + local v + v=$(codex --version 2>/dev/null || echo "unknown") + # `codex --version` prints "codex-cli 0.120.0"; extract the number. + v="${v##* }" + echo "${v:-unknown}" +} + +# Run one agent session. +# Args: [append_system_prompt_file] +agent_run() { + local model="$1" prompt_text="$2" logfile="$3" + local append_file="${4:-}" + + # Prepend system instructions directly into the prompt. + # codex exec with --skip-git-repo-check may not load + # .codex/instructions.md, so inline them to be safe. + if [ -n "$append_file" ] && [ -f "$append_file" ]; then + prompt_text="$(cat "$append_file")"$'\n\n'"$prompt_text" + fi + + local effort_args=() + if [ -n "${CODEX_EFFORT:-}" ]; then + effort_args=(-c "model_reasoning_effort=\"${CODEX_EFFORT}\"") + fi + + codex exec \ + --dangerously-bypass-approvals-and-sandbox \ + -m "$model" \ + --json \ + --skip-git-repo-check \ + "${effort_args[@]+"${effort_args[@]}"}" \ + "$prompt_text" \ + 2>"${logfile}.err" \ + | stdbuf -oL tee "$logfile" +} + +# Write agent-specific settings and authenticate. +# Config goes to ~/.codex/ (where Codex CLI looks by default), +# NOT /workspace/.codex/ (which is only for instructions.md). 
agent_settings() {
    # Args: <workspace>
    #   workspace — checkout the agent will run in.
    # Side effects:
    #   - creates ~/.codex and writes ~/.codex/config.toml
    #   - may create <workspace>/AGENTS.md and <workspace>/.agents/skills
    #     and list them in <workspace>/.git/info/exclude
    #   - runs `codex login --with-api-key` when OPENAI_API_KEY is set
    local _workspace="$1"
    local codex_home="${HOME}/.codex"

    # Fall back to sudo when the home directory is not writable by the
    # current user; ownership is handed back so later writes succeed.
    mkdir -p "$codex_home" 2>/dev/null \
        || { sudo mkdir -p "$codex_home" && sudo chown "$(id -u):$(id -g)" "$codex_home"; }

    cat > "${codex_home}/config.toml" <<'TOML'
cli_auth_credentials_store = "file"
TOML

    # Codex reads AGENTS.md for project instructions, not
    # .claude/CLAUDE.md. Bridge the gap when AGENTS.md is absent.
    # .claude/CLAUDE.md takes precedence over a root-level CLAUDE.md.
    if [ ! -f "${_workspace}/AGENTS.md" ]; then
        local _src=""
        if [ -f "${_workspace}/.claude/CLAUDE.md" ]; then
            _src="${_workspace}/.claude/CLAUDE.md"
        elif [ -f "${_workspace}/CLAUDE.md" ]; then
            _src="${_workspace}/CLAUDE.md"
        fi
        if [ -n "$_src" ]; then
            cp "$_src" "${_workspace}/AGENTS.md"
            _codex_git_exclude "$_workspace" "AGENTS.md"
        fi
    fi

    # Codex reads skills from .agents/skills/, not .claude/skills/.
    # Symlink when the Codex location is absent (Codex supports
    # symlinked skill folders). Only fires when .claude/skills/
    # exists (context=full); slim/none strip it so this is a no-op.
    # The -L test skips a dangling symlink left from an earlier run,
    # which -d alone would miss and `ln -s` would then fail on.
    if [ ! -d "${_workspace}/.agents/skills" ] \
        && [ ! -L "${_workspace}/.agents/skills" ] \
        && [ -d "${_workspace}/.claude/skills" ]; then
        mkdir -p "${_workspace}/.agents"
        ln -s "../.claude/skills" "${_workspace}/.agents/skills"
        _codex_git_exclude "$_workspace" ".agents/"
    fi

    # Log in with the API key via stdin so the key never appears in argv.
    # The CODEX_HOME assignment must prefix `codex` itself: an assignment
    # in front of the first pipeline stage is visible to that stage only.
    # The builtin printf is used (rather than printenv) so a key that is
    # set but not exported is still passed through.
    if [ -n "${OPENAI_API_KEY:-}" ]; then
        printf '%s\n' "$OPENAI_API_KEY" \
            | CODEX_HOME="$codex_home" codex login --with-api-key 2>/dev/null \
            || true
    fi
}

# Add a pattern to <workspace>/.git/info/exclude unless that exact line
# is already present, so repeated runs do not pile up duplicates.
# Args: <workspace> <pattern>
_codex_git_exclude() {
    local exclude_dir="${1}/.git/info" pattern="$2"
    mkdir -p "$exclude_dir"
    grep -qxF -- "$pattern" "${exclude_dir}/exclude" 2>/dev/null \
        || printf '%s\n' "$pattern" >> "${exclude_dir}/exclude"
}

# Extract stats from Codex JSONL output.
# Codex emits turn.completed events with usage; sum across turns.
# {"type":"turn.completed","usage":{"input_tokens":N,
#  "cached_input_tokens":N,"output_tokens":N}}
# NOTE: OpenAI includes cached tokens inside input_tokens, but
# the harness pricing formula treats them separately (like Claude).
# Subtract cached from input so they aren't double-counted.
agent_extract_stats() {
    # Args: <logfile>
    # Output (stdout, no trailing newline): eight tab-separated fields
    #   cost  tok_in  tok_out  cache_read  cache_create  dur  api_ms  turns
    # cost, cache_create, dur and api_ms are always 0 here.
    # tok_in is the summed input_tokens minus the summed
    # cached_input_tokens. All zeros are printed when the log is missing,
    # has no turn.completed events, or cannot be parsed.
    local logfile="$1"
    local row

    # One jq pass builds the whole row. An empty slurp is handled
    # explicitly instead of relying on a jq arithmetic error.
    row=$(grep '"type"[[:space:]]*:[[:space:]]*"turn.completed"' \
            "$logfile" 2>/dev/null \
        | jq -rs '
            if length == 0 then empty else
              ([.[].usage.input_tokens // 0] | add) as $in
              | ([.[].usage.cached_input_tokens // 0] | add) as $cached
              | ([.[].usage.output_tokens // 0] | add) as $out
              | [0, ($in - $cached), $out, $cached, 0, 0, 0, length]
              | @tsv
            end' 2>/dev/null || true)

    if [ -z "$row" ]; then
        printf "0\t0\t0\t0\t0\t0\t0\t0"
        return
    fi
    # No native cost or timing from Codex JSONL; use pricing config.
    printf '%s' "$row"
}

# Return the jq program for parsing activity from Codex JSONL.
# Codex emits item.started/item.completed events with item types:
# command_execution, agent_message, file_change, mcp_tool_call, etc.
# File paths live in .changes[].path (not .file_path).
# No separate reasoning type; thinking is internal to the model.
# The program references $id, which the caller must bind
# (e.g. via jq --arg id ...).
agent_activity_jq() {
    cat <<'JQ'
def truncate(n):
  if length > n then .[:n-3] + "..." else . end;

def first_line:
  split("\n")[0] // .;

def ts:
  now | strftime("%H:%M:%S");

def prefix:
  "\u001b[33m\(ts) agent[\($id)]";

def reset:
  "\u001b[0m";

fromjson? // empty |
if .type == "item.started" then
  .item |
  if .type == "command_execution" then
    "\(prefix) Shell: " + ((.command // "") | first_line | truncate(80)) + reset
  elif .type == "web_search" then
    "\(prefix) Search: " + (.query // "") + reset
  else empty end
elif .type == "item.completed" then
  .item |
  if .type == "file_change" then
    "\(prefix) Edit " + ((.changes[0].path // "") | first_line) + reset
  elif .type == "mcp_tool_call" then
    "\(prefix) MCP: " + (.tool_name // "unknown") + reset
  else empty end
else empty end
JQ
}

# Detect fatal errors in a Codex session log.
# Checks for turn.failed and error events.
# Args: <logfile>
# Output: one human-readable message on stdout when a fatal condition
# is found, nothing otherwise. Checks, in order: the first turn.failed
# event, the first error event, then <logfile>.err.
agent_detect_fatal() {
    local logfile="$1"
    local msg

    local fail_line
    fail_line=$(grep '"type"[[:space:]]*:[[:space:]]*"turn.failed"' \
        "$logfile" 2>/dev/null | head -1 || true)
    if [ -n "$fail_line" ]; then
        # .error may be a string or an object; for an object prefer its
        # .message and fall back to compact JSON so the result stays
        # on one line.
        msg=$(printf '%s\n' "$fail_line" | jq -r '
            (.error | if type == "object" then (.message // tostring) else . end)
            // .message // "turn failed"' 2>/dev/null || true)
        # A fatal event was seen: never report "no error" just because
        # the line could not be parsed.
        printf '%s\n' "${msg:-turn failed}"
        return
    fi

    local error_line
    error_line=$(grep '"type"[[:space:]]*:[[:space:]]*"error"' \
        "$logfile" 2>/dev/null | head -1 || true)
    if [ -n "$error_line" ]; then
        msg=$(printf '%s\n' "$error_line" | jq -r '
            .message
            // (.error | if type == "object" then (.message // tostring) else . end)
            // "unknown error"' 2>/dev/null || true)
        printf '%s\n' "${msg:-unknown error}"
        return
    fi

    # Check stderr for errors when log is empty or has no tokens.
    # PATH-update notices are filtered out, and stderr is ignored once
    # any turn has completed.
    if [ -f "${logfile}.err" ]; then
        local err_msg
        err_msg=$(grep -i 'error\|invalid.*key\|unauthorized' \
            "${logfile}.err" 2>/dev/null \
            | grep -iv 'could not update PATH\|proceeding.*PATH' \
            | head -1 || true)
        if [ -n "$err_msg" ] && \
            ! grep -q '"type"[[:space:]]*:[[:space:]]*"turn.completed"' \
                "$logfile" 2>/dev/null; then
            printf '%s\n' "$err_msg"
        fi
    fi
}

# Detect retriable errors (rate limits, quota).
# Returns non-empty string if the error is retriable, empty if fatal.
+# Args: +agent_is_retriable() { + local logfile="$1" + local _pattern='429\|rate.limit\|too many requests\|quota\|usage.limit\|hit your.*limit' + grep -qi "$_pattern" \ + "$logfile" 2>/dev/null && echo "rate_limited" && return + if [ -f "${logfile}.err" ]; then + grep -qi "$_pattern" \ + "${logfile}.err" 2>/dev/null && echo "rate_limited" && return + fi + return 0 +} + +# Map effort to Codex config override. +# Args: +agent_docker_env() { + local effort="${1:-}" + if [ -n "$effort" ]; then + printf -- '-e\nCODEX_EFFORT=%s\n' "$effort" + fi +} + +# Resolve auth credentials and emit Docker flags. +# Args: +# Reads host env: OPENAI_API_KEY, CODEX_AUTH_JSON +# +# Auth modes: +# chatgpt — Mount ~/.codex/auth.json (ChatGPT subscription). +# apikey — Use OPENAI_API_KEY only. +# (empty) — Auto-detect: auth.json if found, else API key. +agent_docker_auth() { + local api_key="$1" _auth_token="$2" auth_mode="$3" _base_url="$4" + + local label="" + local key="${api_key:-${OPENAI_API_KEY:-}}" + local auth_json="${CODEX_AUTH_JSON:-${HOME}/.codex/auth.json}" + + # Guard: detect a corrupted auth.json (directory instead of file), + # which older code or a stale Docker -v mount may have created. + if [ -d "$auth_json" ]; then + echo "WARNING: ${auth_json} is a directory (should be a file)." >&2 + echo " Fix with: sudo rm -rf '${auth_json}'" >&2 + fi + + # Use --mount instead of -v so Docker errors out (rather than + # silently creating a directory) if the source file is missing. 
+ local _mount_fmt='--mount\ntype=bind,source=%s,target=/home/agent/.codex/auth.json,readonly\n' + + case "${auth_mode}" in + chatgpt) + if [ -f "$auth_json" ]; then + printf -- "$_mount_fmt" "$auth_json" + label="chatgpt" + else + echo "WARNING: auth=chatgpt but ${auth_json} not found" >&2 + fi + ;; + apikey) + if [ -n "$key" ]; then + printf -- '-e\nOPENAI_API_KEY=%s\n' "$key" + label="key" + fi + ;; + *) + if [ -n "$key" ]; then + printf -- '-e\nOPENAI_API_KEY=%s\n' "$key" + label="key" + fi + if [ -f "$auth_json" ]; then + printf -- "$_mount_fmt" "$auth_json" + if [ -n "$label" ]; then label="auto" + else label="chatgpt"; fi + fi + ;; + esac + + printf -- '-e\nSWARM_AUTH_MODE=%s\n' "$label" +} + +# Dockerfile fragment to install this agent's CLI. +agent_install_cmd() { + cat <<'INSTALL' +RUN npm install -g @openai/codex +INSTALL +} diff --git a/lib/harness.sh b/lib/harness.sh index ddec198..55f6c9e 100755 --- a/lib/harness.sh +++ b/lib/harness.sh @@ -151,6 +151,26 @@ if [ ! -d "/workspace/.git" ]; then esac fi + # Install git hooks that re-strip context after pulls/checkouts. + # Without these, `git pull --rebase` restores the stripped files. + # Claude Code respects context internally, but other drivers + # (Codex, Gemini) see the raw filesystem. + if [ "$SWARM_CONTEXT" != "full" ]; then + cat > .git/hooks/_strip_context </dev/null ;; + slim) [ -d .claude ] && find .claude -mindepth 1 -maxdepth 1 ! -name CLAUDE.md -exec rm -rf {} + 2>/dev/null ;; +esac +CTXHOOK + chmod +x .git/hooks/_strip_context + for _hook in post-merge post-checkout; do + printf '#!/bin/bash\n.git/hooks/_strip_context\n' \ + > ".git/hooks/$_hook" + chmod +x ".git/hooks/$_hook" + done + fi + # Run project-specific setup if provided. 
if [ -n "$SWARM_SETUP" ] && [ -f "$SWARM_SETUP" ]; then hlog "running setup ${SWARM_SETUP}" @@ -207,6 +227,8 @@ ctx_label="$SWARM_CONTEXT" trailer+=$(printf '> Ctx: %s\n' "$ctx_label") || true git commit --amend --no-verify --no-edit -m "${msg}${trailer}" \ --allow-empty >/dev/null 2>&1 || true +# Re-strip .claude context after rebase (covers git pull --rebase). +[ -x .git/hooks/_strip_context ] && .git/hooks/_strip_context HOOK chmod +x .git/hooks/post-rewrite @@ -268,8 +290,10 @@ while true; do fi AGENT_RUN_EXIT=0 + _run_start=$SECONDS agent_run "$SWARM_MODEL" "$(cat "$SWARM_PROMPT")" "$LOGFILE" "$APPEND_FILE" \ | /activity-filter.sh || AGENT_RUN_EXIT=$? + _run_elapsed_ms=$(( (SECONDS - _run_start) * 1000 )) # Extract usage stats via the driver. STATS_LINE=$(agent_extract_stats "$LOGFILE") @@ -277,6 +301,9 @@ while true; do cost="${cost:-0}"; tok_in="${tok_in:-0}"; tok_out="${tok_out:-0}" cache_rd="${cache_rd:-0}"; cache_cr="${cache_cr:-0}" dur="${dur:-0}"; api_ms="${api_ms:-0}"; turns="${turns:-0}" + # Fall back to wall-clock time when the driver has no native timing. + [ "${dur:-0}" = "0" ] && dur="$_run_elapsed_ms" + [ "${api_ms:-0}" = "0" ] && api_ms="$_run_elapsed_ms" # Compute cost from token counts when the driver doesn't report # it natively (e.g. Gemini CLI). Pricing is $/M tokens, passed @@ -336,13 +363,17 @@ while true; do hlog "retry: starting session" rm -f "$RETRY_FILE" AGENT_RUN_EXIT=0 + _run_start=$SECONDS agent_run "$SWARM_MODEL" "$(cat "$SWARM_PROMPT")" "$LOGFILE" "$APPEND_FILE" \ | /activity-filter.sh || AGENT_RUN_EXIT=$? 
+ _run_elapsed_ms=$(( (SECONDS - _run_start) * 1000 )) STATS_LINE=$(agent_extract_stats "$LOGFILE") IFS=$'\t' read -r cost tok_in tok_out cache_rd cache_cr dur api_ms turns <<< "$STATS_LINE" cost="${cost:-0}"; tok_in="${tok_in:-0}"; tok_out="${tok_out:-0}" cache_rd="${cache_rd:-0}"; cache_cr="${cache_cr:-0}" dur="${dur:-0}"; api_ms="${api_ms:-0}"; turns="${turns:-0}" + [ "${dur:-0}" = "0" ] && dur="$_run_elapsed_ms" + [ "${api_ms:-0}" = "0" ] && api_ms="$_run_elapsed_ms" if [ -n "${SWARM_PRICE_INPUT:-}" ]; then cost=$(awk "BEGIN {printf \"%.6f\", (${tok_in} * ${SWARM_PRICE_INPUT} + ${tok_out} * ${SWARM_PRICE_OUTPUT:-0} + ${cache_rd} * ${SWARM_PRICE_CACHED:-0}) / 1000000}") @@ -392,6 +423,10 @@ while true; do _push_ok=false for _try in 1 2 3; do sleep $((RANDOM % 5 + 1)) + # Clean up stale rebase state that blocks git pull --rebase. + if [ -d .git/rebase-merge ] || [ -d .git/rebase-apply ]; then + git rebase --abort 2>/dev/null || rm -rf .git/rebase-merge .git/rebase-apply + fi if git pull --rebase origin agent-work 2>&1 | hlog_pipe \ && git push origin agent-work 2>&1 | hlog_pipe; then _push_ok=true diff --git a/tests/configs/README.md b/tests/configs/README.md index 963be72..843da51 100644 --- a/tests/configs/README.md +++ b/tests/configs/README.md @@ -13,12 +13,13 @@ export CLAUDE_CODE_OAUTH_TOKEN="sk-ant-oat01-..." export OPENROUTER_API_KEY="sk-or-v1-..." export MINIMAX_API_KEY="sk-api-..." export GEMINI_API_KEY="AI..." +export OPENAI_API_KEY="sk-..." 
``` Verify they're set: ```bash -for v in ANTHROPIC_API_KEY CLAUDE_CODE_OAUTH_TOKEN OPENROUTER_API_KEY MINIMAX_API_KEY GEMINI_API_KEY; do +for v in ANTHROPIC_API_KEY CLAUDE_CODE_OAUTH_TOKEN OPENROUTER_API_KEY MINIMAX_API_KEY GEMINI_API_KEY OPENAI_API_KEY; do printf "%-30s %s\n" "$v" "${!v:+(set)}" done ``` @@ -37,6 +38,10 @@ done | `driver-inheritance.json` | gemini-2.5-pro + gemini-2.5-flash | gemini-cli | `GEMINI_API_KEY` | | `driver-post-process.json` | 2x gemini-2.5-pro (+ flash PP) | gemini-cli | `GEMINI_API_KEY` | | `heterogeneous-kitchen-sink.json` | Opus + 5x Gemini + Sonnet (+ PP) | mixed | `CLAUDE_CODE_OAUTH_TOKEN` + `ANTHROPIC_API_KEY` + `GEMINI_API_KEY` | +| `codex-only.json` | 2x gpt-5.4 | codex-cli | `OPENAI_API_KEY` | +| `codex-chatgpt.json` | 2x gpt-5.4 (chatgpt auth) | codex-cli | `~/.codex/auth.json` | +| `codex-auth-mixed.json` | gpt-5.4 (chatgpt) + gpt-5.3-codex (apikey) + gpt-5.4 (auto) | codex-cli | `~/.codex/auth.json` + `OPENAI_API_KEY` | +| `codex-mixed.json` | Opus + gpt-5.4 + gpt-5.3-codex + gpt-5.2 | mixed | `CLAUDE_CODE_OAUTH_TOKEN` + `OPENAI_API_KEY` | ## Usage @@ -51,6 +56,10 @@ done ./tests/test.sh --config tests/configs/driver-inheritance.json ./tests/test.sh --config tests/configs/driver-post-process.json ./tests/test.sh --config tests/configs/heterogeneous-kitchen-sink.json +./tests/test.sh --config tests/configs/codex-only.json +./tests/test.sh --config tests/configs/codex-chatgpt.json +./tests/test.sh --config tests/configs/codex-auth-mixed.json +./tests/test.sh --config tests/configs/codex-mixed.json ``` The test runner injects its own prompt and setup script into the config, diff --git a/tests/configs/codex-auth-mixed.json b/tests/configs/codex-auth-mixed.json new file mode 100644 index 0000000..c6f88f6 --- /dev/null +++ b/tests/configs/codex-auth-mixed.json @@ -0,0 +1,24 @@ +{ + "prompt": "unused", + "driver": "codex-cli", + "pricing": { + "gpt-5.4": {"input": 2.50, "output": 15.00, "cached": 0.25}, + "gpt-5.3-codex": 
{"input": 1.75, "output": 14.00, "cached": 0.175} + }, + "agents": [ + { + "count": 1, + "model": "gpt-5.4", + "auth": "chatgpt" + }, + { + "count": 1, + "model": "gpt-5.3-codex", + "auth": "apikey" + }, + { + "count": 1, + "model": "gpt-5.4" + } + ] +} diff --git a/tests/configs/codex-chatgpt.json b/tests/configs/codex-chatgpt.json new file mode 100644 index 0000000..055319a --- /dev/null +++ b/tests/configs/codex-chatgpt.json @@ -0,0 +1,21 @@ +{ + "prompt": "unused", + "driver": "codex-cli", + "pricing": { + "gpt-5.4": {"input": 2.5, "output": 15.0, "cached": 0.25}, + "gpt-5.3-codex": {"input": 1.75, "output": 14.0, "cached": 0.175} + }, + "agents": [ + { + "count": 1, + "model": "gpt-5.4", + "auth": "chatgpt", + "effort": "high" + }, + { + "count": 1, + "model": "gpt-5.3-codex", + "auth": "chatgpt" + } + ] +} diff --git a/tests/configs/codex-mixed.json b/tests/configs/codex-mixed.json new file mode 100644 index 0000000..d666dee --- /dev/null +++ b/tests/configs/codex-mixed.json @@ -0,0 +1,54 @@ +{ + "prompt": "unused", + "pricing": { + "gpt-5.4": {"input": 2.50, "output": 15.00, "cached": 0.25}, + "gpt-5.4-pro": {"input": 30.00, "output": 180.00}, + "gpt-5.4-mini": {"input": 0.75, "output": 4.50, "cached": 0.075}, + "gpt-5.4-nano": {"input": 0.20, "output": 1.25, "cached": 0.02}, + "gpt-5.3-codex": {"input": 1.75, "output": 14.00, "cached": 0.175}, + "gpt-5.2": {"input": 1.75, "output": 14.00, "cached": 0.175}, + "gpt-5.2-codex": {"input": 1.75, "output": 14.00, "cached": 0.175}, + "gpt-5.2-pro": {"input": 21.00, "output": 168.00}, + "gpt-5.1": {"input": 1.25, "output": 10.00, "cached": 0.125}, + "gpt-5.1-codex": {"input": 1.25, "output": 10.00, "cached": 0.125}, + "gpt-5.1-codex-max": {"input": 1.25, "output": 10.00, "cached": 0.125}, + "gpt-5.1-codex-mini": {"input": 0.25, "output": 2.00, "cached": 0.025}, + "gpt-5": {"input": 1.25, "output": 10.00, "cached": 0.125}, + "gpt-5-codex": {"input": 1.25, "output": 10.00, "cached": 0.125}, + "gpt-5-pro": {"input": 
15.00, "output": 120.00}, + "gpt-5-mini": {"input": 0.25, "output": 2.00, "cached": 0.025}, + "gpt-5-nano": {"input": 0.05, "output": 0.40, "cached": 0.005}, + "gpt-4.1": {"input": 2.00, "output": 8.00, "cached": 0.50}, + "gpt-4.1-mini": {"input": 0.40, "output": 1.60, "cached": 0.10}, + "gpt-4.1-nano": {"input": 0.10, "output": 0.40, "cached": 0.025}, + "o3": {"input": 2.00, "output": 8.00, "cached": 0.50}, + "o3-pro": {"input": 20.00, "output": 80.00}, + "o4-mini": {"input": 1.10, "output": 4.40, "cached": 0.275}, + "o3-mini": {"input": 1.10, "output": 4.40, "cached": 0.55} + }, + "agents": [ + { + "count": 1, + "model": "claude-opus-4-6", + "driver": "claude-code", + "auth": "oauth" + }, + { + "count": 1, + "model": "gpt-5.4", + "driver": "codex-cli", + "effort": "high" + }, + { + "count": 1, + "model": "gpt-5.3-codex", + "driver": "codex-cli", + "effort": "medium" + }, + { + "count": 1, + "model": "gpt-5.2", + "driver": "codex-cli" + } + ] +} diff --git a/tests/configs/codex-only.json b/tests/configs/codex-only.json new file mode 100644 index 0000000..278d594 --- /dev/null +++ b/tests/configs/codex-only.json @@ -0,0 +1,42 @@ +{ + "prompt": "unused", + "driver": "codex-cli", + "pricing": { + "gpt-5.4": {"input": 2.50, "output": 15.00, "cached": 0.25}, + "gpt-5.4-pro": {"input": 30.00, "output": 180.00}, + "gpt-5.4-mini": {"input": 0.75, "output": 4.50, "cached": 0.075}, + "gpt-5.4-nano": {"input": 0.20, "output": 1.25, "cached": 0.02}, + "gpt-5.3-codex": {"input": 1.75, "output": 14.00, "cached": 0.175}, + "gpt-5.2": {"input": 1.75, "output": 14.00, "cached": 0.175}, + "gpt-5.2-codex": {"input": 1.75, "output": 14.00, "cached": 0.175}, + "gpt-5.2-pro": {"input": 21.00, "output": 168.00}, + "gpt-5.1": {"input": 1.25, "output": 10.00, "cached": 0.125}, + "gpt-5.1-codex": {"input": 1.25, "output": 10.00, "cached": 0.125}, + "gpt-5.1-codex-max": {"input": 1.25, "output": 10.00, "cached": 0.125}, + "gpt-5.1-codex-mini": {"input": 0.25, "output": 2.00, "cached": 
0.025}, + "gpt-5": {"input": 1.25, "output": 10.00, "cached": 0.125}, + "gpt-5-codex": {"input": 1.25, "output": 10.00, "cached": 0.125}, + "gpt-5-pro": {"input": 15.00, "output": 120.00}, + "gpt-5-mini": {"input": 0.25, "output": 2.00, "cached": 0.025}, + "gpt-5-nano": {"input": 0.05, "output": 0.40, "cached": 0.005}, + "gpt-4.1": {"input": 2.00, "output": 8.00, "cached": 0.50}, + "gpt-4.1-mini": {"input": 0.40, "output": 1.60, "cached": 0.10}, + "gpt-4.1-nano": {"input": 0.10, "output": 0.40, "cached": 0.025}, + "o3": {"input": 2.00, "output": 8.00, "cached": 0.50}, + "o3-pro": {"input": 20.00, "output": 80.00}, + "o4-mini": {"input": 1.10, "output": 4.40, "cached": 0.275}, + "o3-mini": {"input": 1.10, "output": 4.40, "cached": 0.55} + }, + "agents": [ + { + "count": 1, + "model": "gpt-5.4", + "effort": "high" + }, + { + "count": 1, + "model": "gpt-5.3-codex", + "effort": "low" + } + ] +} diff --git a/tests/test_config.sh b/tests/test_config.sh index e6e4b59..285ddde 100755 --- a/tests/test_config.sh +++ b/tests/test_config.sh @@ -7,6 +7,7 @@ set -euo pipefail PASS=0 FAIL=0 +FAILURES="" TMPDIR=$(mktemp -d) trap 'rm -rf "$TMPDIR"' EXIT @@ -22,6 +23,21 @@ assert_eq() { echo " expected: ${expected}" echo " actual: ${actual}" FAIL=$((FAIL + 1)) + FAILURES="${FAILURES} - ${label} (expected '${expected}', got '${actual}')\n" + fi +} + +assert_contains() { + local label="$1" needle="$2" haystack="$3" + if echo "$haystack" | grep -qF -- "$needle"; then + echo " PASS: ${label}" + PASS=$((PASS + 1)) + else + echo " FAIL: ${label}" + echo " expected to contain: ${needle}" + echo " actual: ${haystack}" + FAIL=$((FAIL + 1)) + FAILURES="${FAILURES} - ${label} (missing '${needle}')\n" fi } @@ -972,10 +988,177 @@ EOF DA_SINGLE=$(parse_docker_args "$TMPDIR/docker_args_single.json") assert_eq "docker_args single" "--network=host" "$DA_SINGLE" +# ============================================================ +echo "" +echo "=== 29. 
Codex-only config ===" + +CFG="$TESTS_DIR/configs/codex-only.json" + +assert_eq "codex-only count" "2" "$(jq '[.agents[].count] | add' "$CFG")" +assert_eq "codex-only driver" "codex-cli" "$(jq -r '.driver' "$CFG")" +assert_eq "codex-only model[0]" "gpt-5.4" "$(jq -r '.agents[0].model' "$CFG")" +assert_eq "codex-only model[1]" "gpt-5.3-codex" "$(jq -r '.agents[1].model' "$CFG")" +assert_eq "codex-only effort[0]" "high" "$(jq -r '.agents[0].effort' "$CFG")" +assert_eq "codex-only effort[1]" "low" "$(jq -r '.agents[1].effort' "$CFG")" + +# Agents inherit top-level driver. +CODEX_AGENTS=$(jq -r '.driver as $dd | .agents[] | range(.count) as $i | + (.driver // $dd // "claude-code")' "$CFG") +assert_eq "codex-only agent inherits driver" "codex-cli" \ + "$(echo "$CODEX_AGENTS" | sed -n '1p')" + +# ============================================================ +echo "" +echo "=== 30. Codex-mixed config ===" + +CFG="$TESTS_DIR/configs/codex-mixed.json" + +MIXED_COUNT=$(jq '[.agents[].count] | add' "$CFG") +assert_eq "codex-mixed count" "4" "$MIXED_COUNT" + +MIXED_AGENTS=$(jq -r '.driver as $dd | .agents[] | range(.count) as $i | + [(.model), (.driver // $dd // "claude-code")] | join("|")' "$CFG") +MA1=$(echo "$MIXED_AGENTS" | sed -n '1p') +MA2=$(echo "$MIXED_AGENTS" | sed -n '2p') +MA3=$(echo "$MIXED_AGENTS" | sed -n '3p') +MA4=$(echo "$MIXED_AGENTS" | sed -n '4p') +assert_eq "codex-mixed agent1 driver" "claude-code" "$(echo "$MA1" | cut -d'|' -f2)" +assert_eq "codex-mixed agent1 model" "claude-opus-4-6" "$(echo "$MA1" | cut -d'|' -f1)" +assert_eq "codex-mixed agent2 driver" "codex-cli" "$(echo "$MA2" | cut -d'|' -f2)" +assert_eq "codex-mixed agent2 model" "gpt-5.4" "$(echo "$MA2" | cut -d'|' -f1)" +assert_eq "codex-mixed agent2 effort" "high" "$(jq -r '.agents[1].effort' "$CFG")" +assert_eq "codex-mixed agent3 driver" "codex-cli" "$(echo "$MA3" | cut -d'|' -f2)" +assert_eq "codex-mixed agent3 model" "gpt-5.3-codex" "$(echo "$MA3" | cut -d'|' -f1)" +assert_eq "codex-mixed 
# Remaining codex-mixed checks: CFG is still the codex-mixed fixture and MA4
# holds the fourth "model|driver" line.  A missing effort is reported by jq
# as the literal string "null".
assert_eq "codex-mixed agent3 effort" "medium" \
    "$(jq -r '.agents[2].effort' "$CFG")"
assert_eq "codex-mixed agent4 driver" "codex-cli" \
    "$(cut -d'|' -f2 <<<"$MA4")"
assert_eq "codex-mixed agent4 model" "gpt-5.2" \
    "$(cut -d'|' -f1 <<<"$MA4")"
assert_eq "codex-mixed agent4 effort" "null" \
    "$(jq -r '.agents[3].effort // null' "$CFG")"

# ============================================================
# Section 31: fixture where every group sets "auth": "chatgpt".
printf '\n'
echo "=== 31. Codex ChatGPT auth config ==="

CFG="$TESTS_DIR/configs/codex-chatgpt.json"

assert_eq "codex-chatgpt count" "2" \
    "$(jq '[.agents[].count] | add' "$CFG")"
assert_eq "codex-chatgpt driver" "codex-cli" \
    "$(jq -r '.driver' "$CFG")"
assert_eq "codex-chatgpt model[0]" "gpt-5.4" \
    "$(jq -r '.agents[0].model' "$CFG")"
assert_eq "codex-chatgpt model[1]" "gpt-5.3-codex" \
    "$(jq -r '.agents[1].model' "$CFG")"
assert_eq "codex-chatgpt auth[0]" "chatgpt" \
    "$(jq -r '.agents[0].auth' "$CFG")"
assert_eq "codex-chatgpt auth[1]" "chatgpt" \
    "$(jq -r '.agents[1].auth' "$CFG")"
assert_eq "codex-chatgpt effort[0]" "high" \
    "$(jq -r '.agents[0].effort' "$CFG")"
assert_eq "codex-chatgpt effort[1]" "null" \
    "$(jq -r '.agents[1].effort // null' "$CFG")"

# The fixture must carry a pricing entry for the model it uses.
assert_eq "codex-chatgpt pricing input" "2.5" \
    "$(jq -r '.pricing["gpt-5.4"].input' "$CFG")"

# Expand to one auth value per agent ("auto" when the field is absent) and
# count the lines that mention chatgpt.
_all_auths=$(jq -r '.agents[] | range(.count) as $i | (.auth // "auto")' "$CFG")
_chatgpt_count=$(grep -c "chatgpt" <<<"$_all_auths" || true)
assert_eq "codex-chatgpt all auth=chatgpt" "2" "$_chatgpt_count"

# ============================================================
# Section 32: fixture with three groups using different auth settings.
printf '\n'
echo "=== 32. Codex auth-mixed config ==="

CFG="$TESTS_DIR/configs/codex-auth-mixed.json"

assert_eq "codex-auth-mixed count" "3" \
    "$(jq '[.agents[].count] | add' "$CFG")"
assert_eq "codex-auth-mixed groups" "3" \
    "$(jq '.agents | length' "$CFG")"
# Group 1 of codex-auth-mixed.json: auth is spelled out as "chatgpt".
assert_eq "codex-auth-mixed[0] auth" "chatgpt" \
    "$(jq -r '.agents[0].auth' "$CFG")"
assert_eq "codex-auth-mixed[0] model" "gpt-5.4" \
    "$(jq -r '.agents[0].model' "$CFG")"

# Group 2: auth is spelled out as "apikey".
assert_eq "codex-auth-mixed[1] auth" "apikey" \
    "$(jq -r '.agents[1].auth' "$CFG")"
assert_eq "codex-auth-mixed[1] model" "gpt-5.3-codex" \
    "$(jq -r '.agents[1].model' "$CFG")"

# Group 3: no auth field at all, so jq falls back to the string "null".
assert_eq "codex-auth-mixed[2] auth" "null" \
    "$(jq -r '.agents[2].auth // "null"' "$CFG")"
assert_eq "codex-auth-mixed[2] model" "gpt-5.4" \
    "$(jq -r '.agents[2].model' "$CFG")"

# Every expanded agent should resolve to the codex-cli driver.
_all_drivers=$(
    jq -r '.driver as $dd
        | .agents[]
        | range(.count) as $i
        | (.driver // $dd // "claude-code")' "$CFG"
)
_codex_count=$(grep -c "codex-cli" <<<"$_all_drivers" || true)
assert_eq "codex-auth-mixed all codex-cli" "3" "$_codex_count"

# ============================================================
# Section 33: call the driver's agent_docker_auth directly, once per auth
# mode found in codex-auth-mixed.json, and inspect the text it prints.
# The third positional argument carries the auth mode; credentials are
# supplied through OPENAI_API_KEY and CODEX_AUTH_JSON.
printf '\n'
echo "=== 33. Codex auth.json driver-level resolution ==="

source "$TESTS_DIR/../lib/drivers/codex-cli.sh"

_fake_auth="$TMPDIR/fake-codex-auth.json"
printf '%s\n' '{"access_token":"test-tok","expires_at":"2099-01-01"}' \
    > "$_fake_auth"

# Group 1 (auth=chatgpt): output names the auth file and the chatgpt mode,
# and never mentions OPENAI_API_KEY.
AUTH_OUT=$(
    OPENAI_API_KEY="" CODEX_AUTH_JSON="$_fake_auth" \
        agent_docker_auth "" "" "chatgpt" ""
)
assert_contains "resolve chatgpt mounts" "$_fake_auth" "$AUTH_OUT"
assert_contains "resolve chatgpt label" "SWARM_AUTH_MODE=chatgpt" "$AUTH_OUT"
_key_count=$(grep -c "OPENAI_API_KEY" <<<"$AUTH_OUT" || true)
assert_eq "resolve chatgpt no key" "0" "$_key_count"
# Group 2 (auth=apikey): the key is forwarded and nothing is mounted.
AUTH_OUT=$(OPENAI_API_KEY="sk-test-key" CODEX_AUTH_JSON="$_fake_auth" \
    agent_docker_auth "" "" "apikey" "")
assert_contains "resolve apikey has key" "OPENAI_API_KEY=sk-test-key" "$AUTH_OUT"
assert_contains "resolve apikey label" "SWARM_AUTH_MODE=key" "$AUTH_OUT"
_mount_count=$(grep -c -- "--mount" <<<"$AUTH_OUT" || true)
assert_eq "resolve apikey no mount" "0" "$_mount_count"

# Group 3 (auth omitted, key and auth file both available): auto mode
# forwards the key and mounts the file.
AUTH_OUT=$(OPENAI_API_KEY="sk-auto-key" CODEX_AUTH_JSON="$_fake_auth" \
    agent_docker_auth "" "" "" "")
assert_contains "resolve auto has key" "OPENAI_API_KEY=sk-auto-key" "$AUTH_OUT"
assert_contains "resolve auto mounts" "$_fake_auth" "$AUTH_OUT"
assert_contains "resolve auto label" "SWARM_AUTH_MODE=auto" "$AUTH_OUT"

# Group 3 variant (auth omitted, only the auth file): labelled chatgpt.
AUTH_OUT=$(OPENAI_API_KEY="" CODEX_AUTH_JSON="$_fake_auth" \
    agent_docker_auth "" "" "" "")
assert_contains "resolve auto chatgpt-only mount" "$_fake_auth" "$AUTH_OUT"
assert_contains "resolve auto chatgpt-only label" "SWARM_AUTH_MODE=chatgpt" "$AUTH_OUT"

# CODEX_AUTH_JSON override: a custom path is the one that gets mounted.
_custom_auth="$TMPDIR/custom-path-auth.json"
echo '{"access_token":"custom"}' > "$_custom_auth"
AUTH_OUT=$(OPENAI_API_KEY="" CODEX_AUTH_JSON="$_custom_auth" \
    agent_docker_auth "" "" "chatgpt" "")
assert_contains "resolve custom path mounts" "$_custom_auth" "$AUTH_OUT"
assert_contains "resolve custom path label" "SWARM_AUTH_MODE=chatgpt" "$AUTH_OUT"

# CODEX_AUTH_JSON pointing at a file that does not exist: no mount.
# The path lives inside this run's private $TMPDIR (created with mktemp -d)
# and is never written, so it is guaranteed absent.  A fixed name under the
# shared /tmp could exist on a host and silently flip the result.
_missing_auth="$TMPDIR/nonexistent-auth.json"
AUTH_OUT=$(OPENAI_API_KEY="" CODEX_AUTH_JSON="$_missing_auth" \
    agent_docker_auth "" "" "chatgpt" "" 2>/dev/null)
_mount_count=$(grep -c -- "--mount" <<<"$AUTH_OUT" || true)
assert_eq "resolve missing auth no mount" "0" "$_mount_count"
# ---- tests/test_config.sh: end of section 33 and final summary ----

# Default-path case: with CODEX_AUTH_JSON unset, the output is expected to
# name ${HOME}/.codex/auth.json.  Only checked when that file exists on the
# host; otherwise the case is reported as skipped.
_default_path="${HOME}/.codex/auth.json"
if [[ ! -f "$_default_path" ]]; then
    echo " SKIP: ~/.codex/auth.json not present on host (default path test)"
else
    AUTH_OUT=$(
        unset CODEX_AUTH_JSON
        OPENAI_API_KEY="" agent_docker_auth "" "" "chatgpt" ""
    )
    assert_contains "resolve default path mounts" "$_default_path" "$AUTH_OUT"
fi

# ============================================================
# Summary: totals, then the accumulated failure list.  Entries in FAILURES
# end with a literal "\n", hence the %b format.
printf '\n'
echo "==============================="
echo " ${PASS} passed, ${FAIL} failed"
if [[ "$FAIL" -gt 0 ]]; then
    printf '\n'
    echo " Failed:"
    printf '%b' "$FAILURES"
fi
echo "==============================="

# Last command of test_config.sh: its status reflects whether any assertion
# failed.
[[ "$FAIL" -eq 0 ]]

# ---- tests/test_drivers.sh: driver file presence and interface ----

assert_eq "codex driver exists" "true" \
    "$(if [[ -f "$DRIVERS_DIR/codex-cli.sh" ]]; then echo true; else echo false; fi)"

# After sourcing the Codex driver, every name in _required_fns must resolve.
source "$DRIVERS_DIR/codex-cli.sh"
for fn in "${_required_fns[@]}"; do
    assert_eq "codex has $fn" "true" \
        "$(if type -t "$fn" &>/dev/null; then echo true; else echo false; fi)"
done
# ---- tests/test_drivers.sh: default model reported by the Codex driver ----
source "$DRIVERS_DIR/codex-cli.sh"
assert_eq "codex default model" "gpt-5.4" "$(agent_default_model)"

# ============================================================
# Section 30: identity functions, activity filter and install command.
printf '\n'
echo "=== 30. Codex driver — role interface ==="

assert_eq "codex driver exists" "true" \
    "$(if [[ -f "$DRIVERS_DIR/codex-cli.sh" ]]; then echo true; else echo false; fi)"

source "$DRIVERS_DIR/codex-cli.sh"

assert_eq "codex name" "Codex CLI" "$(agent_name)"
assert_eq "codex cmd" "codex" "$(agent_cmd)"
assert_eq "codex default" "gpt-5.4" "$(agent_default_model)"

# The jq program must be non-empty and mention the item types it handles.
CDX_JQ=$(agent_activity_jq)
assert_not_empty "codex jq filter" "$CDX_JQ"
assert_contains "codex jq has command_execution" "command_execution" "$CDX_JQ"
assert_contains "codex jq has file_change" "file_change" "$CDX_JQ"
assert_contains "codex jq has changes path" "changes" "$CDX_JQ"

# The install command is expected to go through npm and name the package.
CDX_INSTALL=$(agent_install_cmd)
assert_contains "codex install has npm" "npm" "$CDX_INSTALL"
assert_contains "codex install has @openai/codex" "@openai/codex" "$CDX_INSTALL"
# ============================================================
# Section 31: agent_settings.  Every call runs with HOME pointed at a
# scratch directory under $TMPDIR.
printf '\n'
echo "=== 31. Codex driver — agent_settings ==="

CWORK="$TMPDIR/codex-workspace"
_test_home="$TMPDIR/fakehome"
mkdir -p "$CWORK/.git/info"
mkdir -p "$_test_home"
HOME="$_test_home" agent_settings "$CWORK"

assert_eq "codex config dir created" "true" \
    "$(if [[ -d "$_test_home/.codex" ]]; then echo true; else echo false; fi)"
assert_eq "codex config.toml created" "true" \
    "$(if [[ -f "$_test_home/.codex/config.toml" ]]; then echo true; else echo false; fi)"
assert_contains "codex config has file store" "file" \
    "$(cat "$_test_home/.codex/config.toml")"

# 31b. No AGENTS.md, .claude/CLAUDE.md present: AGENTS.md appears with the
# same content and is listed in .git/info/exclude.
CWORK_B="$TMPDIR/codex-bridge"
mkdir -p "$CWORK_B/.claude"
mkdir -p "$CWORK_B/.git/info"
printf '%s\n' "# Project rules" > "$CWORK_B/.claude/CLAUDE.md"
HOME="$_test_home" agent_settings "$CWORK_B"
assert_eq "AGENTS.md created from .claude/CLAUDE.md" "true" \
    "$(if [[ -f "$CWORK_B/AGENTS.md" ]]; then echo true; else echo false; fi)"
assert_eq "AGENTS.md content matches" "# Project rules" \
    "$(cat "$CWORK_B/AGENTS.md")"
assert_contains "AGENTS.md in git exclude" "AGENTS.md" \
    "$(cat "$CWORK_B/.git/info/exclude")"

# 31c. Only a root-level CLAUDE.md: it serves as the source instead.
CWORK_C="$TMPDIR/codex-bridge-root"
mkdir -p "$CWORK_C/.git/info"
printf '%s\n' "# Root rules" > "$CWORK_C/CLAUDE.md"
HOME="$_test_home" agent_settings "$CWORK_C"
assert_eq "AGENTS.md from root CLAUDE.md" "# Root rules" \
    "$(cat "$CWORK_C/AGENTS.md")"

# 31d. An AGENTS.md that already exists keeps its own content.
CWORK_D="$TMPDIR/codex-bridge-existing"
mkdir -p "$CWORK_D/.claude"
mkdir -p "$CWORK_D/.git/info"
printf '%s\n' "# Codex rules" > "$CWORK_D/AGENTS.md"
printf '%s\n' "# Claude rules" > "$CWORK_D/.claude/CLAUDE.md"
HOME="$_test_home" agent_settings "$CWORK_D"
assert_eq "existing AGENTS.md preserved" "# Codex rules" \
    "$(cat "$CWORK_D/AGENTS.md")"
# 31e. Neither CLAUDE.md variant exists: no AGENTS.md may appear.
CWORK_E="$TMPDIR/codex-bridge-none"
mkdir -p "$CWORK_E/.git/info"
HOME="$_test_home" agent_settings "$CWORK_E"
assert_eq "no AGENTS.md without CLAUDE.md" "false" \
    "$(if [[ -f "$CWORK_E/AGENTS.md" ]]; then echo true; else echo false; fi)"

# 31f. .claude/skills/ present, .agents/skills/ absent: a symlink is created,
# skill files are reachable through it, and .agents/ is git-excluded.
CWORK_F="$TMPDIR/codex-skills"
mkdir -p "$CWORK_F/.claude/skills/add-fuzz-target"
mkdir -p "$CWORK_F/.git/info"
printf '%s\n' "---" > "$CWORK_F/.claude/skills/add-fuzz-target/SKILL.md"
HOME="$_test_home" agent_settings "$CWORK_F"
assert_eq "skills symlink created" "true" \
    "$(if [[ -L "$CWORK_F/.agents/skills" ]]; then echo true; else echo false; fi)"
assert_eq "skills symlink target resolves" "true" \
    "$(if [[ -f "$CWORK_F/.agents/skills/add-fuzz-target/SKILL.md" ]]; then echo true; else echo false; fi)"
assert_contains ".agents/ in git exclude" ".agents/" \
    "$(cat "$CWORK_F/.git/info/exclude")"

# 31g. A real .agents/skills/ directory is kept as is, not replaced by a link.
CWORK_G="$TMPDIR/codex-skills-existing"
mkdir -p "$CWORK_G/.agents/skills/custom"
mkdir -p "$CWORK_G/.claude/skills/other"
mkdir -p "$CWORK_G/.git/info"
printf '%s\n' "custom" > "$CWORK_G/.agents/skills/custom/SKILL.md"
HOME="$_test_home" agent_settings "$CWORK_G"
assert_eq "existing .agents/skills preserved" "custom" \
    "$(cat "$CWORK_G/.agents/skills/custom/SKILL.md")"
assert_eq ".agents/skills not a symlink" "false" \
    "$(if [[ -L "$CWORK_G/.agents/skills" ]]; then echo true; else echo false; fi)"

# 31h. No .claude/skills/ to bridge: .agents/ must not be created.
CWORK_H="$TMPDIR/codex-skills-none"
mkdir -p "$CWORK_H/.git/info"
HOME="$_test_home" agent_settings "$CWORK_H"
assert_eq "no .agents without .claude/skills" "false" \
    "$(if [[ -d "$CWORK_H/.agents" ]]; then echo true; else echo false; fi)"
# 31i. Both CLAUDE.md locations exist: the one under .claude/ is the source.
CWORK_I="$TMPDIR/codex-priority"
mkdir -p "$CWORK_I/.claude"
mkdir -p "$CWORK_I/.git/info"
printf '%s\n' "# inner" > "$CWORK_I/.claude/CLAUDE.md"
printf '%s\n' "# outer" > "$CWORK_I/CLAUDE.md"
HOME="$_test_home" agent_settings "$CWORK_I"
assert_eq ".claude/CLAUDE.md wins over root" "# inner" \
    "$(cat "$CWORK_I/AGENTS.md")"

# 31j. .claude/CLAUDE.md and .claude/skills/ present, neither AGENTS.md nor
# .agents/skills/ yet: both bridges are expected.
CWORK_J="$TMPDIR/codex-full-both"
mkdir -p "$CWORK_J/.claude/skills/triage"
mkdir -p "$CWORK_J/.git/info"
printf '%s\n' "# full rules" > "$CWORK_J/.claude/CLAUDE.md"
printf '%s\n' "---" > "$CWORK_J/.claude/skills/triage/SKILL.md"
HOME="$_test_home" agent_settings "$CWORK_J"
assert_eq "full: AGENTS.md bridged" "# full rules" \
    "$(cat "$CWORK_J/AGENTS.md")"
assert_eq "full: skills symlinked" "true" \
    "$(if [[ -L "$CWORK_J/.agents/skills" ]]; then echo true; else echo false; fi)"
assert_eq "full: skill resolves" "---" \
    "$(cat "$CWORK_J/.agents/skills/triage/SKILL.md")"

# 31k. AGENTS.md already there, .claude/skills/ present: AGENTS.md keeps its
# content while the skills link is still made.
CWORK_K="$TMPDIR/codex-agents-exists-skills"
mkdir -p "$CWORK_K/.claude/skills/build-poc"
mkdir -p "$CWORK_K/.git/info"
printf '%s\n' "# own agents" > "$CWORK_K/AGENTS.md"
printf '%s\n' "# claude" > "$CWORK_K/.claude/CLAUDE.md"
printf '%s\n' "---" > "$CWORK_K/.claude/skills/build-poc/SKILL.md"
HOME="$_test_home" agent_settings "$CWORK_K"
assert_eq "AGENTS.md kept, not overwritten" "# own agents" \
    "$(cat "$CWORK_K/AGENTS.md")"
assert_eq "skills still bridged" "true" \
    "$(if [[ -L "$CWORK_K/.agents/skills" ]]; then echo true; else echo false; fi)"
# 31l. .claude/CLAUDE.md present and .agents/skills/ already a directory:
# AGENTS.md is bridged, the skills directory is left alone.
CWORK_L="$TMPDIR/codex-claude-exists-agentskills"
mkdir -p "$CWORK_L/.claude"
mkdir -p "$CWORK_L/.agents/skills/own"
mkdir -p "$CWORK_L/.git/info"
printf '%s\n' "# project" > "$CWORK_L/.claude/CLAUDE.md"
printf '%s\n' "own-skill" > "$CWORK_L/.agents/skills/own/SKILL.md"
HOME="$_test_home" agent_settings "$CWORK_L"
assert_eq "AGENTS.md bridged" "# project" \
    "$(cat "$CWORK_L/AGENTS.md")"
assert_eq ".agents/skills not a symlink" "false" \
    "$(if [[ -L "$CWORK_L/.agents/skills" ]]; then echo true; else echo false; fi)"
assert_eq "own skill preserved" "own-skill" \
    "$(cat "$CWORK_L/.agents/skills/own/SKILL.md")"

# 31m. AGENTS.md and .agents/skills/ both exist already: nothing is bridged.
CWORK_M="$TMPDIR/codex-all-exist"
mkdir -p "$CWORK_M/.claude/skills/x"
mkdir -p "$CWORK_M/.agents/skills/y"
mkdir -p "$CWORK_M/.git/info"
printf '%s\n' "# codex agents" > "$CWORK_M/AGENTS.md"
printf '%s\n' "# claude" > "$CWORK_M/.claude/CLAUDE.md"
printf '%s\n' "x" > "$CWORK_M/.claude/skills/x/SKILL.md"
printf '%s\n' "y" > "$CWORK_M/.agents/skills/y/SKILL.md"
HOME="$_test_home" agent_settings "$CWORK_M"
assert_eq "all-exist: AGENTS.md untouched" "# codex agents" \
    "$(cat "$CWORK_M/AGENTS.md")"
assert_eq "all-exist: .agents/skills not symlink" "false" \
    "$(if [[ -L "$CWORK_M/.agents/skills" ]]; then echo true; else echo false; fi)"
assert_eq "all-exist: own skill intact" "y" \
    "$(cat "$CWORK_M/.agents/skills/y/SKILL.md")"

# 31n. Skills directory only, no CLAUDE.md: the link is made and no
# AGENTS.md is produced.
CWORK_N="$TMPDIR/codex-skills-only"
mkdir -p "$CWORK_N/.claude/skills/scan"
mkdir -p "$CWORK_N/.git/info"
printf '%s\n' "---" > "$CWORK_N/.claude/skills/scan/SKILL.md"
HOME="$_test_home" agent_settings "$CWORK_N"
assert_eq "skills-only: no AGENTS.md" "false" \
    "$(if [[ -f "$CWORK_N/AGENTS.md" ]]; then echo true; else echo false; fi)"
assert_eq "skills-only: symlink created" "true" \
    "$(if [[ -L "$CWORK_N/.agents/skills" ]]; then echo true; else echo false; fi)"
# 31o. Native AGENTS.md and no .claude/ directory: nothing to bridge.
CWORK_O="$TMPDIR/codex-agents-only"
mkdir -p "$CWORK_O/.git/info"
printf '%s\n' "# native" > "$CWORK_O/AGENTS.md"
HOME="$_test_home" agent_settings "$CWORK_O"
assert_eq "agents-only: AGENTS.md intact" "# native" \
    "$(cat "$CWORK_O/AGENTS.md")"
assert_eq "agents-only: no .agents dir" "false" \
    "$(if [[ -d "$CWORK_O/.agents" ]]; then echo true; else echo false; fi)"

# 31p. .claude/CLAUDE.md without any skills: AGENTS.md is bridged and no
# .agents/ directory shows up.
CWORK_P="$TMPDIR/codex-claude-noskills"
mkdir -p "$CWORK_P/.claude"
mkdir -p "$CWORK_P/.git/info"
printf '%s\n' "# rules" > "$CWORK_P/.claude/CLAUDE.md"
HOME="$_test_home" agent_settings "$CWORK_P"
assert_eq "noskills: AGENTS.md bridged" "# rules" \
    "$(cat "$CWORK_P/AGENTS.md")"
assert_eq "noskills: no .agents dir" "false" \
    "$(if [[ -d "$CWORK_P/.agents" ]]; then echo true; else echo false; fi)"

# 31q. .agents/skills/ exists and there is no .claude/skills/: left as is.
CWORK_Q="$TMPDIR/codex-agentskills-noclaudeskills"
mkdir -p "$CWORK_Q/.agents/skills/mine"
mkdir -p "$CWORK_Q/.git/info"
printf '%s\n' "kept" > "$CWORK_Q/.agents/skills/mine/SKILL.md"
HOME="$_test_home" agent_settings "$CWORK_Q"
assert_eq "no-claude-skills: .agents preserved" "kept" \
    "$(cat "$CWORK_Q/.agents/skills/mine/SKILL.md")"
assert_eq "no-claude-skills: not a symlink" "false" \
    "$(if [[ -L "$CWORK_Q/.agents/skills" ]]; then echo true; else echo false; fi)"
# ============================================================
# Section 32: agent_extract_stats on Codex JSONL logs.
printf '\n'
echo "=== 32. Codex driver — agent_extract_stats ==="

# Two-turn fixture; only the turn.completed events carry a usage object.
cat > "$TMPDIR/codex-session.jsonl" <<'EOF'
{"type":"turn.started","turn_id":"t1"}
{"type":"item.started","item":{"type":"command_execution","command":"ls -la"}}
{"type":"item.completed","item":{"type":"command_execution","command":"ls -la"}}
{"type":"turn.completed","turn_id":"t1","usage":{"input_tokens":500,"output_tokens":200,"cached_input_tokens":100}}
{"type":"turn.started","turn_id":"t2"}
{"type":"item.started","item":{"type":"file_change","file_path":"src/main.ts"}}
{"type":"item.completed","item":{"type":"file_change","file_path":"src/main.ts"}}
{"type":"turn.completed","turn_id":"t2","usage":{"input_tokens":800,"output_tokens":300,"cached_input_tokens":400}}
EOF

# The result is read as a single tab-separated record of eight fields.
CSTATS=$(agent_extract_stats "$TMPDIR/codex-session.jsonl")
IFS=$'\t' read -r \
    c_cost c_in c_out c_cache_rd c_cache_cr c_dur c_api_ms c_turns \
    <<<"$CSTATS"

# Expected tok_in of 800 equals the summed input_tokens (500 + 800) minus the
# summed cached_input_tokens (100 + 400).
assert_eq "codex cost is 0" "0" "$c_cost"
assert_eq "codex tok_in summed" "800" "$c_in"
assert_eq "codex tok_out summed" "500" "$c_out"
assert_eq "codex cached summed" "500" "$c_cache_rd"
assert_eq "codex cache_cr" "0" "$c_cache_cr"
assert_eq "codex dur" "0" "$c_dur"
assert_eq "codex api_ms" "0" "$c_api_ms"
assert_eq "codex turns" "2" "$c_turns"

# A zero-byte log must yield zeroes.
printf '' > "$TMPDIR/codex-empty.jsonl"
CSTATS_EMPTY=$(agent_extract_stats "$TMPDIR/codex-empty.jsonl")
IFS=$'\t' read -r \
    c_cost c_in c_out c_cache_rd c_cache_cr c_dur c_api_ms c_turns \
    <<<"$CSTATS_EMPTY"
assert_eq "codex empty cost" "0" "$c_cost"
assert_eq "codex empty turns" "0" "$c_turns"
# Single-turn log: the one usage record comes through unchanged.
printf '%s\n' \
    '{"type":"turn.completed","turn_id":"t1","usage":{"input_tokens":100,"output_tokens":50,"cached_input_tokens":0}}' \
    > "$TMPDIR/codex-single.jsonl"
CSTATS_S=$(agent_extract_stats "$TMPDIR/codex-single.jsonl")
IFS=$'\t' read -r \
    c_cost c_in c_out c_cache_rd c_cache_cr c_dur c_api_ms c_turns \
    <<<"$CSTATS_S"
assert_eq "codex single tok_in" "100" "$c_in"
assert_eq "codex single tok_out" "50" "$c_out"
assert_eq "codex single turns" "1" "$c_turns"

# ============================================================
# Section 33: agent_detect_fatal LOGFILE EXIT_CODE.
printf '\n'
echo "=== 33. Codex driver — agent_detect_fatal ==="

# A turn.failed event is fatal and its error text is reported.
printf '%s\n' \
    '{"type":"turn.failed","turn_id":"t1","error":"authentication_error: Invalid API key"}' \
    > "$TMPDIR/codex-fatal.jsonl"
CFATAL=$(agent_detect_fatal "$TMPDIR/codex-fatal.jsonl" 1)
assert_not_empty "codex turn.failed detected" "$CFATAL"
assert_contains "codex fatal mentions auth" "authentication_error" "$CFATAL"

# A generic error event is fatal as well.
printf '%s\n' \
    '{"type":"error","message":"model not found: gpt-99"}' \
    > "$TMPDIR/codex-error.jsonl"
CERR=$(agent_detect_fatal "$TMPDIR/codex-error.jsonl" 1)
assert_not_empty "codex error event detected" "$CERR"
assert_contains "codex error message" "model not found" "$CERR"

# Empty event log plus an error in the sibling "<log>.err" file: fatal.
: > "$TMPDIR/codex-stderr.jsonl"
printf '%s\n' \
    'Error: Unauthorized - invalid API key' \
    > "$TMPDIR/codex-stderr.jsonl.err"
CSTDERR=$(agent_detect_fatal "$TMPDIR/codex-stderr.jsonl" 1)
assert_not_empty "codex stderr error detected" "$CSTDERR"
assert_contains "codex stderr mentions unauthorized" "Unauthorized" "$CSTDERR"
# Not fatal: a completed turn is present, so the word "error" in the .err
# file is treated as noise.
printf '%s\n' \
    '{"type":"turn.completed","turn_id":"t1","usage":{"input_tokens":100,"output_tokens":50,"cached_input_tokens":0}}' \
    > "$TMPDIR/codex-ok.jsonl"
printf '%s\n' \
    'Warning: something non-fatal with error word' \
    > "$TMPDIR/codex-ok.jsonl.err"
COK=$(agent_detect_fatal "$TMPDIR/codex-ok.jsonl" 0)
assert_eq "codex ok not flagged" "" "$COK"

# A clean log with no .err file is not flagged either.
printf '%s\n' \
    '{"type":"turn.completed","turn_id":"t1","usage":{"input_tokens":200,"output_tokens":100,"cached_input_tokens":0}}' \
    > "$TMPDIR/codex-clean.jsonl"
CCLEAN=$(agent_detect_fatal "$TMPDIR/codex-clean.jsonl" 0)
assert_eq "codex clean not flagged" "" "$CCLEAN"

# ============================================================
# Section 34: agent_is_retriable prints something for transient failures
# and nothing for permanent ones.
printf '\n'
echo "=== 34. Codex driver — agent_is_retriable ==="

# HTTP 429 / rate limit reported as an error event.
printf '%s\n' \
    '{"type":"error","message":"429 Too many requests - rate limit exceeded"}' \
    > "$TMPDIR/codex-429.jsonl"
RETRY_OUT=$(agent_is_retriable "$TMPDIR/codex-429.jsonl" 1)
assert_not_empty "codex 429 is retriable" "$RETRY_OUT"

# Quota exhaustion reported through turn.failed.
printf '%s\n' \
    '{"type":"turn.failed","error":"quota exceeded for model gpt-5.4"}' \
    > "$TMPDIR/codex-quota.jsonl"
RETRY_OUT=$(agent_is_retriable "$TMPDIR/codex-quota.jsonl" 1)
assert_not_empty "codex quota is retriable" "$RETRY_OUT"

# Rate limit that only shows up in the .err file.
: > "$TMPDIR/codex-rate-stderr.jsonl"
printf '%s\n' \
    'Error: rate limit exceeded, please retry later' \
    > "$TMPDIR/codex-rate-stderr.jsonl.err"
RETRY_OUT=$(agent_is_retriable "$TMPDIR/codex-rate-stderr.jsonl" 1)
assert_not_empty "codex rate limit in stderr is retriable" "$RETRY_OUT"

# A bad API key is permanent: nothing is printed.
printf '%s\n' \
    '{"type":"error","message":"Invalid API key"}' \
    > "$TMPDIR/codex-auth.jsonl"
RETRY_OUT=$(agent_is_retriable "$TMPDIR/codex-auth.jsonl" 1)
assert_eq "codex auth not retriable" "" "$RETRY_OUT"
# ============================================================
# Section 35: agent_docker_env EFFORT.
printf '\n'
echo "=== 35. Codex driver — agent_docker_env ==="

CDX_ENV=$(agent_docker_env "high")
assert_contains "codex docker_env effort flag" "CODEX_EFFORT=high" "$CDX_ENV"

CDX_ENV=$(agent_docker_env "low")
assert_contains "codex docker_env effort low" "CODEX_EFFORT=low" "$CDX_ENV"

# An empty effort produces no output at all.
CDX_ENV=$(agent_docker_env "")
assert_eq "codex docker_env empty effort" "" "$CDX_ENV"

# ============================================================
# Section 36: agent_docker_auth.  The first argument is the per-agent API
# key and the third is the auth mode.  CODEX_AUTH_JSON is pointed at a path
# that does not exist so no auth file takes part in these cases.
printf '\n'
echo "=== 36. Codex driver — agent_docker_auth ==="

# Explicit apikey mode, key supplied per agent.
AUTH_OUT=$(
    OPENAI_API_KEY="" CODEX_AUTH_JSON="/nonexistent" \
        agent_docker_auth "sk-codex-key" "" "apikey" ""
)
assert_contains "codex per-agent key" "OPENAI_API_KEY=sk-codex-key" "$AUTH_OUT"
assert_contains "codex per-agent label" "SWARM_AUTH_MODE=key" "$AUTH_OUT"

# Explicit apikey mode, key taken from the environment.
AUTH_OUT=$(
    OPENAI_API_KEY="sk-env-key" CODEX_AUTH_JSON="/nonexistent" \
        agent_docker_auth "" "" "apikey" ""
)
assert_contains "codex env key" "OPENAI_API_KEY=sk-env-key" "$AUTH_OUT"
assert_contains "codex env key label" "SWARM_AUTH_MODE=key" "$AUTH_OUT"

# With both present, the per-agent key takes precedence.
AUTH_OUT=$(
    OPENAI_API_KEY="sk-env" CODEX_AUTH_JSON="/nonexistent" \
        agent_docker_auth "sk-agent" "" "apikey" ""
)
assert_contains "codex per-agent overrides env" "OPENAI_API_KEY=sk-agent" "$AUTH_OUT"

# No credentials of any kind: an auth-mode label is still printed, but
# OPENAI_API_KEY is not mentioned.
AUTH_OUT=$(
    OPENAI_API_KEY="" CODEX_AUTH_JSON="/nonexistent" \
        agent_docker_auth "" "" "" ""
)
assert_contains "codex no creds has auth mode" "SWARM_AUTH_MODE=" "$AUTH_OUT"
_line_count=$(grep -c "OPENAI_API_KEY" <<<"$AUTH_OUT" || true)
assert_eq "codex no creds no key flag" "0" "$_line_count"
# ChatGPT subscription mode: the auth file is mounted and no API key is
# passed.
_fake_auth="$TMPDIR/fake-auth.json"
echo '{"token":"test"}' > "$_fake_auth"
AUTH_OUT=$(OPENAI_API_KEY="" CODEX_AUTH_JSON="$_fake_auth" \
    agent_docker_auth "" "" "chatgpt" "")
assert_contains "codex chatgpt mounts auth.json" "$_fake_auth" "$AUTH_OUT"
assert_contains "codex chatgpt mount flag" "--mount" "$AUTH_OUT"
assert_contains "codex chatgpt label" "SWARM_AUTH_MODE=chatgpt" "$AUTH_OUT"
_key_count=$(echo "$AUTH_OUT" | grep -c "OPENAI_API_KEY" || true)
assert_eq "codex chatgpt no api key" "0" "$_key_count"

# ChatGPT mode but the auth file is missing: no mount in the output
# (stderr is discarded).
AUTH_OUT=$(OPENAI_API_KEY="" CODEX_AUTH_JSON="/nonexistent" \
    agent_docker_auth "" "" "chatgpt" "" 2>/dev/null)
# Match the flag literally: -F for a fixed string, "--" so the pattern is
# not parsed as an option.  The previous "\-\-mount" relied on "\-", an
# undefined escape in basic regular expressions that newer GNU grep
# releases warn about.
_mount_count=$(echo "$AUTH_OUT" | grep -cF -- "--mount" || true)
assert_eq "codex chatgpt missing no mount" "0" "$_mount_count"

# Auto-detect with API key and auth file both present: both are used.
AUTH_OUT=$(OPENAI_API_KEY="sk-both" CODEX_AUTH_JSON="$_fake_auth" \
    agent_docker_auth "" "" "" "")
assert_contains "codex auto has api key" "OPENAI_API_KEY=sk-both" "$AUTH_OUT"
assert_contains "codex auto mounts auth.json" "$_fake_auth" "$AUTH_OUT"
assert_contains "codex auto label" "SWARM_AUTH_MODE=auto" "$AUTH_OUT"

# Auto-detect with only the auth file: mounted and labelled chatgpt.
AUTH_OUT=$(OPENAI_API_KEY="" CODEX_AUTH_JSON="$_fake_auth" \
    agent_docker_auth "" "" "" "")
assert_contains "codex auto chatgpt-only mount" "$_fake_auth" "$AUTH_OUT"
assert_contains "codex auto chatgpt-only label" "SWARM_AUTH_MODE=chatgpt" "$AUTH_OUT"
# ============================================================
# Section 37: write the driver's jq program to a file and run sample events
# through activity-filter.sh, which receives the file's path in
# SWARM_JQ_FILTER_FILE.  Filter stderr and failures are ignored; only the
# captured text is checked.
printf '\n'
echo "=== 37. Codex driver — activity jq filter via file boundary ==="

source "$DRIVERS_DIR/codex-cli.sh"
agent_activity_jq > "$TMPDIR/codex.jq"

# command_execution start event.
CODEX_CMD='{"type":"item.started","item":{"type":"command_execution","command":"npm test","id":"x1","status":"in_progress","aggregated_output":"","exit_code":null}}'
CDX_CMD_OUT=$(
    AGENT_ID=7 SWARM_JQ_FILTER_FILE="$TMPDIR/codex.jq" \
        bash "$FILTER_DIR/activity-filter.sh" <<<"$CODEX_CMD" 2>/dev/null || true
)
assert_contains "codex jq command_execution" "Shell:" "$CDX_CMD_OUT"
assert_contains "codex jq command content" "npm test" "$CDX_CMD_OUT"

# file_change event: the path is carried in .changes[].path.
CODEX_EDIT='{"type":"item.completed","item":{"type":"file_change","id":"x2","changes":[{"path":"/workspace/src/utils.ts","kind":"edit"}],"status":"completed"}}'
CDX_EDIT_OUT=$(
    AGENT_ID=7 SWARM_JQ_FILTER_FILE="$TMPDIR/codex.jq" \
        bash "$FILTER_DIR/activity-filter.sh" <<<"$CODEX_EDIT" 2>/dev/null || true
)
assert_contains "codex jq file_change" "Edit " "$CDX_EDIT_OUT"
assert_contains "codex jq file_change path" "/workspace/src/utils.ts" "$CDX_EDIT_OUT"
# file_change listing several paths: the first one is what gets displayed.
CODEX_MULTI='{"type":"item.completed","item":{"type":"file_change","id":"x3","changes":[{"path":"a.ts","kind":"add"},{"path":"b.ts","kind":"add"}],"status":"completed"}}'
CDX_MULTI_OUT=$(
    AGENT_ID=7 SWARM_JQ_FILTER_FILE="$TMPDIR/codex.jq" \
        bash "$FILTER_DIR/activity-filter.sh" <<<"$CODEX_MULTI" 2>/dev/null || true
)
assert_contains "codex jq multi file_change" "Edit a.ts" "$CDX_MULTI_OUT"

# web_search event.
CODEX_SEARCH='{"type":"item.started","item":{"type":"web_search","query":"node.js best practices"}}'
CDX_SEARCH_OUT=$(
    AGENT_ID=7 SWARM_JQ_FILTER_FILE="$TMPDIR/codex.jq" \
        bash "$FILTER_DIR/activity-filter.sh" <<<"$CODEX_SEARCH" 2>/dev/null || true
)
assert_contains "codex jq web_search" "Search:" "$CDX_SEARCH_OUT"

# mcp_tool_call event.
CODEX_MCP='{"type":"item.completed","item":{"type":"mcp_tool_call","tool_name":"readFile"}}'
CDX_MCP_OUT=$(
    AGENT_ID=7 SWARM_JQ_FILTER_FILE="$TMPDIR/codex.jq" \
        bash "$FILTER_DIR/activity-filter.sh" <<<"$CODEX_MCP" 2>/dev/null || true
)
assert_contains "codex jq mcp_tool_call" "MCP:" "$CDX_MCP_OUT"

# agent_message events produce no activity line.
CODEX_MSG='{"type":"item.completed","item":{"type":"agent_message","id":"x4","text":"Done."}}'
CDX_MSG_OUT=$(
    AGENT_ID=7 SWARM_JQ_FILTER_FILE="$TMPDIR/codex.jq" \
        bash "$FILTER_DIR/activity-filter.sh" <<<"$CODEX_MSG" 2>/dev/null || true
)
assert_eq "codex jq agent_message silent" "" "$CDX_MSG_OUT"
# ============================================================
# Section 38 (tests/test_drivers.sh): driver resolution when the top-level
# driver is codex-cli and one group overrides it.
printf '\n'
echo "=== 38. Codex driver in config parsing ==="

cat > "$TMPDIR/codex_cfg.json" <<'EOF'
{
  "prompt": "p.md",
  "driver": "codex-cli",
  "agents": [
    { "count": 1, "model": "gpt-5.4" },
    { "count": 1, "model": "claude-opus-4-6", "driver": "claude-code" }
  ]
}
EOF

TOP_DRIVER=$(jq -r '.driver // "claude-code"' "$TMPDIR/codex_cfg.json")
assert_eq "codex top-level driver" "codex-cli" "$TOP_DRIVER"

# One driver name per group: the group's own value, else the top-level one,
# else "claude-code".
AGENTS=$(
    jq -r '.driver as $dd
        | .agents[]
        | (.driver // $dd // "claude-code")' "$TMPDIR/codex_cfg.json"
)
LINE1=$(sed -n '1p' <<<"$AGENTS")
LINE2=$(sed -n '2p' <<<"$AGENTS")
assert_eq "codex agent1 inherits top driver" "codex-cli" "$LINE1"
assert_eq "codex agent2 per-agent driver" "claude-code" "$LINE2"

# ============================================================
# Section 15 (tests/test_harness.sh): banner for the context-stripping test.
printf '\n'
echo "=== 15. Context stripping hooks survive git pull ==="
# Build a bare origin and a first working clone that commits .claude/
# context files next to ordinary code, on branch agent-work.
CTX_BARE="$TMPDIR/ctx-bare.git"
CTX_WORK="$TMPDIR/ctx-work"
CTX_WORK2="$TMPDIR/ctx-work2"

git init -q --bare "$CTX_BARE"
git clone -q "$CTX_BARE" "$CTX_WORK"
git -C "$CTX_WORK" config user.name "test"
git -C "$CTX_WORK" config user.email "test@test"
git -C "$CTX_WORK" config commit.gpgsign false

mkdir -p "$CTX_WORK/.claude/skills"
mkdir -p "$CTX_WORK/.claude/references"
printf '%s\n' "# CLAUDE" > "$CTX_WORK/.claude/CLAUDE.md"
printf '%s\n' "skill data" > "$CTX_WORK/.claude/skills/triage.md"
printf '%s\n' "ref data" > "$CTX_WORK/.claude/references/known.md"
printf '%s\n' "code" > "$CTX_WORK/main.go"

git -C "$CTX_WORK" add -A
git -C "$CTX_WORK" commit -q -m "initial with .claude context"
git -C "$CTX_WORK" checkout -q -b agent-work
git -C "$CTX_WORK" push -q origin agent-work

# Install the strip script with the mode fixed to "slim": everything
# directly under .claude/ except CLAUDE.md is removed.
mkdir -p "$CTX_WORK/.git/hooks"
cat > "$CTX_WORK/.git/hooks/_strip_context" <<'CTXHOOK'
#!/bin/bash
case "slim" in
  none) rm -rf .claude 2>/dev/null ;;
  slim) [ -d .claude ] && find .claude -mindepth 1 -maxdepth 1 ! -name CLAUDE.md -exec rm -rf {} + 2>/dev/null ;;
esac
CTXHOOK
chmod +x "$CTX_WORK/.git/hooks/_strip_context"

# post-merge and post-checkout both just call the strip script.
for _hook in post-merge post-checkout; do
    _hook_path="$CTX_WORK/.git/hooks/$_hook"
    printf '#!/bin/bash\n.git/hooks/_strip_context\n' > "$_hook_path"
    chmod +x "$_hook_path"
done

# Initial strip, done by hand here.
find "$CTX_WORK/.claude" -mindepth 1 -maxdepth 1 ! -name CLAUDE.md -exec rm -rf {} +

assert_eq "slim: CLAUDE.md kept" "true" \
    "$(if [[ -f "$CTX_WORK/.claude/CLAUDE.md" ]]; then echo true; else echo false; fi)"
assert_eq "slim: skills/ removed" "false" \
    "$(if [[ -d "$CTX_WORK/.claude/skills" ]]; then echo true; else echo false; fi)"
+git clone -q "$CTX_BARE" "$CTX_WORK2" +git -C "$CTX_WORK2" config user.name "test2" +git -C "$CTX_WORK2" config user.email "test2@test" +git -C "$CTX_WORK2" config commit.gpgsign false +git -C "$CTX_WORK2" checkout -q agent-work +echo "new code" >> "$CTX_WORK2/main.go" +git -C "$CTX_WORK2" add main.go +git -C "$CTX_WORK2" commit -q -m "other agent work" +git -C "$CTX_WORK2" push -q origin agent-work + +# First clone pulls — post-merge hook should re-strip. +git -C "$CTX_WORK" pull -q origin agent-work + +assert_eq "slim post-merge: CLAUDE.md still kept" "true" \ + "$([ -f "$CTX_WORK/.claude/CLAUDE.md" ] && echo true || echo false)" +assert_eq "slim post-merge: skills/ re-stripped" "false" \ + "$([ -d "$CTX_WORK/.claude/skills" ] && echo true || echo false)" +assert_eq "slim post-merge: references/ re-stripped" "false" \ + "$([ -d "$CTX_WORK/.claude/references" ] && echo true || echo false)" + +# Now test context=none mode. +CTX_NONE="$TMPDIR/ctx-none" +git clone -q "$CTX_BARE" "$CTX_NONE" +git -C "$CTX_NONE" config user.name "test" +git -C "$CTX_NONE" config user.email "test@test" +git -C "$CTX_NONE" config commit.gpgsign false +git -C "$CTX_NONE" checkout -q agent-work + +mkdir -p "$CTX_NONE/.git/hooks" +cat > "$CTX_NONE/.git/hooks/_strip_context" <<'CTXHOOK' +#!/bin/bash +case "none" in + none) rm -rf .claude 2>/dev/null ;; + slim) [ -d .claude ] && find .claude -mindepth 1 -maxdepth 1 ! -name CLAUDE.md -exec rm -rf {} + 2>/dev/null ;; +esac +CTXHOOK +chmod +x "$CTX_NONE/.git/hooks/_strip_context" +for _hook in post-merge post-checkout; do + printf '#!/bin/bash\n.git/hooks/_strip_context\n' \ + > "$CTX_NONE/.git/hooks/$_hook" + chmod +x "$CTX_NONE/.git/hooks/$_hook" +done + +rm -rf "$CTX_NONE/.claude" +assert_eq "none: .claude/ removed" "false" \ + "$([ -d "$CTX_NONE/.claude" ] && echo true || echo false)" + +# Push another change from work2 to trigger a merge. 
+echo "more code" >> "$CTX_WORK2/main.go" +git -C "$CTX_WORK2" add main.go +git -C "$CTX_WORK2" commit -q -m "yet more work" +git -C "$CTX_WORK2" push -q origin agent-work + +git -C "$CTX_NONE" pull -q origin agent-work + +assert_eq "none post-merge: .claude/ re-removed" "false" \ + "$([ -d "$CTX_NONE/.claude" ] && echo true || echo false)" + # ============================================================ echo "" echo "==============================="