Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
# Optional: N8N webhook for progress notifications
# PROGRESS_N8N_WEBHOOK_URL=https://your-n8n-instance.com/webhook/...

# ===================
# SDK Selection
# ===================
# Choose which agent SDK to use:
# - claude: Claude Agent SDK (default) - uses Claude Code CLI
# - codex: OpenAI Codex SDK - uses Codex CLI
#
# AUTOFORGE_SDK=claude

# Playwright Browser Configuration
#
# PLAYWRIGHT_BROWSER: Which browser to use for testing
Expand Down
86 changes: 41 additions & 45 deletions agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
from typing import Optional
from zoneinfo import ZoneInfo

from claude_agent_sdk import ClaudeSDKClient
from sdk_adapter import SDKAdapter
from sdk_adapter.types import EventType

# Fix Windows console encoding for Unicode characters (emoji, etc.)
# Without this, print() crashes when Claude outputs emoji like ✅
Expand Down Expand Up @@ -50,15 +51,15 @@


async def run_agent_session(
client: ClaudeSDKClient,
client: SDKAdapter,
message: str,
project_dir: Path,
) -> tuple[str, str]:
"""
Run a single agent session using Claude Agent SDK.
Run a single agent session using the SDK adapter.

Args:
client: Claude SDK client
client: SDK adapter (Claude or Codex)
message: The prompt to send
project_dir: Project directory path

Expand All @@ -67,53 +68,48 @@ async def run_agent_session(
- "continue" if agent should continue working
- "error" if an error occurred
"""
print("Sending prompt to Claude Agent SDK...\n")
from sdk_adapter.factory import get_sdk_type

sdk_type = get_sdk_type()
print(f"Sending prompt to {sdk_type.upper()} Agent SDK...\n")

try:
# Send the query
await client.query(message)

# Collect response text and show tool use
# Collect response text using unified event model
response_text = ""
async for msg in client.receive_response():
msg_type = type(msg).__name__

# Handle AssistantMessage (text and tool use)
if msg_type == "AssistantMessage" and hasattr(msg, "content"):
for block in msg.content:
block_type = type(block).__name__

if block_type == "TextBlock" and hasattr(block, "text"):
response_text += block.text
print(block.text, end="", flush=True)
elif block_type == "ToolUseBlock" and hasattr(block, "name"):
print(f"\n[Tool: {block.name}]", flush=True)
if hasattr(block, "input"):
input_str = str(block.input)
if len(input_str) > 200:
print(f" Input: {input_str[:200]}...", flush=True)
else:
print(f" Input: {input_str}", flush=True)

# Handle UserMessage (tool results)
elif msg_type == "UserMessage" and hasattr(msg, "content"):
for block in msg.content:
block_type = type(block).__name__

if block_type == "ToolResultBlock":
result_content = getattr(block, "content", "")
is_error = getattr(block, "is_error", False)

# Check if command was blocked by security hook
if "blocked" in str(result_content).lower():
print(f" [BLOCKED] {result_content}", flush=True)
elif is_error:
# Show errors (truncated)
error_str = str(result_content)[:500]
print(f" [Error] {error_str}", flush=True)
else:
# Tool succeeded - just show brief confirmation
print(" [Done]", flush=True)
async for event in client.receive_events():
if event.type == EventType.TEXT:
response_text += event.content
print(event.content, end="", flush=True)

elif event.type == EventType.TOOL_CALL:
print(f"\n[Tool: {event.tool_name}]", flush=True)
if event.tool_input:
input_str = str(event.tool_input)
if len(input_str) > 200:
print(f" Input: {input_str[:200]}...", flush=True)
else:
print(f" Input: {input_str}", flush=True)

elif event.type == EventType.TOOL_RESULT:
# Check if command was blocked by security hook
if "blocked" in event.content.lower():
print(f" [BLOCKED] {event.content}", flush=True)
elif event.is_error:
# Show errors (truncated)
error_str = event.content[:500]
print(f" [Error] {error_str}", flush=True)
else:
# Tool succeeded - just show brief confirmation
print(" [Done]", flush=True)

elif event.type == EventType.ERROR:
print(f"\n[Error] {event.content}", flush=True)

elif event.type == EventType.DONE:
break

print("\n" + "-" * 70 + "\n")
return "continue", response_text
Expand Down
165 changes: 108 additions & 57 deletions client.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
"""
Claude SDK Client Configuration
===============================
SDK Client Configuration
========================

Functions for creating and configuring the Claude Agent SDK client.
Functions for creating and configuring SDK adapters (Claude or Codex).
Uses AUTOFORGE_SDK environment variable to select the SDK:
- "claude" (default): Claude Agent SDK
- "codex": OpenAI Codex SDK
"""

import json
Expand All @@ -16,6 +19,7 @@
from claude_agent_sdk.types import HookContext, HookInput, HookMatcher, SyncHookJSONOutput
from dotenv import load_dotenv

from sdk_adapter import AdapterOptions, SDKAdapter, create_adapter, get_sdk_type
from security import SENSITIVE_DIRECTORIES, bash_security_hook

# Load environment variables from .env file if present
Expand Down Expand Up @@ -284,31 +288,38 @@ def create_client(
yolo_mode: bool = False,
agent_id: str | None = None,
agent_type: str = "coding",
):
) -> SDKAdapter:
"""
Create a Claude Agent SDK client with multi-layered security.
Create an SDK adapter (Claude or Codex) with multi-layered security.

Uses AUTOFORGE_SDK environment variable to select SDK:
- "claude" (default): Claude Agent SDK
- "codex": OpenAI Codex SDK

Args:
project_dir: Directory for the project
model: Claude model to use
model: Model to use (Claude model name; currently ignored for Codex, which is created with model=None and uses its own default model)
yolo_mode: If True, skip Playwright MCP server for rapid prototyping
agent_id: Optional unique identifier for browser isolation in parallel mode.
When provided, each agent gets its own browser profile.
agent_type: One of "coding", "testing", or "initializer". Controls which
MCP tools are exposed and the max_turns limit.

Returns:
Configured ClaudeSDKClient (from claude_agent_sdk)
Configured SDKAdapter (Claude or Codex)

Security layers (defense in depth):
1. Sandbox - OS-level bash command isolation prevents filesystem escape
2. Permissions - File operations restricted to project_dir only
3. Security hooks - Bash commands validated against an allowlist
(see security.py for ALLOWED_COMMANDS)
Note: The Codex SDK does not support pre-tool-use hooks; it relies on built-in sandboxing.

Note: Authentication is handled by start.bat/start.sh before this runs.
The Claude SDK auto-detects credentials from the Claude CLI configuration
The SDK auto-detects credentials from the respective CLI configuration.
"""
sdk_type = get_sdk_type()
print(f" - SDK type: {sdk_type.upper()}")
# Select the feature MCP tools appropriate for this agent type
feature_tools_map = {
"coding": CODING_AGENT_TOOLS,
Expand Down Expand Up @@ -452,16 +463,25 @@ def create_client(
}

# Build environment overrides for API endpoint configuration
# Uses get_effective_sdk_env() which reads provider settings from the database,
# ensuring UI-configured alternative providers (GLM, Ollama, Kimi, Custom) propagate
# correctly to the Claude CLI subprocess
from registry import get_effective_sdk_env
sdk_env = get_effective_sdk_env()
# Uses get_effective_sdk_env() for Claude or get_effective_sdk_env_for_codex() for Codex.
# Both functions read provider settings from the database, ensuring UI-configured
# alternative providers (GLM, Ollama, Kimi, Custom) propagate correctly.
from registry import get_effective_sdk_env, get_effective_sdk_env_for_codex
if sdk_type == "codex":
sdk_env = get_effective_sdk_env_for_codex()
else:
sdk_env = get_effective_sdk_env()

# Detect alternative API mode (Ollama, GLM, or Vertex AI)
base_url = sdk_env.get("ANTHROPIC_BASE_URL", "")
is_vertex = sdk_env.get("CLAUDE_CODE_USE_VERTEX") == "1"
is_alternative_api = bool(base_url) or is_vertex
# For Codex: check OPENAI_BASE_URL; for Claude: check ANTHROPIC_BASE_URL
if sdk_type == "codex":
base_url = sdk_env.get("OPENAI_BASE_URL", "")
is_vertex = False # Vertex AI is Claude-specific
is_alternative_api = bool(base_url)
else:
base_url = sdk_env.get("ANTHROPIC_BASE_URL", "")
is_vertex = sdk_env.get("CLAUDE_CODE_USE_VERTEX") == "1"
is_alternative_api = bool(base_url) or is_vertex
is_ollama = "localhost:11434" in base_url or "127.0.0.1:11434" in base_url
model = convert_model_for_vertex(model)
if sdk_env:
Expand All @@ -472,6 +492,8 @@ def create_client(
print(f" - Vertex AI Mode: Using GCP project '{project_id}' with model '{model}' in region '{region}'")
elif is_ollama:
print(" - Ollama Mode: Using local models")
elif sdk_type == "codex" and "OPENAI_BASE_URL" in sdk_env:
print(f" - Custom API: Using {sdk_env['OPENAI_BASE_URL']}")
elif "ANTHROPIC_BASE_URL" in sdk_env:
print(f" - GLM Mode: Using {sdk_env['ANTHROPIC_BASE_URL']}")

Expand Down Expand Up @@ -559,49 +581,78 @@ async def pre_compact_hook(
# Our system_prompt benefits from automatic caching without explicit configuration.
# If explicit cache_control is needed, the SDK would need to accept content blocks
# with cache_control fields (not currently supported in v0.1.x).
return ClaudeSDKClient(
options=ClaudeAgentOptions(
model=model,
cli_path=system_cli, # Use system CLI to avoid bundled Bun crash (exit code 3)

# Branch based on SDK type
if sdk_type == "claude":
# Claude SDK: use ClaudeSDKClient with full hook support
# Note: ClaudeSDKClient has a different API than the SDKAdapter protocol, but it works at runtime
return ClaudeSDKClient( # type: ignore[return-value]
options=ClaudeAgentOptions(
model=model,
cli_path=system_cli, # Use system CLI to avoid bundled Bun crash (exit code 3)
system_prompt="You are an expert full-stack developer building a production-quality web application.",
setting_sources=["project"], # Enable skills, commands, and CLAUDE.md from project dir
max_buffer_size=10 * 1024 * 1024, # 10MB for large Playwright screenshots
allowed_tools=allowed_tools,
mcp_servers=mcp_servers, # type: ignore[arg-type] # SDK accepts dict config at runtime
hooks={
"PreToolUse": [
HookMatcher(matcher="Bash", hooks=[bash_hook_with_context]),
],
# PreCompact hook for context management during long sessions.
# Compaction is automatic when context approaches token limits.
# This hook logs compaction events and can customize summarization.
"PreCompact": [
HookMatcher(hooks=[pre_compact_hook]),
],
},
max_turns=max_turns,
cwd=str(project_dir.resolve()),
settings=str(settings_file.resolve()), # Use absolute path
env=sdk_env, # Pass API configuration overrides to CLI subprocess
# Enable extended context beta for better handling of long sessions.
# This provides up to 1M tokens of context with automatic compaction.
# See: https://docs.anthropic.com/en/api/beta-headers
# Disabled for alternative APIs (Ollama, GLM, Vertex AI) as they don't support this beta.
betas=[] if is_alternative_api else ["context-1m-2025-08-07"],
# Note on context management:
# The Claude Agent SDK handles context management automatically through the
# underlying Claude Code CLI. When context approaches limits, the CLI
# automatically compacts/summarizes previous messages.
#
# The SDK does NOT expose explicit compaction_control or context_management
# parameters. Instead, context is managed via:
# 1. betas=["context-1m-2025-08-07"] - Extended context window
# 2. PreCompact hook - Intercept and customize compaction behavior
# 3. max_turns - Limit conversation turns (per agent type: coding=300, testing=100)
#
# Future SDK versions may add explicit compaction controls. When available,
# consider adding:
# - compaction_control={"enabled": True, "context_token_threshold": 80000}
# - context_management={"edits": [...]} for tool use clearing
)
)
else:
# Codex SDK: use factory with unified adapter interface
# Note: Codex does not support PreToolUse hooks - relies on built-in sandboxing
print(" - Note: Bash command hooks not available with Codex SDK")
print(" - Security relies on Codex's built-in sandboxing")

options = AdapterOptions(
model=None, # Codex SDK uses its own default model
project_dir=project_dir,
system_prompt="You are an expert full-stack developer building a production-quality web application.",
setting_sources=["project"], # Enable skills, commands, and CLAUDE.md from project dir
max_buffer_size=10 * 1024 * 1024, # 10MB for large Playwright screenshots
allowed_tools=allowed_tools,
mcp_servers=mcp_servers, # type: ignore[arg-type] # SDK accepts dict config at runtime
hooks={
"PreToolUse": [
HookMatcher(matcher="Bash", hooks=[bash_hook_with_context]),
],
# PreCompact hook for context management during long sessions.
# Compaction is automatic when context approaches token limits.
# This hook logs compaction events and can customize summarization.
"PreCompact": [
HookMatcher(hooks=[pre_compact_hook]),
],
},
max_turns=max_turns,
agent_type=agent_type,
cli_path=system_cli,
setting_sources=["project"],
max_buffer_size=10 * 1024 * 1024,
allowed_tools=allowed_tools,
mcp_servers=mcp_servers,
settings_file=str(settings_file.resolve()),
cwd=str(project_dir.resolve()),
settings=str(settings_file.resolve()), # Use absolute path
env=sdk_env, # Pass API configuration overrides to CLI subprocess
# Enable extended context beta for better handling of long sessions.
# This provides up to 1M tokens of context with automatic compaction.
# See: https://docs.anthropic.com/en/api/beta-headers
# Disabled for alternative APIs (Ollama, GLM, Vertex AI) as they don't support this beta.
env=sdk_env,
yolo_mode=yolo_mode,
betas=[] if is_alternative_api else ["context-1m-2025-08-07"],
# Note on context management:
# The Claude Agent SDK handles context management automatically through the
# underlying Claude Code CLI. When context approaches limits, the CLI
# automatically compacts/summarizes previous messages.
#
# The SDK does NOT expose explicit compaction_control or context_management
# parameters. Instead, context is managed via:
# 1. betas=["context-1m-2025-08-07"] - Extended context window
# 2. PreCompact hook - Intercept and customize compaction behavior
# 3. max_turns - Limit conversation turns (per agent type: coding=300, testing=100)
#
# Future SDK versions may add explicit compaction controls. When available,
# consider adding:
# - compaction_control={"enabled": True, "context_token_threshold": 80000}
# - context_management={"edits": [...]} for tool use clearing
)
)
return create_adapter(options)
Loading
Loading