diff --git a/docs/plans/2026-02-19-sandbox-container-execution-design.md b/docs/plans/2026-02-19-sandbox-container-execution-design.md new file mode 100644 index 00000000..e1430242 --- /dev/null +++ b/docs/plans/2026-02-19-sandbox-container-execution-design.md @@ -0,0 +1,281 @@ +# Sandbox Container Execution Design + +**Date:** 2026-02-19 +**Status:** Approved +**Scope:** Add container-based sandboxing for MCP command execution with command allowlisting + +## Problem + +The SDK executes MCP stdio server commands directly on the host via `exec.CommandContext` (`pkg/mcp/mcp.go`). There is no isolation boundary — a malicious or misconfigured MCP server can access the host filesystem, network, and processes. The existing guardrails system only filters text content and does not restrict actual tool execution. + +### Bugs Found During Audit + +1. **`ToolMiddleware` missing `Execute()` method** — Every LLM provider (OpenAI, Anthropic, Gemini, DeepSeek, Azure) calls `tool.Execute()`, but `ToolMiddleware` only implements `Run()`. Guardrails applied via `ToolMiddleware` are completely bypassed during agent execution. (`pkg/guardrails/tool_middleware.go`) + +2. **`ToolRestrictionGuardrail` is text-pattern only** — Regex matches `"use tool "` in prompt text. LLMs use structured tool calls, not this text pattern. Provides zero protection against actual tool invocations. (`pkg/guardrails/tool_restriction.go`) + +3. **MCP args unsanitized** — `config.Args` passed directly to `exec.CommandContext()`. While Go's `exec` avoids shell injection, args can contain malicious flags the target command interprets dangerously. + +4. **MCP env inherits host environment** — `cmd.Env = append(os.Environ(), config.Env...)` exposes all host env vars to MCP processes. + +5. **No execution timeout on MCP commands** — Only the caller's context provides timeout. No default deadline. 
+ +## Design + +### Approach: Standalone `pkg/sandbox` Package with Docker Runtime + +A new opt-in package that provides container-based command execution with command allowlisting. Integrates with MCP's existing `exec.Cmd`-based transport by returning `*exec.Cmd` instances instead of captured output. + +### Package Structure + +``` +pkg/sandbox/ + ├── sandbox.go # CommandExecutor interface + LocalExecutor (default) + ├── config.go # Config structs (YAML-compatible) + ├── errors.go # Sentinel error values + ├── allowlist.go # Command allowlist logic (allow + deny lists) + ├── docker.go # DockerExecutor implementation + ├── pool.go # Warm container pool (session-scoped) + ├── sandbox_test.go + ├── allowlist_test.go + ├── docker_test.go # Unit tests (no Docker required) + ├── docker_integration_test.go # Integration tests (//go:build integration) + └── pool_test.go +``` + +### Core Interface + +```go +// CommandExecutor creates exec.Cmd instances, optionally sandboxed. +// Returns *exec.Cmd so MCP's CommandTransport can attach stdin/stdout pipes. +type CommandExecutor interface { + Command(ctx context.Context, name string, args ...string) (*exec.Cmd, error) + Close(ctx context.Context) error +} +``` + +**Why `*exec.Cmd` instead of captured output:** MCP stdio servers communicate via stdin/stdout of the child process. The MCP `CommandTransport` needs pipe access to the process, not just the final output. Returning `*exec.Cmd` lets the sandbox slot in transparently. + +### LocalExecutor (Default — No Sandbox) + +```go +type LocalExecutor struct{} + +func (l *LocalExecutor) Command(ctx context.Context, name string, args ...string) (*exec.Cmd, error) { + return exec.CommandContext(ctx, name, args...), nil +} + +func (l *LocalExecutor) Close(ctx context.Context) error { return nil } +``` + +Zero overhead. Preserves exact current behavior when no sandbox is configured. 
+ +### DockerExecutor + +```go +type DockerExecutor struct { + config Config + allowlist *Allowlist + pool *Pool + logger logging.Logger +} +``` + +**Container creation flags:** +``` +docker run -d --name agent-sandbox-{timestamp}-{index} + --memory {memory_limit} + --cpus {cpu_limit} + --network {network_mode} + --read-only + --tmpfs /tmp:size=64m + --security-opt no-new-privileges + --cap-drop ALL + --pids-limit 64 + [-v {host}:{container}[:ro] ...] + {image} sleep infinity +``` + +**Command execution:** +```go +func (d *DockerExecutor) Command(ctx context.Context, name string, args ...string) (*exec.Cmd, error) { + if err := d.allowlist.Check(name); err != nil { + return nil, err + } + container, err := d.pool.Acquire(ctx) + if err != nil { + return nil, err + } + dockerArgs := append([]string{"exec", "-i", container.ID, name}, args...) + return exec.CommandContext(ctx, "docker", dockerArgs...), nil +} +``` + +### Config + +```go +type Config struct { + Enabled bool `yaml:"enabled"` + Image string `yaml:"image"` + AllowedCommands []string `yaml:"allowed_commands"` + DeniedCommands []string `yaml:"denied_commands"` + PoolSize int `yaml:"pool_size"` + Timeout time.Duration `yaml:"timeout"` + MemoryLimit string `yaml:"memory_limit"` + CPULimit string `yaml:"cpu_limit"` + NetworkMode string `yaml:"network_mode"` + MountPaths []MountPath `yaml:"mount_paths"` +} + +type MountPath struct { + Host string `yaml:"host"` + Container string `yaml:"container"` + ReadOnly bool `yaml:"read_only"` +} +``` + +**Defaults:** +- `Image`: "ubuntu:22.04" +- `PoolSize`: 1 +- `Timeout`: 30s +- `MemoryLimit`: "256m" +- `CPULimit`: "0.5" +- `NetworkMode`: "none" +- `MountPaths[].ReadOnly`: true (intended; note that a plain `bool` cannot express this default because its zero value is `false` — it requires a `*bool` or an inverted `Writable` field, otherwise mounts are writable unless `read_only: true` is set) + +### Allowlist + +```go +type Allowlist struct { + allowed map[string]bool + denied map[string]bool +} + +func (a *Allowlist) Check(command string) error +``` + +Resolution order: +1. Extract base name (`/usr/bin/git` -> `git`) +2. If in `denied` -> reject (always wins) +3. If `allowed` is non-empty and command not in it -> reject +4. If `allowed` is empty -> reject all (fail-closed) +5. 
Otherwise -> permit + +### Warm Container Pool + +```go +type Pool struct { + containers []Container + mu sync.Mutex + nextIdx int +} +``` + +- Creates `PoolSize` containers at `NewDockerExecutor()` time +- Round-robin selection via `Acquire()` +- Lazy health recovery: if a container is dead on `Acquire()`, replace it +- `Close()` stops and removes all containers + +### Error Types + +```go +var ( + ErrCommandDenied = errors.New("sandbox: command not in allowlist") + ErrDockerNotFound = errors.New("sandbox: docker binary not found") + ErrContainerUnhealthy = errors.New("sandbox: container not ready") + ErrCommandTimeout = errors.New("sandbox: command execution timed out") +) +``` + +### SDK Integration — Go API + +`NewDockerExecutor` returns `(*DockerExecutor, error)`, so construct it first, handle the error, and close it when done: + +```go +executor, err := sandbox.NewDockerExecutor(ctx, sandbox.Config{ + Image: "node:20-slim", + AllowedCommands: []string{"npx", "node", "ls"}, + PoolSize: 1, + NetworkMode: "none", +}, logger) +if err != nil { + return err +} +defer executor.Close(ctx) + +agent.New( + agent.WithLLM(llm), + agent.WithSandbox(executor), + agent.WithLazyMCPConfigs(configs), +) +``` + +### SDK Integration — YAML Config + +```yaml +mcp: + mcpServers: + filesystem: + command: npx + args: ["-y", "@modelcontextprotocol/server-filesystem"] + sandbox: + enabled: true + image: "node:20-slim" + allowed_commands: ["npx", "node", "ls", "cat"] + denied_commands: ["rm", "dd", "mkfs"] + pool_size: 1 + timeout: "30s" + memory_limit: "256m" + network_mode: "none" +``` + +### MCP Integration (Minimal Change) + +**`StdioServerConfig` — add one field:** +```go +type StdioServerConfig struct { + Command string + Args []string + Env []string + Logger logging.Logger + Executor sandbox.CommandExecutor // nil defaults to LocalExecutor +} +``` + +**`NewStdioServerWithRetry` — replace `exec.CommandContext` call:** +```go +executor := config.Executor +if executor == nil { + executor = &sandbox.LocalExecutor{} +} +cmd, err := executor.Command(ctx, commandPath, config.Args...) +``` + +Existing users who don't set `Executor` get the exact same behavior as today. Non-breaking change. 
+ +### Bug Fix: ToolMiddleware.Execute() + +Add the missing `Execute()` method to `ToolMiddleware`: + +```go +func (m *ToolMiddleware) Execute(ctx context.Context, args string) (string, error) { + processedInput, err := m.pipeline.ProcessRequest(ctx, args) + if err != nil { + return "", err + } + output, err := m.tool.Execute(ctx, processedInput) + if err != nil { + return "", err + } + processedOutput, err := m.pipeline.ProcessResponse(ctx, output) + if err != nil { + return "", err + } + return processedOutput, nil +} +``` + +### Testing Strategy + +| File | Type | Requires | +|------|------|----------| +| `sandbox_test.go` | Unit | Nothing | +| `allowlist_test.go` | Unit | Nothing | +| `pool_test.go` | Unit | Nothing (mocks Docker CLI) | +| `docker_test.go` | Unit | Nothing (pool is stubbed) | +| `docker_integration_test.go` | Integration | Docker daemon (`//go:build integration`) | +| MCP sandbox test | Integration | Docker daemon | + +### Security Properties + +- **Network isolation**: `--network none` by default +- **Filesystem isolation**: `--read-only` + tmpfs `/tmp` only +- **Privilege isolation**: `--cap-drop ALL`, `--security-opt no-new-privileges` +- **Resource limits**: memory, CPU, PID limits +- **Command restriction**: fail-closed allowlist, deny takes precedence +- **No host env leakage**: sandbox containers get only explicitly configured env vars diff --git a/docs/plans/2026-02-19-sandbox-container-execution-plan.md b/docs/plans/2026-02-19-sandbox-container-execution-plan.md new file mode 100644 index 00000000..d72e3222 --- /dev/null +++ b/docs/plans/2026-02-19-sandbox-container-execution-plan.md @@ -0,0 +1,1380 @@ +# Sandbox Container Execution Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Add opt-in Docker-based container sandboxing for MCP command execution with command allowlisting, plus fix the ToolMiddleware.Execute() bug. 
+ +**Architecture:** New `pkg/sandbox` package provides a `CommandExecutor` interface that returns `*exec.Cmd` instances. MCP's `StdioServerConfig` accepts an optional `CommandExecutor` — nil means direct host execution (current behavior). Docker implementation manages a warm container pool with security hardening and a fail-closed command allowlist. + +**Tech Stack:** Go stdlib (`os/exec`, `sync`), Docker CLI (no SDK dependency), YAML config via `gopkg.in/yaml.v3` + +--- + +### Task 1: Bug Fix — ToolMiddleware.Execute() + +**Files:** +- Modify: `pkg/guardrails/tool_middleware.go:39` (after existing `Run` method) +- Create: `pkg/guardrails/tool_middleware_test.go` + +**Step 1: Write the failing test** + +Create `pkg/guardrails/tool_middleware_test.go`: + +```go +package guardrails + +import ( + "context" + "testing" + + "github.com/Ingenimax/agent-sdk-go/pkg/interfaces" +) + +// mockTool implements interfaces.Tool for testing +type mockTool struct { + name string + description string + runOutput string + execOutput string + runErr error + execErr error +} + +func (m *mockTool) Name() string { return m.name } +func (m *mockTool) Description() string { return m.description } +func (m *mockTool) Parameters() map[string]interfaces.ParameterSpec { return nil } +func (m *mockTool) Run(ctx context.Context, input string) (string, error) { return m.runOutput, m.runErr } +func (m *mockTool) Execute(ctx context.Context, args string) (string, error) { return m.execOutput, m.execErr } + +func TestToolMiddleware_Execute(t *testing.T) { + tool := &mockTool{ + name: "test_tool", + execOutput: "raw output with badword inside", + } + + pipeline := NewPipeline(NewContentFilter([]string{"badword"}, RedactAction)) + middleware := NewToolMiddleware(tool, pipeline) + + result, err := middleware.Execute(context.Background(), "input with badword here") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Both input and output should have "badword" redacted + if result == 
"raw output with badword inside" { + t.Error("Execute() did not apply guardrails to output — guardrails are bypassed") + } + if result != "raw output with **** inside" { + t.Errorf("unexpected result: %q", result) + } +} + +func TestToolMiddleware_Execute_BlockAction(t *testing.T) { + tool := &mockTool{ + name: "test_tool", + execOutput: "clean output", + } + + pipeline := NewPipeline(NewContentFilter([]string{"blocked"}, BlockAction)) + middleware := NewToolMiddleware(tool, pipeline) + + _, err := middleware.Execute(context.Background(), "this is blocked content") + if err == nil { + t.Error("expected error for blocked content, got nil") + } +} +``` + +**Step 2: Run test to verify it fails** + +Run: `go test ./pkg/guardrails/ -run TestToolMiddleware_Execute -v` +Expected: Compilation error — `ToolMiddleware` does not implement `Execute` + +**Step 3: Write minimal implementation** + +Add to `pkg/guardrails/tool_middleware.go` after the `Run` method (after line 59): + +```go +// Execute executes the tool with the given arguments, applying guardrails +func (m *ToolMiddleware) Execute(ctx context.Context, args string) (string, error) { + // Process request through guardrails + processedInput, err := m.pipeline.ProcessRequest(ctx, args) + if err != nil { + return "", err + } + + // Call the underlying tool + output, err := m.tool.Execute(ctx, processedInput) + if err != nil { + return "", err + } + + // Process response through guardrails + processedOutput, err := m.pipeline.ProcessResponse(ctx, output) + if err != nil { + return "", err + } + + return processedOutput, nil +} +``` + +**Step 4: Run test to verify it passes** + +Run: `go test ./pkg/guardrails/ -run TestToolMiddleware_Execute -v` +Expected: PASS + +**Step 5: Run full guardrails tests** + +Run: `go test ./pkg/guardrails/ -v` +Expected: All PASS + +**Step 6: Commit** + +```bash +git add pkg/guardrails/tool_middleware.go pkg/guardrails/tool_middleware_test.go +git commit -m "fix: add Execute() to 
ToolMiddleware so guardrails apply to LLM tool calls" +``` + +--- + +### Task 2: Sandbox Package — Config & Errors + +**Files:** +- Create: `pkg/sandbox/config.go` +- Create: `pkg/sandbox/errors.go` + +**Step 1: Write config.go** + +```go +package sandbox + +import "time" + +// Config holds sandbox configuration, loadable from YAML. +type Config struct { + Enabled bool `json:"enabled" yaml:"enabled"` + Image string `json:"image,omitempty" yaml:"image,omitempty"` + AllowedCommands []string `json:"allowed_commands,omitempty" yaml:"allowed_commands,omitempty"` + DeniedCommands []string `json:"denied_commands,omitempty" yaml:"denied_commands,omitempty"` + PoolSize int `json:"pool_size,omitempty" yaml:"pool_size,omitempty"` + Timeout time.Duration `json:"timeout,omitempty" yaml:"timeout,omitempty"` + MemoryLimit string `json:"memory_limit,omitempty" yaml:"memory_limit,omitempty"` + CPULimit string `json:"cpu_limit,omitempty" yaml:"cpu_limit,omitempty"` + NetworkMode string `json:"network_mode,omitempty" yaml:"network_mode,omitempty"` + MountPaths []MountPath `json:"mount_paths,omitempty" yaml:"mount_paths,omitempty"` +} + +// MountPath represents a bind mount from host to container. +type MountPath struct { + Host string `json:"host" yaml:"host"` + Container string `json:"container" yaml:"container"` + ReadOnly bool `json:"read_only" yaml:"read_only"` +} + +// applyDefaults fills in zero-value fields with sensible defaults. +func (c *Config) applyDefaults() { + if c.PoolSize <= 0 { + c.PoolSize = 1 + } + if c.Timeout <= 0 { + c.Timeout = 30 * time.Second + } + if c.MemoryLimit == "" { + c.MemoryLimit = "256m" + } + if c.CPULimit == "" { + c.CPULimit = "0.5" + } + if c.NetworkMode == "" { + c.NetworkMode = "none" + } + if c.Image == "" { + c.Image = "ubuntu:22.04" + } + // MountPaths[].ReadOnly is intentionally not defaulted here: the zero value of + // bool is false, so "unset" cannot be distinguished from "explicitly false" in Go. 
+ // Consequence: a mount is writable unless the caller sets ReadOnly=true. + // Defaulting to read-only would require a *bool (or an inverted Writable field). +} +``` + +**Step 2: Write errors.go** + +```go +package sandbox + +import "errors" + +var ( + // ErrCommandDenied is returned when a command is not in the allowlist. + ErrCommandDenied = errors.New("sandbox: command not in allowlist") + + // ErrDockerNotFound is returned when the docker binary is not available. + ErrDockerNotFound = errors.New("sandbox: docker binary not found") + + // ErrContainerUnhealthy is returned when no healthy container is available. + ErrContainerUnhealthy = errors.New("sandbox: container not ready") + + // ErrCommandTimeout is returned when a command exceeds the configured timeout. + ErrCommandTimeout = errors.New("sandbox: command execution timed out") + + // ErrSandboxDisabled is returned when sandbox is not enabled but executor is called. + ErrSandboxDisabled = errors.New("sandbox: not enabled") +) +``` + +**Step 3: Verify it compiles** + +Run: `go build ./pkg/sandbox/` +Expected: Success (no errors) + +**Step 4: Commit** + +```bash +git add pkg/sandbox/config.go pkg/sandbox/errors.go +git commit -m "feat(sandbox): add config structs and error types" +``` + +--- + +### Task 3: Sandbox Package — CommandExecutor Interface & LocalExecutor + +**Files:** +- Create: `pkg/sandbox/sandbox.go` +- Create: `pkg/sandbox/sandbox_test.go` + +**Step 1: Write the failing test** + +Create `pkg/sandbox/sandbox_test.go`: + +```go +package sandbox + +import ( + "context" + "testing" +) + +func TestLocalExecutor_Command(t *testing.T) { + executor := &LocalExecutor{} + cmd, err := executor.Command(context.Background(), "echo", "hello") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cmd == nil { + t.Fatal("expected non-nil cmd") + } + if cmd.Path == "" { + t.Error("expected cmd.Path to be set") + } + output, err := cmd.Output() + if err != nil { + t.Fatalf("failed to run cmd: %v", err) + } + if string(output) != "hello\n" { + t.Errorf("unexpected output: 
%q", string(output)) + } +} + +func TestLocalExecutor_Close(t *testing.T) { + executor := &LocalExecutor{} + if err := executor.Close(context.Background()); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} +``` + +**Step 2: Run test to verify it fails** + +Run: `go test ./pkg/sandbox/ -run TestLocalExecutor -v` +Expected: Compilation error — `LocalExecutor` not defined + +**Step 3: Write minimal implementation** + +Create `pkg/sandbox/sandbox.go`: + +```go +package sandbox + +import ( + "context" + "os/exec" +) + +// CommandExecutor creates exec.Cmd instances, optionally sandboxed. +// Returns *exec.Cmd so MCP's CommandTransport can attach stdin/stdout pipes. +type CommandExecutor interface { + // Command creates an exec.Cmd for the given command and args. + Command(ctx context.Context, name string, args ...string) (*exec.Cmd, error) + // Close releases sandbox resources (stops containers, etc.). + Close(ctx context.Context) error +} + +// LocalExecutor runs commands directly on the host with no sandboxing. +// This is the default executor when no sandbox is configured. +type LocalExecutor struct{} + +// Command creates an exec.Cmd that runs directly on the host. +func (l *LocalExecutor) Command(ctx context.Context, name string, args ...string) (*exec.Cmd, error) { + return exec.CommandContext(ctx, name, args...), nil +} + +// Close is a no-op for LocalExecutor. 
+func (l *LocalExecutor) Close(_ context.Context) error { + return nil +} +``` + +**Step 4: Run test to verify it passes** + +Run: `go test ./pkg/sandbox/ -run TestLocalExecutor -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add pkg/sandbox/sandbox.go pkg/sandbox/sandbox_test.go +git commit -m "feat(sandbox): add CommandExecutor interface and LocalExecutor" +``` + +--- + +### Task 4: Sandbox Package — Allowlist + +**Files:** +- Create: `pkg/sandbox/allowlist.go` +- Create: `pkg/sandbox/allowlist_test.go` + +**Step 1: Write the failing tests** + +Create `pkg/sandbox/allowlist_test.go`: + +```go +package sandbox + +import ( + "errors" + "testing" +) + +func TestAllowlist_Check_AllowedCommand(t *testing.T) { + al := NewAllowlist([]string{"git", "curl"}, nil) + if err := al.Check("git"); err != nil { + t.Errorf("expected git to be allowed, got: %v", err) + } +} + +func TestAllowlist_Check_AllowedAbsolutePath(t *testing.T) { + al := NewAllowlist([]string{"git"}, nil) + if err := al.Check("/usr/bin/git"); err != nil { + t.Errorf("expected /usr/bin/git to be allowed, got: %v", err) + } +} + +func TestAllowlist_Check_DeniedCommand(t *testing.T) { + al := NewAllowlist([]string{"git", "rm"}, []string{"rm"}) + err := al.Check("rm") + if err == nil { + t.Error("expected rm to be denied") + } + if !errors.Is(err, ErrCommandDenied) { + t.Errorf("expected ErrCommandDenied, got: %v", err) + } +} + +func TestAllowlist_Check_DenyTakesPrecedence(t *testing.T) { + al := NewAllowlist([]string{"rm"}, []string{"rm"}) + err := al.Check("rm") + if err == nil { + t.Error("deny should take precedence over allow") + } +} + +func TestAllowlist_Check_NotInAllowlist(t *testing.T) { + al := NewAllowlist([]string{"git"}, nil) + err := al.Check("curl") + if err == nil { + t.Error("expected curl to be denied when not in allowlist") + } +} + +func TestAllowlist_Check_EmptyAllowlistDeniesAll(t *testing.T) { + al := NewAllowlist(nil, nil) + err := al.Check("git") + if err == nil { + 
t.Error("expected all commands denied when allowlist is empty (fail-closed)") + } +} + +func TestAllowlist_Check_CaseInsensitive(t *testing.T) { + al := NewAllowlist([]string{"Git"}, nil) + if err := al.Check("git"); err != nil { + t.Errorf("expected case-insensitive match, got: %v", err) + } +} +``` + +**Step 2: Run test to verify it fails** + +Run: `go test ./pkg/sandbox/ -run TestAllowlist -v` +Expected: Compilation error — `NewAllowlist` not defined + +**Step 3: Write minimal implementation** + +Create `pkg/sandbox/allowlist.go`: + +```go +package sandbox + +import ( + "fmt" + "path/filepath" + "strings" +) + +// Allowlist enforces which commands are permitted in the sandbox. +// Deny list takes precedence over allow list. Empty allow list denies all (fail-closed). +type Allowlist struct { + allowed map[string]bool + denied map[string]bool +} + +// NewAllowlist creates a new Allowlist from allow and deny lists. +func NewAllowlist(allowed, denied []string) *Allowlist { + a := &Allowlist{ + allowed: make(map[string]bool, len(allowed)), + denied: make(map[string]bool, len(denied)), + } + for _, cmd := range allowed { + a.allowed[strings.ToLower(cmd)] = true + } + for _, cmd := range denied { + a.denied[strings.ToLower(cmd)] = true + } + return a +} + +// Check returns nil if the command is permitted, ErrCommandDenied otherwise. 
+func (a *Allowlist) Check(command string) error { + base := strings.ToLower(filepath.Base(command)) + + if a.denied[base] { + return fmt.Errorf("%w: %q is explicitly denied", ErrCommandDenied, base) + } + + if len(a.allowed) == 0 { + return fmt.Errorf("%w: no commands are allowed (empty allowlist)", ErrCommandDenied) + } + + if !a.allowed[base] { + return fmt.Errorf("%w: %q is not in the allowlist", ErrCommandDenied, base) + } + + return nil +} +``` + +**Step 4: Run test to verify it passes** + +Run: `go test ./pkg/sandbox/ -run TestAllowlist -v` +Expected: All PASS + +**Step 5: Commit** + +```bash +git add pkg/sandbox/allowlist.go pkg/sandbox/allowlist_test.go +git commit -m "feat(sandbox): add command allowlist with fail-closed semantics" +``` + +--- + +### Task 5: Sandbox Package — Container Pool + +**Files:** +- Create: `pkg/sandbox/pool.go` +- Create: `pkg/sandbox/pool_test.go` + +**Step 1: Write the failing tests** + +Create `pkg/sandbox/pool_test.go`: + +```go +package sandbox + +import ( + "context" + "testing" +) + +func TestPool_Acquire_RoundRobin(t *testing.T) { + containers := []Container{ + {ID: "abc123", Name: "sandbox-0", Ready: true}, + {ID: "def456", Name: "sandbox-1", Ready: true}, + } + p := &Pool{containers: containers} + + c1, err := p.Acquire(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + c2, err := p.Acquire(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + c3, err := p.Acquire(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if c1.ID != "abc123" { + t.Errorf("expected first container, got %s", c1.ID) + } + if c2.ID != "def456" { + t.Errorf("expected second container, got %s", c2.ID) + } + if c3.ID != "abc123" { + t.Errorf("expected round-robin back to first, got %s", c3.ID) + } +} + +func TestPool_Acquire_EmptyPool(t *testing.T) { + p := &Pool{containers: nil} + _, err := p.Acquire(context.Background()) + if err == nil { + 
t.Error("expected error for empty pool") + } +} + +func TestPool_Close(t *testing.T) { + // closeFn tracks which container IDs were closed + var closed []string + closeFn := func(ctx context.Context, id string) error { + closed = append(closed, id) + return nil + } + containers := []Container{ + {ID: "abc123", Name: "sandbox-0", Ready: true}, + {ID: "def456", Name: "sandbox-1", Ready: true}, + } + p := &Pool{containers: containers, closeFn: closeFn} + + if err := p.Close(context.Background()); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(closed) != 2 { + t.Errorf("expected 2 containers closed, got %d", len(closed)) + } +} +``` + +**Step 2: Run test to verify it fails** + +Run: `go test ./pkg/sandbox/ -run TestPool -v` +Expected: Compilation error — `Pool`, `Container` not defined + +**Step 3: Write minimal implementation** + +Create `pkg/sandbox/pool.go`: + +```go +package sandbox + +import ( + "context" + "fmt" + "sync" + "time" +) + +// Container represents a running sandbox container. +type Container struct { + ID string + Name string + Ready bool + CreatedAt time.Time +} + +// Pool manages a set of warm sandbox containers with round-robin selection. +type Pool struct { + containers []Container + mu sync.Mutex + nextIdx int + closeFn func(ctx context.Context, id string) error +} + +// NewPool creates a pool with pre-created containers. +func NewPool(containers []Container, closeFn func(ctx context.Context, id string) error) *Pool { + return &Pool{ + containers: containers, + closeFn: closeFn, + } +} + +// Acquire returns the next available container using round-robin selection. 
+func (p *Pool) Acquire(ctx context.Context) (*Container, error) { + p.mu.Lock() + defer p.mu.Unlock() + + if len(p.containers) == 0 { + return nil, ErrContainerUnhealthy + } + + c := &p.containers[p.nextIdx] + p.nextIdx = (p.nextIdx + 1) % len(p.containers) + + if !c.Ready { + return nil, fmt.Errorf("%w: container %s is not ready", ErrContainerUnhealthy, c.Name) + } + + return c, nil +} + +// Close stops and removes all containers in the pool. +func (p *Pool) Close(ctx context.Context) error { + p.mu.Lock() + defer p.mu.Unlock() + + var lastErr error + for _, c := range p.containers { + if p.closeFn != nil { + if err := p.closeFn(ctx, c.ID); err != nil { + lastErr = err + } + } + } + p.containers = nil + return lastErr +} + +// MarkUnhealthy marks a container as not ready. +func (p *Pool) MarkUnhealthy(id string) { + p.mu.Lock() + defer p.mu.Unlock() + + for i := range p.containers { + if p.containers[i].ID == id { + p.containers[i].Ready = false + break + } + } +} +``` + +**Step 4: Run test to verify it passes** + +Run: `go test ./pkg/sandbox/ -run TestPool -v` +Expected: All PASS + +**Step 5: Commit** + +```bash +git add pkg/sandbox/pool.go pkg/sandbox/pool_test.go +git commit -m "feat(sandbox): add warm container pool with round-robin selection" +``` + +--- + +### Task 6: Sandbox Package — DockerExecutor + +**Files:** +- Create: `pkg/sandbox/docker.go` +- Create: `pkg/sandbox/docker_test.go` + +**Step 1: Write the unit test (no Docker required)** + +Create `pkg/sandbox/docker_test.go`: + +```go +package sandbox + +import ( + "context" + "errors" + "testing" +) + +func TestDockerExecutor_Command_DeniedByAllowlist(t *testing.T) { + executor := &DockerExecutor{ + config: Config{Enabled: true, Image: "ubuntu:22.04"}, + allowlist: NewAllowlist([]string{"git"}, nil), + pool: &Pool{ + containers: []Container{{ID: "test", Name: "test", Ready: true}}, + }, + } + + _, err := executor.Command(context.Background(), "rm", "-rf", "/") + if err == nil { + t.Fatal("expected 
error for denied command") + } + if !errors.Is(err, ErrCommandDenied) { + t.Errorf("expected ErrCommandDenied, got: %v", err) + } +} + +func TestDockerExecutor_Command_AllowedCommand(t *testing.T) { + executor := &DockerExecutor{ + config: Config{Enabled: true, Image: "ubuntu:22.04"}, + allowlist: NewAllowlist([]string{"git"}, nil), + pool: &Pool{ + containers: []Container{{ID: "abc123", Name: "sandbox-0", Ready: true}}, + }, + } + + cmd, err := executor.Command(context.Background(), "git", "status") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cmd == nil { + t.Fatal("expected non-nil cmd") + } + // Verify the command wraps via docker exec + args := cmd.Args + if len(args) < 6 { + t.Fatalf("expected docker exec args, got: %v", args) + } + // args[0] = "docker", args[1] = "exec", args[2] = "-i", args[3] = containerID, args[4] = command, args[5] = first command arg + if args[1] != "exec" { + t.Errorf("expected 'exec', got %q", args[1]) + } + if args[2] != "-i" { + t.Errorf("expected '-i', got %q", args[2]) + } + if args[3] != "abc123" { + t.Errorf("expected container ID 'abc123', got %q", args[3]) + } + if args[4] != "git" { + t.Errorf("expected command 'git', got %q", args[4]) + } + if args[5] != "status" { + t.Errorf("expected arg 'status', got %q", args[5]) + } +} + +func TestDockerExecutor_Close(t *testing.T) { + var closed []string + closeFn := func(ctx context.Context, id string) error { + closed = append(closed, id) + return nil + } + executor := &DockerExecutor{ + config: Config{Enabled: true}, + pool: &Pool{ + containers: []Container{{ID: "abc", Name: "s-0", Ready: true}}, + closeFn: closeFn, + }, + } + + if err := executor.Close(context.Background()); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(closed) != 1 || closed[0] != "abc" { + t.Errorf("expected container 'abc' to be closed, got: %v", closed) + } +} +``` + +**Step 2: Run test to verify it fails** + +Run: `go test ./pkg/sandbox/ -run TestDockerExecutor -v` +Expected: Compilation error — 
`DockerExecutor` not defined + +**Step 3: Write implementation** + +Create `pkg/sandbox/docker.go`: + +```go +package sandbox + +import ( + "context" + "fmt" + "os/exec" + "strconv" + "strings" + "time" + + "github.com/Ingenimax/agent-sdk-go/pkg/logging" +) + +// DockerExecutor implements CommandExecutor using Docker containers. +type DockerExecutor struct { + config Config + allowlist *Allowlist + pool *Pool + logger logging.Logger +} + +// NewDockerExecutor creates a new DockerExecutor, starts warm containers, and returns the executor. +// Fails fast if Docker is not available or the config is invalid. +func NewDockerExecutor(ctx context.Context, config Config, logger logging.Logger) (*DockerExecutor, error) { + if logger == nil { + logger = logging.New() + } + + // Verify Docker is available + dockerPath, err := exec.LookPath("docker") + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrDockerNotFound, err) + } + logger.Debug(ctx, "Docker found", map[string]interface{}{"path": dockerPath}) + + config.applyDefaults() + + allowlist := NewAllowlist(config.AllowedCommands, config.DeniedCommands) + + // Create warm containers + containers, err := createContainers(ctx, config, logger) + if err != nil { + return nil, fmt.Errorf("failed to create sandbox containers: %w", err) + } + + closeFn := func(ctx context.Context, id string) error { + return removeContainer(ctx, id, logger) + } + + return &DockerExecutor{ + config: config, + allowlist: allowlist, + pool: NewPool(containers, closeFn), + logger: logger, + }, nil +} + +// Command creates an exec.Cmd that runs inside a sandbox container via `docker exec`. 
+func (d *DockerExecutor) Command(ctx context.Context, name string, args ...string) (*exec.Cmd, error) { + if err := d.allowlist.Check(name); err != nil { + return nil, err + } + + container, err := d.pool.Acquire(ctx) + if err != nil { + return nil, err + } + + dockerArgs := make([]string, 0, 4+len(args)) + dockerArgs = append(dockerArgs, "exec", "-i", container.ID, name) + dockerArgs = append(dockerArgs, args...) + + cmd := exec.CommandContext(ctx, "docker", dockerArgs...) + return cmd, nil +} + +// Close stops and removes all sandbox containers. +func (d *DockerExecutor) Close(ctx context.Context) error { + return d.pool.Close(ctx) +} + +// createContainers starts warm containers based on config. +func createContainers(ctx context.Context, config Config, logger logging.Logger) ([]Container, error) { + containers := make([]Container, 0, config.PoolSize) + + for i := 0; i < config.PoolSize; i++ { + name := fmt.Sprintf("agent-sandbox-%d-%d", time.Now().UnixNano(), i) + + args := buildContainerArgs(config, name) + + logger.Info(ctx, "Creating sandbox container", map[string]interface{}{ + "name": name, + "image": config.Image, + }) + + cmd := exec.CommandContext(ctx, "docker", args...) + output, err := cmd.Output() + if err != nil { + // Clean up any containers that were created + for _, c := range containers { + _ = removeContainer(ctx, c.ID, logger) + } + return nil, fmt.Errorf("failed to create container %s: %w", name, err) + } + + containerID := strings.TrimSpace(string(output)) + containers = append(containers, Container{ + ID: containerID, + Name: name, + Ready: true, + CreatedAt: time.Now(), + }) + + logger.Info(ctx, "Sandbox container created", map[string]interface{}{ + "name": name, + "id": containerID, + }) + } + + return containers, nil +} + +// buildContainerArgs builds the docker run arguments from config. 
+func buildContainerArgs(config Config, name string) []string { + args := []string{ + "run", "-d", + "--name", name, + "--memory", config.MemoryLimit, + "--cpus", config.CPULimit, + "--network", config.NetworkMode, + "--read-only", + "--tmpfs", "/tmp:size=64m", + "--security-opt", "no-new-privileges", + "--cap-drop", "ALL", + "--pids-limit", strconv.Itoa(64), + } + + for _, mount := range config.MountPaths { + mountStr := mount.Host + ":" + mount.Container + if mount.ReadOnly { + mountStr += ":ro" + } + args = append(args, "-v", mountStr) + } + + args = append(args, config.Image, "sleep", "infinity") + return args +} + +// removeContainer stops and removes a container by ID. +func removeContainer(ctx context.Context, id string, logger logging.Logger) error { + cmd := exec.CommandContext(ctx, "docker", "rm", "-f", id) + if err := cmd.Run(); err != nil { + logger.Warn(ctx, "Failed to remove sandbox container", map[string]interface{}{ + "id": id, + "error": err.Error(), + }) + return err + } + logger.Debug(ctx, "Sandbox container removed", map[string]interface{}{"id": id}) + return nil +} +``` + +**Step 4: Run unit tests to verify they pass** + +Run: `go test ./pkg/sandbox/ -run TestDockerExecutor -v` +Expected: All PASS + +**Step 5: Commit** + +```bash +git add pkg/sandbox/docker.go pkg/sandbox/docker_test.go +git commit -m "feat(sandbox): add DockerExecutor with container lifecycle management" +``` + +--- + +### Task 7: Sandbox Package — Docker Integration Tests + +**Files:** +- Create: `pkg/sandbox/docker_integration_test.go` + +**Step 1: Write integration test** + +Create `pkg/sandbox/docker_integration_test.go`: + +```go +//go:build integration + +package sandbox + +import ( + "context" + "testing" + "time" + + "github.com/Ingenimax/agent-sdk-go/pkg/logging" +) + +func TestDockerExecutor_Integration_CreateAndExecute(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + logger := logging.New() + + executor, 
err := NewDockerExecutor(ctx, Config{ + Enabled: true, + Image: "alpine:3.19", + AllowedCommands: []string{"echo", "ls", "cat"}, + PoolSize: 1, + Timeout: 10 * time.Second, + }, logger) + if err != nil { + t.Fatalf("failed to create executor: %v", err) + } + defer executor.Close(ctx) + + // Test: execute an allowed command + cmd, err := executor.Command(ctx, "echo", "hello sandbox") + if err != nil { + t.Fatalf("failed to create command: %v", err) + } + output, err := cmd.Output() + if err != nil { + t.Fatalf("failed to run command: %v", err) + } + if got := string(output); got != "hello sandbox\n" { + t.Errorf("expected 'hello sandbox\\n', got %q", got) + } + + // Test: denied command + _, err = executor.Command(ctx, "rm", "-rf", "/") + if err == nil { + t.Error("expected error for denied command 'rm'") + } +} + +func TestDockerExecutor_Integration_ContainerIsolation(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + logger := logging.New() + + executor, err := NewDockerExecutor(ctx, Config{ + Enabled: true, + Image: "alpine:3.19", + AllowedCommands: []string{"cat"}, + PoolSize: 1, + NetworkMode: "none", + }, logger) + if err != nil { + t.Fatalf("failed to create executor: %v", err) + } + defer executor.Close(ctx) + + // Host's /etc/hostname should NOT be accessible as host content + cmd, err := executor.Command(ctx, "cat", "/etc/hostname") + if err != nil { + t.Fatalf("failed to create command: %v", err) + } + output, err := cmd.Output() + if err != nil { + // Some alpine containers may not have /etc/hostname — that's fine + t.Logf("cat /etc/hostname failed (expected in isolated container): %v", err) + return + } + t.Logf("container hostname: %s", string(output)) + // The hostname should be the container ID, not the host +} +``` + +**Step 2: Verify it compiles (don't run — needs Docker)** + +Run: `go vet -tags integration ./pkg/sandbox/` (`go build` skips `_test.go` files, so it would not compile this test; `go vet` type-checks test files without running them) +Expected: Success + +**Step 3: Run integration test (only if 
Docker is available)** + +Run: `go test -tags integration ./pkg/sandbox/ -run TestDockerExecutor_Integration -v -timeout 120s` +Expected: PASS (if Docker daemon is running) + +**Step 4: Commit** + +```bash +git add pkg/sandbox/docker_integration_test.go +git commit -m "test(sandbox): add Docker integration tests" +``` + +--- + +### Task 8: MCP Integration — Wire Sandbox into StdioServerConfig + +**Files:** +- Modify: `pkg/mcp/mcp.go:687-692` (StdioServerConfig struct) +- Modify: `pkg/mcp/mcp.go:770-774` (command creation in NewStdioServerWithRetry) + +**Step 1: Write the failing test** + +Modify or create a test that verifies `StdioServerConfig` accepts an `Executor` field. Add to an existing MCP test file or create a focused one: + +Create `pkg/mcp/sandbox_integration_test.go`: + +```go +package mcp + +import ( + "context" + "os/exec" + "testing" + + "github.com/Ingenimax/agent-sdk-go/pkg/sandbox" +) + +// Verify the interface compiles — LocalExecutor satisfies CommandExecutor +var _ sandbox.CommandExecutor = &sandbox.LocalExecutor{} + +func TestStdioServerConfig_AcceptsExecutor(t *testing.T) { + config := StdioServerConfig{ + Command: "echo", + Args: []string{"hello"}, + Executor: &sandbox.LocalExecutor{}, + } + if config.Executor == nil { + t.Error("expected Executor to be set") + } +} + +// mockExecutor records calls for testing +type mockExecutor struct { + called bool + lastCmd string +} + +func (m *mockExecutor) Command(ctx context.Context, name string, args ...string) (*exec.Cmd, error) { + m.called = true + m.lastCmd = name + return exec.CommandContext(ctx, name, args...), nil +} + +func (m *mockExecutor) Close(ctx context.Context) error { return nil } + +func TestStdioServerConfig_ExecutorIsUsed(t *testing.T) { + // This test verifies the Executor field exists and has the correct type. + // Full integration testing with MCP server startup requires a real MCP server binary. 
+ mock := &mockExecutor{} + config := StdioServerConfig{ + Command: "echo", + Args: []string{"hello"}, + Executor: mock, + } + _ = config // Compiles = type is correct +} +``` + +**Step 2: Run test to verify it fails** + +Run: `go test ./pkg/mcp/ -run TestStdioServerConfig -v` +Expected: Compilation error — `StdioServerConfig` has no field `Executor` + +**Step 3: Modify StdioServerConfig** + +In `pkg/mcp/mcp.go`, change `StdioServerConfig` (line 687): + +```go +// StdioServerConfig holds configuration for a stdio MCP server +type StdioServerConfig struct { + Command string + Args []string + Env []string + Logger logging.Logger + Executor sandbox.CommandExecutor // Optional sandboxed executor. Nil uses direct host execution. +} +``` + +Add import for sandbox package at the top of the file: + +```go +import ( + ... + "github.com/Ingenimax/agent-sdk-go/pkg/sandbox" + ... +) +``` + +**Step 4: Modify NewStdioServerWithRetry** + +In `pkg/mcp/mcp.go`, replace line 774: + +```go + // #nosec G204 -- commandPath is validated above with LookPath and security checks + cmd := exec.CommandContext(ctx, commandPath, config.Args...) +``` + +With: + +```go + // Create the command, optionally through sandbox executor + var cmd *exec.Cmd + if config.Executor != nil { + var execErr error + cmd, execErr = config.Executor.Command(ctx, commandPath, config.Args...) + if execErr != nil { + return nil, fmt.Errorf("sandbox executor error: %w", execErr) + } + } else { + // #nosec G204 -- commandPath is validated above with LookPath and security checks + cmd = exec.CommandContext(ctx, commandPath, config.Args...) 
+ } +``` + +**Step 5: Run test to verify it passes** + +Run: `go test ./pkg/mcp/ -run TestStdioServerConfig -v` +Expected: PASS + +**Step 6: Run full test suite** + +Run: `go test ./...` +Expected: All PASS (no regressions) + +**Step 7: Commit** + +```bash +git add pkg/mcp/mcp.go pkg/mcp/sandbox_integration_test.go +git commit -m "feat(mcp): integrate sandbox CommandExecutor into StdioServerConfig" +``` + +--- + +### Task 9: Agent SDK Integration — WithSandbox Option & YAML Config + +**Files:** +- Modify: `pkg/agent/agent.go:60-103` (add sandbox field to Agent struct) +- Modify: `pkg/agent/agent.go` (add WithSandbox option) +- Modify: `pkg/agent/mcp_config.go:17-25` (add Sandbox field to MCPServerConfig) +- Modify: `pkg/agent/agent.go:1115-1133` (pass executor to LazyMCPServerConfig) + +**Step 1: Add sandbox field to Agent struct** + +In `pkg/agent/agent.go`, add to the Agent struct (around line 102): + +```go + // Sandbox executor for containerized command execution + sandbox sandbox.CommandExecutor +``` + +Add import: + +```go + "github.com/Ingenimax/agent-sdk-go/pkg/sandbox" +``` + +**Step 2: Add WithSandbox option** + +Add after the existing `With*` options (e.g., after `WithCustomRunStreamFunction`): + +```go +// WithSandbox sets the sandbox executor for containerized MCP command execution. 
+func WithSandbox(executor sandbox.CommandExecutor) Option { + return func(a *Agent) { + a.sandbox = executor + } +} +``` + +**Step 3: Add Sandbox field to MCPServerConfig** + +In `pkg/agent/mcp_config.go`, modify `MCPServerConfig`: + +```go +type MCPServerConfig struct { + Command string `json:"command,omitempty" yaml:"command,omitempty"` + Args []string `json:"args,omitempty" yaml:"args,omitempty"` + Env map[string]string `json:"env,omitempty" yaml:"env,omitempty"` + URL string `json:"url,omitempty" yaml:"url,omitempty"` + Token string `json:"token,omitempty" yaml:"token,omitempty"` + HttpTransportMode string `json:"httpTransportMode,omitempty" yaml:"httpTransportMode,omitempty"` + AllowedTools []string `json:"allowedTools,omitempty" yaml:"allowedTools,omitempty"` + Sandbox *sandbox.Config `json:"sandbox,omitempty" yaml:"sandbox,omitempty"` +} +``` + +**Step 4: Wire sandbox into LazyMCPConfig → StdioServerConfig** + +In `pkg/agent/agent.go`, in `createLazyMCPTools()` (around line 1123), update `LazyMCPServerConfig` creation to pass the agent's sandbox executor. This requires adding an `Executor` field to `LazyMCPConfig` in the agent package: + +Add to `LazyMCPConfig` struct (line 34): + +```go +type LazyMCPConfig struct { + Name string + Type string + Command string + Args []string + Env []string + URL string + Token string + Tools []LazyMCPToolConfig + HttpTransportMode string + AllowedTools []string + Executor sandbox.CommandExecutor // Optional sandbox executor +} +``` + +Then in `createLazyMCPTools()`, pass the executor to the MCP config. Find where `mcp.LazyMCPServerConfig` is created and add: + +```go +lazyServerConfig := mcp.LazyMCPServerConfig{ + ...existing fields... + Executor: config.Executor, +} +``` + +And in agent initialization, propagate the agent-level sandbox to each lazy MCP config that doesn't already have one. 
+ +**Step 5: Verify compilation** + +Run: `go build ./...` +Expected: Success + +**Step 6: Run full test suite** + +Run: `go test ./...` +Expected: All PASS + +**Step 7: Commit** + +```bash +git add pkg/agent/agent.go pkg/agent/mcp_config.go +git commit -m "feat(agent): add WithSandbox option and wire sandbox into MCP configs" +``` + +--- + +### Task 10: Run Full Linter & Final Verification + +**Step 1: Format code** + +Run: `make fmt` + +**Step 2: Tidy dependencies** + +Run: `make tidy` + +**Step 3: Run linter** + +Run: `make lint` +Expected: No new warnings/errors + +**Step 4: Run all tests** + +Run: `make test` +Expected: All PASS + +**Step 5: Fix any issues found** + +Address any lint warnings or test failures. + +**Step 6: Final commit** + +```bash +git add -A +git commit -m "chore: lint and tidy after sandbox feature" +``` + +--- + +## Task Dependency Order + +``` +Task 1 (ToolMiddleware bug fix) — independent, do first + ↓ +Task 2 (Config & Errors) — foundation + ↓ +Task 3 (Interface & LocalExecutor) — depends on Task 2 + ↓ +Task 4 (Allowlist) — depends on Task 2 + ↓ +Task 5 (Pool) — depends on Task 2 + ↓ +Task 6 (DockerExecutor) — depends on Tasks 3, 4, 5 + ↓ +Task 7 (Integration tests) — depends on Task 6 + ↓ +Task 8 (MCP wiring) — depends on Task 3 + ↓ +Task 9 (Agent SDK wiring) — depends on Tasks 6, 8 + ↓ +Task 10 (Final verification) — depends on all +``` + +**Parallelizable:** Tasks 3-5 can be done in parallel once Task 2 is complete, and Task 8 can start as soon as Task 3 is done. Task 1 is independent of everything. 
diff --git a/examples/graphrag-memory-agent/main.go b/examples/graphrag-memory-agent/main.go index 0226af90..c60f632a 100644 --- a/examples/graphrag-memory-agent/main.go +++ b/examples/graphrag-memory-agent/main.go @@ -112,8 +112,8 @@ func main() { ag, err := agent.NewAgent( agent.WithLLM(llm), agent.WithName("MemoryAgent"), - agent.WithMemory(conversationMemory), // Short-term: conversation history - agent.WithGraphRAG(store), // Long-term: structured knowledge graph + agent.WithMemory(conversationMemory), // Short-term: conversation history + agent.WithGraphRAG(store), // Long-term: structured knowledge graph agent.WithRequirePlanApproval(false), agent.WithMaxIterations(10), // Allow enough iterations for memory operations agent.WithSystemPrompt(memoryAgentPrompt), diff --git a/examples/llm/deepseek/agent/main.go b/examples/llm/deepseek/agent/main.go index 558ce1e2..19b6b5f5 100644 --- a/examples/llm/deepseek/agent/main.go +++ b/examples/llm/deepseek/agent/main.go @@ -48,7 +48,7 @@ func (t *SearchTool) Execute(ctx context.Context, args string) (string, error) { // Simulate search results results := map[string]interface{}{ - "query": params.Query, + "query": params.Query, "results": []string{ "Result 1: DeepSeek-V3.2 released with 128K context window", "Result 2: DeepSeek reasoning models outperform GPT-4 on benchmarks", diff --git a/examples/sandbox_demo/main.go b/examples/sandbox_demo/main.go new file mode 100644 index 00000000..0aca41ff --- /dev/null +++ b/examples/sandbox_demo/main.go @@ -0,0 +1,171 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "log" + "os" + "time" + + "google.golang.org/genai" + + "github.com/Ingenimax/agent-sdk-go/pkg/agent" + "github.com/Ingenimax/agent-sdk-go/pkg/interfaces" + "github.com/Ingenimax/agent-sdk-go/pkg/llm/gemini" + "github.com/Ingenimax/agent-sdk-go/pkg/logging" + "github.com/Ingenimax/agent-sdk-go/pkg/sandbox" +) + +// ShellTool is a tool that executes commands through the sandbox executor. 
+type ShellTool struct { + executor sandbox.CommandExecutor +} + +func (t *ShellTool) Name() string { return "run_command" } +func (t *ShellTool) Description() string { return "Run a shell command in a sandboxed container. Only allowed commands can be executed." } +func (t *ShellTool) Parameters() map[string]interfaces.ParameterSpec { + return map[string]interfaces.ParameterSpec{ + "command": {Type: "string", Description: "The command to run (e.g., 'ls', 'cat', 'echo')", Required: true}, + "args": {Type: "string", Description: "Space-separated arguments for the command", Required: false}, + } +} + +func (t *ShellTool) Run(ctx context.Context, input string) (string, error) { + return t.Execute(ctx, input) +} + +func (t *ShellTool) Execute(ctx context.Context, args string) (string, error) { + var params struct { + Command string `json:"command"` + Args string `json:"args"` + } + if err := json.Unmarshal([]byte(args), &params); err != nil { + return "", fmt.Errorf("invalid args: %w", err) + } + + // Split args string into slice + var cmdArgs []string + if params.Args != "" { + // Simple split — good enough for demo + for _, a := range splitArgs(params.Args) { + cmdArgs = append(cmdArgs, a) + } + } + + cmd, err := t.executor.Command(ctx, params.Command, cmdArgs...) 
+ if err != nil { + return fmt.Sprintf("Command denied: %v", err), nil + } + + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Sprintf("Command failed: %v\nOutput: %s", err, string(output)), nil + } + + return string(output), nil +} + +func splitArgs(s string) []string { + var args []string + current := "" + inQuote := false + for _, c := range s { + switch { + case c == '"': + inQuote = !inQuote + case c == ' ' && !inQuote: + if current != "" { + args = append(args, current) + current = "" + } + default: + current += string(c) + } + } + if current != "" { + args = append(args, current) + } + return args +} + +func main() { + apiKey := os.Getenv("GEMINI_API_KEY") + if apiKey == "" { + log.Fatal("Set GEMINI_API_KEY environment variable") + } + + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + logger := logging.New() + + // --- Step 1: Create sandbox executor --- + fmt.Println("=== Creating Docker Sandbox ===") + executor, err := sandbox.NewDockerExecutor(ctx, sandbox.Config{ + Enabled: true, + Image: "alpine:3.19", + AllowedCommands: []string{"echo", "ls", "cat", "uname", "whoami", "date", "hostname"}, + DeniedCommands: []string{"rm", "dd", "mkfs", "chmod", "chown", "kill"}, + PoolSize: 1, + Timeout: 10 * time.Second, + NetworkMode: "none", + MemoryLimit: "128m", + CPULimit: "0.5", + }, logger) + if err != nil { + log.Fatalf("Failed to create sandbox: %v", err) + } + defer executor.Close(ctx) + fmt.Println("Sandbox container ready!") + fmt.Println() + + // --- Step 2: Create Gemini LLM client --- + fmt.Println("=== Creating Gemini Agent ===") + llm, err := gemini.NewClient(ctx, + gemini.WithAPIKey(apiKey), + gemini.WithBackend(genai.BackendGeminiAPI), + gemini.WithModel(gemini.ModelGemini20Flash), + ) + if err != nil { + log.Fatalf("Failed to create Gemini client: %v", err) + } + fmt.Printf("LLM: %s (model: %s)\n", llm.Name(), llm.GetModel()) + + // --- Step 3: Create agent with sandbox tool --- + 
shellTool := &ShellTool{executor: executor} + + agentInstance, err := agent.NewAgent( + agent.WithLLM(llm), + agent.WithTools(shellTool), + agent.WithSandbox(executor), + agent.WithRequirePlanApproval(false), + agent.WithSystemPrompt(`You are a system exploration agent running inside a secure sandbox container. +You have a run_command tool that executes commands inside a Docker container. +The container is isolated: no network, read-only filesystem, limited resources. + +Only these commands are allowed: echo, ls, cat, uname, whoami, date, hostname. +Commands like rm, dd, chmod are blocked for security. + +When the user asks you to explore, use the run_command tool directly. Do NOT create execution plans.`), + agent.WithMaxIterations(10), + ) + if err != nil { + log.Fatalf("Failed to create agent: %v", err) + } + fmt.Println("Agent ready!") + fmt.Println() + + // --- Step 4: Run the agent --- + fmt.Println("=== Agent Task: Explore the sandbox container ===") + fmt.Println() + + result, err := agentInstance.Run(ctx, + "Explore this sandbox container. Tell me: 1) What OS and architecture is it running? 2) What user are you? 3) What's the hostname? 4) List the files in /usr/bin/ (first 20 lines). 
5) Try to run 'rm /tmp/test' to show it's blocked.") + if err != nil { + log.Fatalf("Agent error: %v", err) + } + + fmt.Println("=== Agent Response ===") + fmt.Println(result) +} diff --git a/examples/simple_yaml_agent/main.go b/examples/simple_yaml_agent/main.go index 5af9e18c..cd1c9912 100644 --- a/examples/simple_yaml_agent/main.go +++ b/examples/simple_yaml_agent/main.go @@ -25,4 +25,4 @@ func main() { // Print result println(result) -} \ No newline at end of file +} diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index 41658789..5319fe0b 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -21,6 +21,7 @@ import ( "github.com/Ingenimax/agent-sdk-go/pkg/mcp" "github.com/Ingenimax/agent-sdk-go/pkg/memory" "github.com/Ingenimax/agent-sdk-go/pkg/multitenancy" + "github.com/Ingenimax/agent-sdk-go/pkg/sandbox" "github.com/Ingenimax/agent-sdk-go/pkg/storage" "github.com/Ingenimax/agent-sdk-go/pkg/tools" "github.com/Ingenimax/agent-sdk-go/pkg/tools/imagegen" @@ -40,8 +41,9 @@ type LazyMCPConfig struct { URL string Token string // Bearer token for HTTP authentication Tools []LazyMCPToolConfig - HttpTransportMode string // "sse" or "streamable" - AllowedTools []string // List of allowed tool names for this MCP server + HttpTransportMode string // "sse" or "streamable" + AllowedTools []string // List of allowed tool names for this MCP server + Executor sandbox.CommandExecutor // Optional sandbox executor } // LazyMCPToolConfig holds configuration for a lazy MCP tool @@ -100,6 +102,9 @@ type Agent struct { // Custom function fields customRunFunc CustomRunFunction // Custom run function to replace default behavior customRunStreamFunc CustomRunStreamFunction // Custom stream function to replace default streaming behavior + + // Sandbox executor for containerized MCP command execution + sandbox sandbox.CommandExecutor } // Option represents an option for configuring an agent @@ -600,6 +605,14 @@ func WithCustomRunStreamFunction(fn CustomRunStreamFunction) Option { 
} } +// WithSandbox sets the sandbox executor for containerized MCP command execution. +// When set, all MCP stdio server commands will run through this executor. +func WithSandbox(executor sandbox.CommandExecutor) Option { + return func(a *Agent) { + a.sandbox = executor + } +} + // NewAgent creates a new agent with the given options func NewAgent(options ...Option) (*Agent, error) { agent := &Agent{ @@ -1130,6 +1143,12 @@ func (a *Agent) createLazyMCPTools() []interfaces.Tool { Token: config.Token, HttpTransportMode: config.HttpTransportMode, AllowedTools: config.AllowedTools, + Executor: config.Executor, + } + + // Propagate agent-level sandbox if the individual config doesn't have one + if config.Executor == nil && a.sandbox != nil { + lazyServerConfig.Executor = a.sandbox } // If no specific tools are defined, discover all tools from the server @@ -2292,4 +2311,3 @@ func createImageStorageFromConfig(config *ImageStorageYAML) (storage.ImageStorag return nil, fmt.Errorf("unsupported storage type: %s (only 'local' and 'gcs' are supported)", storageType) } } - diff --git a/pkg/agent/config.go b/pkg/agent/config.go index 8bc4e3fa..016b254c 100644 --- a/pkg/agent/config.go +++ b/pkg/agent/config.go @@ -132,27 +132,27 @@ type RuntimeConfigYAML struct { // ImageGenerationYAML represents image generation configuration in YAML type ImageGenerationYAML struct { - Enabled *bool `yaml:"enabled,omitempty"` - Provider string `yaml:"provider,omitempty"` // "gemini" - Model string `yaml:"model,omitempty"` // e.g., "gemini-2.5-flash-image" - Config map[string]interface{} `yaml:"config,omitempty"` - Storage *ImageStorageYAML `yaml:"storage,omitempty"` - MultiTurnEditing *MultiTurnEditingYAML `yaml:"multi_turn_editing,omitempty"` + Enabled *bool `yaml:"enabled,omitempty"` + Provider string `yaml:"provider,omitempty"` // "gemini" + Model string `yaml:"model,omitempty"` // e.g., "gemini-2.5-flash-image" + Config map[string]interface{} `yaml:"config,omitempty"` + Storage 
*ImageStorageYAML `yaml:"storage,omitempty"` + MultiTurnEditing *MultiTurnEditingYAML `yaml:"multi_turn_editing,omitempty"` } // MultiTurnEditingYAML represents multi-turn image editing configuration in YAML type MultiTurnEditingYAML struct { - Enabled *bool `yaml:"enabled,omitempty"` - Model string `yaml:"model,omitempty"` // e.g., "gemini-3-pro-image-preview" - SessionTimeout string `yaml:"session_timeout,omitempty"` // e.g., "30m" - MaxSessionsPerOrg *int `yaml:"max_sessions_per_org,omitempty"` + Enabled *bool `yaml:"enabled,omitempty"` + Model string `yaml:"model,omitempty"` // e.g., "gemini-3-pro-image-preview" + SessionTimeout string `yaml:"session_timeout,omitempty"` // e.g., "30m" + MaxSessionsPerOrg *int `yaml:"max_sessions_per_org,omitempty"` } // ImageStorageYAML represents image storage configuration in YAML type ImageStorageYAML struct { - Type string `yaml:"type,omitempty"` // "local", "gcs" - Local *LocalStorageYAML `yaml:"local,omitempty"` - GCS *GCSStorageYAML `yaml:"gcs,omitempty"` + Type string `yaml:"type,omitempty"` // "local", "gcs" + Local *LocalStorageYAML `yaml:"local,omitempty"` + GCS *GCSStorageYAML `yaml:"gcs,omitempty"` } // LocalStorageYAML represents local storage configuration in YAML diff --git a/pkg/agent/env.go b/pkg/agent/env.go index 62bec583..cecc5e92 100644 --- a/pkg/agent/env.go +++ b/pkg/agent/env.go @@ -107,4 +107,4 @@ func GetEnvValue(key string) string { return value } return "" -} \ No newline at end of file +} diff --git a/pkg/agent/factory.go b/pkg/agent/factory.go index 3b5ac4a2..c3d517ed 100644 --- a/pkg/agent/factory.go +++ b/pkg/agent/factory.go @@ -75,7 +75,7 @@ type CreateAgentConfig struct { AllowFallback bool CacheTimeout time.Duration EnableEnvOverrides bool - Verbose bool + Verbose bool // Agent options MaxIterations *int @@ -97,8 +97,8 @@ func NewAgentFromCreateConfig(ctx context.Context, config CreateAgentConfig) (*A // Otherwise, use the agentconfig package // Since we can't import it due to cycles, 
provide guidance - return nil, fmt.Errorf("for remote/dual configuration loading, use:\n" + - "import \"github.com/Ingenimax/agent-sdk-go/pkg/agentconfig\"\n" + + return nil, fmt.Errorf("for remote/dual configuration loading, use:\n"+ + "import \"github.com/Ingenimax/agent-sdk-go/pkg/agentconfig\"\n"+ "agent, err := agentconfig.LoadAgentAuto(ctx, %q, %q)", config.AgentName, config.Environment) } @@ -163,4 +163,4 @@ func NewAgentCreationError(agentName, source string, err error) error { Source: source, Err: err, } -} \ No newline at end of file +} diff --git a/pkg/agent/llm_factory.go b/pkg/agent/llm_factory.go index ce88c2c4..c2a86011 100644 --- a/pkg/agent/llm_factory.go +++ b/pkg/agent/llm_factory.go @@ -435,4 +435,4 @@ func getConfigString(config map[string]interface{}, key string) string { } } return "" -} \ No newline at end of file +} diff --git a/pkg/agent/llm_factory_test.go b/pkg/agent/llm_factory_test.go index 13127321..982ab03b 100644 --- a/pkg/agent/llm_factory_test.go +++ b/pkg/agent/llm_factory_test.go @@ -30,7 +30,7 @@ func TestParseGoogleCredentials(t *testing.T) { wantErr: false, }, { - name: "file path with JSON content", + name: "file path with JSON content", setupFunc: func() (string, func()) { tmpDir := t.TempDir() tmpFile := filepath.Join(tmpDir, "credentials.json") @@ -59,7 +59,7 @@ func TestParseGoogleCredentials(t *testing.T) { wantErr: true, }, { - name: "file with invalid JSON", + name: "file with invalid JSON", setupFunc: func() (string, func()) { tmpDir := t.TempDir() tmpFile := filepath.Join(tmpDir, "invalid.json") diff --git a/pkg/agent/mcp_config.go b/pkg/agent/mcp_config.go index fc8d4bea..3b157f05 100644 --- a/pkg/agent/mcp_config.go +++ b/pkg/agent/mcp_config.go @@ -11,6 +11,7 @@ import ( "gopkg.in/yaml.v3" "github.com/Ingenimax/agent-sdk-go/pkg/mcp" + "github.com/Ingenimax/agent-sdk-go/pkg/sandbox" ) // MCPServerConfig represents a single MCP server configuration @@ -22,6 +23,7 @@ type MCPServerConfig struct { Token string 
`json:"token,omitempty" yaml:"token,omitempty"` HttpTransportMode string `json:"httpTransportMode,omitempty" yaml:"httpTransportMode,omitempty"` // "sse" or "streamable" AllowedTools []string `json:"allowedTools,omitempty" yaml:"allowedTools,omitempty"` + Sandbox *sandbox.Config `json:"sandbox,omitempty" yaml:"sandbox,omitempty"` } // MCPDiscoveredServerInfo represents metadata discovered from the server at runtime diff --git a/pkg/agent/tool_factory.go b/pkg/agent/tool_factory.go index 4a01f1eb..cc04501f 100644 --- a/pkg/agent/tool_factory.go +++ b/pkg/agent/tool_factory.go @@ -168,4 +168,4 @@ func (atw *AgentToolWrapper) Execute(ctx context.Context, args string) (string, // Run implements interfaces.Tool.Run func (atw *AgentToolWrapper) Run(ctx context.Context, input string) (string, error) { return atw.agent.Run(ctx, input) -} \ No newline at end of file +} diff --git a/pkg/agentconfig/api.go b/pkg/agentconfig/api.go index 5dfa624b..7ac24145 100644 --- a/pkg/agentconfig/api.go +++ b/pkg/agentconfig/api.go @@ -81,4 +81,4 @@ func LoadAgentWithVariables(ctx context.Context, agentName, environment string, } return agent.NewAgentFromConfigObject(ctx, config, variables, options...) 
-} \ No newline at end of file +} diff --git a/pkg/agentconfig/cache.go b/pkg/agentconfig/cache.go index 7676132a..70ada146 100644 --- a/pkg/agentconfig/cache.go +++ b/pkg/agentconfig/cache.go @@ -101,4 +101,4 @@ func CleanupExpiredEntries() { delete(configCache, key) } } -} \ No newline at end of file +} diff --git a/pkg/agentconfig/examples.go b/pkg/agentconfig/examples.go index f8537300..ab2de48b 100644 --- a/pkg/agentconfig/examples.go +++ b/pkg/agentconfig/examples.go @@ -71,7 +71,7 @@ func ExampleAdvancedOptions() { WithLocalFallback("./configs/research.yaml"), // Specific fallback file WithCache(10 * time.Minute), // Longer cache WithEnvOverrides(), // Enable env var overrides - WithVerbose(), // Enable logging + WithVerbose(), // Enable logging } // Agent options for customization @@ -142,4 +142,4 @@ func ExampleMigrationFromOldAPI() { } fmt.Printf("Migrated to new configuration system: %s\n", agentInstance.GetConfig().ConfigSource.Type) -} \ No newline at end of file +} diff --git a/pkg/agentconfig/merge_test.go b/pkg/agentconfig/merge_test.go index 9d4eb2e9..b3aeb5ed 100644 --- a/pkg/agentconfig/merge_test.go +++ b/pkg/agentconfig/merge_test.go @@ -514,8 +514,8 @@ func TestNilConfigMerge(t *testing.T) { // TestDeepCopyComplexPointers verifies deep copying of complex pointer fields func TestDeepCopyComplexPointers(t *testing.T) { primary := &agent.AgentConfig{ - Role: "Primary", - MaxIterations: intPtr(5), + Role: "Primary", + MaxIterations: intPtr(5), RequirePlanApproval: boolPtr(true), StreamConfig: &agent.StreamConfigYAML{ BufferSize: intPtr(100), diff --git a/pkg/agentconfig/models.go b/pkg/agentconfig/models.go index 931daf6a..a3c68d11 100644 --- a/pkg/agentconfig/models.go +++ b/pkg/agentconfig/models.go @@ -46,16 +46,16 @@ type ConfigurationResponse struct { // AgentConfigResponse represents a resolved agent configuration from the service type AgentConfigResponse struct { AgentConfig struct { - ID string `json:"id"` - AgentName string 
`json:"agent_name"` - Environment string `json:"environment"` - DisplayName string `json:"display_name"` - Description string `json:"description"` - Goal string `json:"goal"` - SystemPrompt string `json:"system_prompt"` - SchemaVersion string `json:"schema_version"` - CreatedAt time.Time `json:"created_at"` - UpdatedAt time.Time `json:"updated_at"` + ID string `json:"id"` + AgentName string `json:"agent_name"` + Environment string `json:"environment"` + DisplayName string `json:"display_name"` + Description string `json:"description"` + Goal string `json:"goal"` + SystemPrompt string `json:"system_prompt"` + SchemaVersion string `json:"schema_version"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` } `json:"agent_config"` GeneratedYAML string `json:"generated_yaml"` // YAML generated from structured data ResolvedYAML string `json:"resolved_yaml"` // YAML with variables resolved diff --git a/pkg/executionplan/generator.go b/pkg/executionplan/generator.go index 92df97c8..97f40ebe 100644 --- a/pkg/executionplan/generator.go +++ b/pkg/executionplan/generator.go @@ -10,10 +10,10 @@ import ( // Generator handles generation of execution plans type Generator struct { - llm interfaces.LLM - tools []interfaces.Tool - systemPrompt string - requireApproval bool + llm interfaces.LLM + tools []interfaces.Tool + systemPrompt string + requireApproval bool } // NewGenerator creates a new execution plan generator @@ -26,7 +26,6 @@ func NewGenerator(llm interfaces.LLM, tools []interfaces.Tool, systemPrompt stri } } - // GenerateExecutionPlan generates an execution plan based on the user input func (g *Generator) GenerateExecutionPlan(ctx context.Context, input string) (*ExecutionPlan, error) { // If no tools are available, return an error diff --git a/pkg/guardrails/tool_middleware.go b/pkg/guardrails/tool_middleware.go index eb664f01..9eebe877 100644 --- a/pkg/guardrails/tool_middleware.go +++ b/pkg/guardrails/tool_middleware.go @@ -6,6 +6,9 @@ 
import ( "github.com/Ingenimax/agent-sdk-go/pkg/interfaces" ) +// Compile-time assertion that ToolMiddleware implements interfaces.Tool. +var _ interfaces.Tool = (*ToolMiddleware)(nil) + // ToolMiddleware implements middleware for tool calls type ToolMiddleware struct { tool interfaces.Tool @@ -57,3 +60,26 @@ func (m *ToolMiddleware) Run(ctx context.Context, input string) (string, error) return processedOutput, nil } + +// Execute executes the tool with the given arguments, applying guardrails +func (m *ToolMiddleware) Execute(ctx context.Context, args string) (string, error) { + // Process request through guardrails + processedInput, err := m.pipeline.ProcessRequest(ctx, args) + if err != nil { + return "", err + } + + // Call the underlying tool + output, err := m.tool.Execute(ctx, processedInput) + if err != nil { + return "", err + } + + // Process response through guardrails + processedOutput, err := m.pipeline.ProcessResponse(ctx, output) + if err != nil { + return "", err + } + + return processedOutput, nil +} diff --git a/pkg/guardrails/tool_middleware_test.go b/pkg/guardrails/tool_middleware_test.go new file mode 100644 index 00000000..0c5e0564 --- /dev/null +++ b/pkg/guardrails/tool_middleware_test.go @@ -0,0 +1,107 @@ +package guardrails + +import ( + "context" + "testing" + + "github.com/Ingenimax/agent-sdk-go/pkg/interfaces" + "github.com/Ingenimax/agent-sdk-go/pkg/logging" +) + +type mockTool struct { + name string + description string + runOutput string + execOutput string + runErr error + execErr error + lastRunInput string + lastExecInput string +} + +func (m *mockTool) Name() string { return m.name } +func (m *mockTool) Description() string { return m.description } +func (m *mockTool) Parameters() map[string]interfaces.ParameterSpec { return nil } +func (m *mockTool) Run(ctx context.Context, input string) (string, error) { + m.lastRunInput = input + return m.runOutput, m.runErr +} +func (m *mockTool) Execute(ctx context.Context, args string) 
(string, error) { + m.lastExecInput = args + return m.execOutput, m.execErr +} + +func TestToolMiddleware_Execute(t *testing.T) { + tool := &mockTool{ + name: "test_tool", + execOutput: "raw output with badword inside", + } + + pipeline := NewPipeline([]Guardrail{NewContentFilter([]string{"badword"}, RedactAction)}, logging.New()) + middleware := NewToolMiddleware(tool, pipeline) + + result, err := middleware.Execute(context.Background(), "input with badword here") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if result == "raw output with badword inside" { + t.Error("Execute() did not apply guardrails to output — guardrails are bypassed") + } + if result != "raw output with **** inside" { + t.Errorf("unexpected result: %q", result) + } + + // Verify that input guardrails were applied before reaching the tool + if tool.lastExecInput == "input with badword here" { + t.Error("Execute() did not apply guardrails to input — tool received raw input") + } + if tool.lastExecInput != "input with **** here" { + t.Errorf("unexpected input received by tool: %q", tool.lastExecInput) + } +} + +func TestToolMiddleware_Execute_BlockAction(t *testing.T) { + tool := &mockTool{ + name: "test_tool", + execOutput: "clean output", + } + + pipeline := NewPipeline([]Guardrail{NewContentFilter([]string{"blocked"}, BlockAction)}, logging.New()) + middleware := NewToolMiddleware(tool, pipeline) + + _, err := middleware.Execute(context.Background(), "this is blocked content") + if err == nil { + t.Error("expected error for blocked content, got nil") + } +} + +func TestToolMiddleware_Run(t *testing.T) { + tool := &mockTool{ + name: "test_tool", + runOutput: "raw output with badword inside", + } + + pipeline := NewPipeline([]Guardrail{NewContentFilter([]string{"badword"}, RedactAction)}, logging.New()) + middleware := NewToolMiddleware(tool, pipeline) + + result, err := middleware.Run(context.Background(), "input with badword here") + if err != nil { + t.Fatalf("unexpected 
error: %v", err) + } + + if result == "raw output with badword inside" { + t.Error("Run() did not apply guardrails to output — guardrails are bypassed") + } + if result != "raw output with **** inside" { + t.Errorf("unexpected result: %q", result) + } + + // Verify that input guardrails were applied before reaching the tool + if tool.lastRunInput == "input with badword here" { + t.Error("Run() did not apply guardrails to input — tool received raw input") + } + if tool.lastRunInput != "input with **** here" { + t.Errorf("unexpected input received by tool: %q", tool.lastRunInput) + } +} diff --git a/pkg/llm/deepseek/client.go b/pkg/llm/deepseek/client.go index fa3f0a9d..c3b60320 100644 --- a/pkg/llm/deepseek/client.go +++ b/pkg/llm/deepseek/client.go @@ -108,34 +108,34 @@ func (c *DeepSeekClient) SupportsStreaming() bool { // ChatCompletionRequest represents a request to the DeepSeek Chat Completion API type ChatCompletionRequest struct { - Model string `json:"model"` - Messages []Message `json:"messages"` - Temperature float64 `json:"temperature,omitempty"` - TopP float64 `json:"top_p,omitempty"` - FrequencyPenalty float64 `json:"frequency_penalty,omitempty"` - PresencePenalty float64 `json:"presence_penalty,omitempty"` - Stop []string `json:"stop,omitempty"` - MaxTokens int `json:"max_tokens,omitempty"` - Stream bool `json:"stream,omitempty"` - Tools []Tool `json:"tools,omitempty"` - ToolChoice interface{} `json:"tool_choice,omitempty"` - ResponseFormat *ResponseFormatParam `json:"response_format,omitempty"` + Model string `json:"model"` + Messages []Message `json:"messages"` + Temperature float64 `json:"temperature,omitempty"` + TopP float64 `json:"top_p,omitempty"` + FrequencyPenalty float64 `json:"frequency_penalty,omitempty"` + PresencePenalty float64 `json:"presence_penalty,omitempty"` + Stop []string `json:"stop,omitempty"` + MaxTokens int `json:"max_tokens,omitempty"` + Stream bool `json:"stream,omitempty"` + Tools []Tool `json:"tools,omitempty"` + 
ToolChoice interface{} `json:"tool_choice,omitempty"` + ResponseFormat *ResponseFormatParam `json:"response_format,omitempty"` } // Message represents a message in the chat type Message struct { - Role string `json:"role"` - Content string `json:"content,omitempty"` - ToolCalls []ToolCall `json:"tool_calls,omitempty"` - ToolCallID string `json:"tool_call_id,omitempty"` - Name string `json:"name,omitempty"` + Role string `json:"role"` + Content string `json:"content,omitempty"` + ToolCalls []ToolCall `json:"tool_calls,omitempty"` + ToolCallID string `json:"tool_call_id,omitempty"` + Name string `json:"name,omitempty"` } // ToolCall represents a tool call in the response type ToolCall struct { - ID string `json:"id"` - Type string `json:"type"` - Function FunctionCall `json:"function"` + ID string `json:"id"` + Type string `json:"type"` + Function FunctionCall `json:"function"` } // FunctionCall represents a function call @@ -146,8 +146,8 @@ type FunctionCall struct { // Tool represents a tool/function definition type Tool struct { - Type string `json:"type"` - Function FunctionDef `json:"function"` + Type string `json:"type"` + Function FunctionDef `json:"function"` } // FunctionDef represents a function definition @@ -455,13 +455,13 @@ func (c *DeepSeekClient) GenerateWithToolsDetailed(ctx context.Context, prompt s } c.logger.Debug(ctx, "Sending request with tools to DeepSeek", map[string]interface{}{ - "model": c.Model, - "temperature": req.Temperature, - "messages": len(req.Messages), - "tools": len(req.Tools), - "iteration": iteration + 1, - "maxIterations": maxIterations, - "org_id": orgID, + "model": c.Model, + "temperature": req.Temperature, + "messages": len(req.Messages), + "tools": len(req.Tools), + "iteration": iteration + 1, + "maxIterations": maxIterations, + "org_id": orgID, }) // Make request diff --git a/pkg/llm/deepseek/client_test.go b/pkg/llm/deepseek/client_test.go index f69c09a4..b3331112 100644 --- a/pkg/llm/deepseek/client_test.go +++ 
b/pkg/llm/deepseek/client_test.go @@ -13,11 +13,11 @@ import ( func TestNewClient(t *testing.T) { tests := []struct { - name string - apiKey string - options []Option - wantModel string - wantBase string + name string + apiKey string + options []Option + wantModel string + wantBase string }{ { name: "default configuration", diff --git a/pkg/llm/gemini/client.go b/pkg/llm/gemini/client.go index b5086303..0283f180 100644 --- a/pkg/llm/gemini/client.go +++ b/pkg/llm/gemini/client.go @@ -1212,7 +1212,7 @@ func (c *GeminiClient) CreateImageEditSession(ctx context.Context, options *inte } c.logger.Debug(ctx, "Creating image edit session", map[string]interface{}{ - "model": model, + "model": model, "has_system_instruction": options != nil && options.SystemInstruction != "", }) diff --git a/pkg/llm/gemini/image_edit_session.go b/pkg/llm/gemini/image_edit_session.go index ac343235..f45a57f5 100644 --- a/pkg/llm/gemini/image_edit_session.go +++ b/pkg/llm/gemini/image_edit_session.go @@ -28,9 +28,9 @@ func (s *GeminiImageEditSession) SendMessage(ctx context.Context, message string } s.logger.Debug(ctx, "Sending message to image edit session", map[string]interface{}{ - "model": s.model, - "message_len": len(message), - "has_options": options != nil, + "model": s.model, + "message_len": len(message), + "has_options": options != nil, }) // Build message part @@ -230,9 +230,9 @@ func (s *GeminiImageEditSession) parseResponse(result *genai.GenerateContentResp response.Metadata["model"] = s.model s.logger.Debug(context.Background(), "Parsed image edit response", map[string]interface{}{ - "text_len": len(response.Text), - "image_count": len(response.Images), - "has_usage": response.Usage != nil, + "text_len": len(response.Text), + "image_count": len(response.Images), + "has_usage": response.Usage != nil, }) return response, nil diff --git a/pkg/llm/gemini/streaming.go b/pkg/llm/gemini/streaming.go index d718fdec..2d637cad 100644 --- a/pkg/llm/gemini/streaming.go +++ 
b/pkg/llm/gemini/streaming.go @@ -425,6 +425,36 @@ func (c *GeminiClient) generateWithToolsAndStream(ctx context.Context, prompt st paramSchema.Type = genai.TypeObject } + // Handle array items + if param.Items != nil { + itemSchema := &genai.Schema{} + + // Set items type + switch param.Items.Type { + case "string": + itemSchema.Type = genai.TypeString + case "number", "integer": + itemSchema.Type = genai.TypeNumber + case "boolean": + itemSchema.Type = genai.TypeBoolean + case "array": + itemSchema.Type = genai.TypeArray + case "object": + itemSchema.Type = genai.TypeObject + } + + // Handle items enum if present + if param.Items.Enum != nil { + enumStrings := make([]string, len(param.Items.Enum)) + for i, e := range param.Items.Enum { + enumStrings[i] = fmt.Sprintf("%v", e) + } + itemSchema.Enum = enumStrings + } + + paramSchema.Items = itemSchema + } + if param.Enum != nil { enumStrings := make([]string, len(param.Enum)) for i, e := range param.Enum { diff --git a/pkg/llm/gemini/types.go b/pkg/llm/gemini/types.go index 4c2c4e08..dca9deb5 100644 --- a/pkg/llm/gemini/types.go +++ b/pkg/llm/gemini/types.go @@ -260,15 +260,15 @@ func GetModelCapabilities(model string) ModelCapabilities { } case ModelGemini20FlashPreviewImageGen: return ModelCapabilities{ - SupportsStreaming: true, - SupportsToolCalling: true, - SupportsVision: true, - SupportsAudio: false, - SupportsThinking: false, // 2.0 and 1.5 models don't support thinking - SupportsImageGeneration: true, // Can generate images - MaxInputTokens: 1048576, // 1M tokens - MaxOutputTokens: 8192, - MaxThinkingTokens: nil, + SupportsStreaming: true, + SupportsToolCalling: true, + SupportsVision: true, + SupportsAudio: false, + SupportsThinking: false, // 2.0 and 1.5 models don't support thinking + SupportsImageGeneration: true, // Can generate images + MaxInputTokens: 1048576, // 1M tokens + MaxOutputTokens: 8192, + MaxThinkingTokens: nil, SupportedMimeTypes: []string{ "image/png", "image/jpeg", "image/webp", 
"image/heic", "image/heif", "text/plain", @@ -282,8 +282,8 @@ func GetModelCapabilities(model string) ModelCapabilities { SupportsVision: true, // Can accept images as input for image-to-image SupportsAudio: false, SupportsThinking: false, - SupportsImageGeneration: true, // Primary purpose: generate images - SupportsMultiTurnImageEditing: true, // Supports chat-based image editing + SupportsImageGeneration: true, // Primary purpose: generate images + SupportsMultiTurnImageEditing: true, // Supports chat-based image editing MaxInputTokens: 32768, MaxOutputTokens: 8192, MaxThinkingTokens: nil, @@ -301,9 +301,9 @@ func GetModelCapabilities(model string) ModelCapabilities { SupportsToolCalling: false, // Image gen models typically don't support tools SupportsVision: true, // Can accept images as input SupportsAudio: false, - SupportsThinking: true, // Uses "Thinking" for complex instructions + SupportsThinking: true, // Uses "Thinking" for complex instructions SupportsImageGeneration: true, - SupportsMultiTurnImageEditing: true, // Primary feature: multi-turn image editing + SupportsMultiTurnImageEditing: true, // Primary feature: multi-turn image editing MaxInputTokens: 32768, MaxOutputTokens: 8192, MaxThinkingTokens: nil, diff --git a/pkg/mcp/lazy.go b/pkg/mcp/lazy.go index 96760384..37cf1c47 100644 --- a/pkg/mcp/lazy.go +++ b/pkg/mcp/lazy.go @@ -12,6 +12,7 @@ import ( "github.com/Ingenimax/agent-sdk-go/pkg/interfaces" "github.com/Ingenimax/agent-sdk-go/pkg/logging" + "github.com/Ingenimax/agent-sdk-go/pkg/sandbox" ) const ( @@ -89,9 +90,10 @@ func (cache *LazyMCPServerCache) getOrCreateServer(ctx context.Context, config L switch config.Type { case "stdio": server, err = NewStdioServer(ctx, StdioServerConfig{ - Command: config.Command, - Args: config.Args, - Env: config.Env, + Command: config.Command, + Args: config.Args, + Env: config.Env, + Executor: config.Executor, }) case "http": server, err = NewHTTPServer(ctx, HTTPServerConfig{ @@ -173,9 +175,10 @@ type 
LazyMCPServerConfig struct { Args []string Env []string URL string - Token string // Bearer token for HTTP authentication - HttpTransportMode string // "sse" or "streamable" - AllowedTools []string // List of allowed tool names for this MCP server + Token string // Bearer token for HTTP authentication + HttpTransportMode string // "sse" or "streamable" + AllowedTools []string // List of allowed tool names for this MCP server + Executor sandbox.CommandExecutor // Optional sandbox executor for stdio servers } // LazyMCPTool is a tool that initializes its MCP server on first use diff --git a/pkg/mcp/mcp.go b/pkg/mcp/mcp.go index e5fe68a8..5210c08f 100644 --- a/pkg/mcp/mcp.go +++ b/pkg/mcp/mcp.go @@ -16,6 +16,7 @@ import ( "github.com/Ingenimax/agent-sdk-go/pkg/interfaces" "github.com/Ingenimax/agent-sdk-go/pkg/logging" + "github.com/Ingenimax/agent-sdk-go/pkg/sandbox" "go.opentelemetry.io/otel/propagation" "go.opentelemetry.io/otel/trace" ) @@ -685,10 +686,11 @@ func (s *MCPServerImpl) Close() error { // StdioServerConfig holds configuration for a stdio MCP server type StdioServerConfig struct { - Command string - Args []string - Env []string - Logger logging.Logger + Command string + Args []string + Env []string + Logger logging.Logger + Executor sandbox.CommandExecutor // Optional sandboxed executor. Nil uses direct host execution. } // NewStdioServer creates a new MCPServer that communicates over stdio using the official SDK @@ -769,9 +771,18 @@ func NewStdioServerWithRetry(ctx context.Context, config StdioServerConfig, retr } } - // Create the command with context - // #nosec G204 -- commandPath is validated above with LookPath and security checks - cmd := exec.CommandContext(ctx, commandPath, config.Args...) + // Create the command, optionally through sandbox executor + var cmd *exec.Cmd + if config.Executor != nil { + var execErr error + cmd, execErr = config.Executor.Command(ctx, commandPath, config.Args...) 
+ if execErr != nil { + return nil, fmt.Errorf("sandbox executor error: %w", execErr) + } + } else { + // #nosec G204 -- commandPath is validated above with LookPath and security checks + cmd = exec.CommandContext(ctx, commandPath, config.Args...) + } if len(config.Env) > 0 { cmd.Env = append(os.Environ(), config.Env...) diff --git a/pkg/mcp/sandbox_integration_test.go b/pkg/mcp/sandbox_integration_test.go new file mode 100644 index 00000000..454b2855 --- /dev/null +++ b/pkg/mcp/sandbox_integration_test.go @@ -0,0 +1,47 @@ +package mcp + +import ( + "context" + "os/exec" + "testing" + + "github.com/Ingenimax/agent-sdk-go/pkg/sandbox" +) + +// Verify the interface compiles — LocalExecutor satisfies CommandExecutor +var _ sandbox.CommandExecutor = &sandbox.LocalExecutor{} + +func TestStdioServerConfig_AcceptsExecutor(t *testing.T) { + config := StdioServerConfig{ + Command: "echo", + Args: []string{"hello"}, + Executor: &sandbox.LocalExecutor{}, + } + if config.Executor == nil { + t.Error("expected Executor to be set") + } +} + +// mockExecutor records calls for testing +type mockSandboxExecutor struct { + called bool + lastCmd string +} + +func (m *mockSandboxExecutor) Command(ctx context.Context, name string, args ...string) (*exec.Cmd, error) { + m.called = true + m.lastCmd = name + return exec.CommandContext(ctx, name, args...), nil +} + +func (m *mockSandboxExecutor) Close(ctx context.Context) error { return nil } + +func TestStdioServerConfig_ExecutorIsUsed(t *testing.T) { + mock := &mockSandboxExecutor{} + config := StdioServerConfig{ + Command: "echo", + Args: []string{"hello"}, + Executor: mock, + } + _ = config // Compiles = type is correct +} diff --git a/pkg/memory/factory.go b/pkg/memory/factory.go index 47158f75..5e1f523d 100644 --- a/pkg/memory/factory.go +++ b/pkg/memory/factory.go @@ -224,4 +224,4 @@ func (f *MemoryFactory) createVectorMemory(config map[string]interface{}, llmCli func NewMemoryFromConfig(config map[string]interface{}, llmClient 
interfaces.LLM) (interfaces.Memory, error) { factory := NewMemoryFactory() return factory.CreateMemory(config, llmClient) -} \ No newline at end of file +} diff --git a/pkg/orchestration/handoff.go b/pkg/orchestration/handoff.go index 7b573d44..423112c1 100644 --- a/pkg/orchestration/handoff.go +++ b/pkg/orchestration/handoff.go @@ -197,7 +197,7 @@ Respond with only the ID of the agent that should handle this query.`, formatAge func formatAgents(agents map[string]string) string { var result strings.Builder for id, desc := range agents { - result.WriteString(fmt.Sprintf("- %s: %s\n", id, desc)) + fmt.Fprintf(&result, "- %s: %s\n", id, desc) } return result.String() } diff --git a/pkg/orchestration/llm_orchestrator.go b/pkg/orchestration/llm_orchestrator.go index 7a50f3cf..c50ba010 100644 --- a/pkg/orchestration/llm_orchestrator.go +++ b/pkg/orchestration/llm_orchestrator.go @@ -401,7 +401,7 @@ func (o *LLMOrchestrator) generateFinalResponse(ctx context.Context, plan *Plan, for i, step := range plan.Steps { stepID := fmt.Sprintf("step_%d", i) if result, ok := results[stepID]; ok { - finalPrompt.WriteString(fmt.Sprintf("--- %s (%s) ---\n%s\n\n", step.Description, step.AgentID, result)) + fmt.Fprintf(&finalPrompt, "--- %s (%s) ---\n%s\n\n", step.Description, step.AgentID, result) completedSteps++ } } @@ -423,7 +423,7 @@ func (o *LLMOrchestrator) generateFinalResponse(ctx context.Context, plan *Plan, func formatAgentDescriptions(descriptions map[string]string) string { var result strings.Builder for id, desc := range descriptions { - result.WriteString(fmt.Sprintf("- %s: %s\n", id, desc)) + fmt.Fprintf(&result, "- %s: %s\n", id, desc) } return result.String() } diff --git a/pkg/prompts/template.go b/pkg/prompts/template.go index 2c87f6d9..1b8c6d8e 100644 --- a/pkg/prompts/template.go +++ b/pkg/prompts/template.go @@ -369,16 +369,16 @@ func serializeTemplate(tmpl *Template) string { var buf bytes.Buffer // Write metadata - buf.WriteString(fmt.Sprintf("name: %s\n", 
tmpl.Name)) - buf.WriteString(fmt.Sprintf("description: %s\n", tmpl.Description)) - buf.WriteString(fmt.Sprintf("format: %s\n", tmpl.Format)) + fmt.Fprintf(&buf, "name: %s\n", tmpl.Name) + fmt.Fprintf(&buf, "description: %s\n", tmpl.Description) + fmt.Fprintf(&buf, "format: %s\n", tmpl.Format) if len(tmpl.Tags) > 0 { - buf.WriteString(fmt.Sprintf("tags: %s\n", strings.Join(tmpl.Tags, ", "))) + fmt.Fprintf(&buf, "tags: %s\n", strings.Join(tmpl.Tags, ", ")) } for key, value := range tmpl.Metadata { - buf.WriteString(fmt.Sprintf("%s: %v\n", key, value)) + fmt.Fprintf(&buf, "%s: %v\n", key, value) } // Write content diff --git a/pkg/sandbox/allowlist.go b/pkg/sandbox/allowlist.go new file mode 100644 index 00000000..d9e3e70e --- /dev/null +++ b/pkg/sandbox/allowlist.go @@ -0,0 +1,48 @@ +package sandbox + +import ( + "fmt" + "path/filepath" + "strings" +) + +// Allowlist enforces which commands are permitted in the sandbox. +// Deny list takes precedence over allow list. Empty allow list denies all (fail-closed). +type Allowlist struct { + allowed map[string]bool + denied map[string]bool +} + +// NewAllowlist creates a new Allowlist from allow and deny lists. +func NewAllowlist(allowed, denied []string) *Allowlist { + a := &Allowlist{ + allowed: make(map[string]bool, len(allowed)), + denied: make(map[string]bool, len(denied)), + } + for _, cmd := range allowed { + a.allowed[strings.ToLower(cmd)] = true + } + for _, cmd := range denied { + a.denied[strings.ToLower(cmd)] = true + } + return a +} + +// Check returns nil if the command is permitted, ErrCommandDenied otherwise. 
+func (a *Allowlist) Check(command string) error { + base := strings.ToLower(filepath.Base(command)) + + if a.denied[base] { + return fmt.Errorf("%w: %q is explicitly denied", ErrCommandDenied, base) + } + + if len(a.allowed) == 0 { + return fmt.Errorf("%w: no commands are allowed (empty allowlist)", ErrCommandDenied) + } + + if !a.allowed[base] { + return fmt.Errorf("%w: %q is not in the allowlist", ErrCommandDenied, base) + } + + return nil +} diff --git a/pkg/sandbox/allowlist_test.go b/pkg/sandbox/allowlist_test.go new file mode 100644 index 00000000..150976c9 --- /dev/null +++ b/pkg/sandbox/allowlist_test.go @@ -0,0 +1,62 @@ +package sandbox + +import ( + "errors" + "testing" +) + +func TestAllowlist_Check_AllowedCommand(t *testing.T) { + al := NewAllowlist([]string{"git", "curl"}, nil) + if err := al.Check("git"); err != nil { + t.Errorf("expected git to be allowed, got: %v", err) + } +} + +func TestAllowlist_Check_AllowedAbsolutePath(t *testing.T) { + al := NewAllowlist([]string{"git"}, nil) + if err := al.Check("/usr/bin/git"); err != nil { + t.Errorf("expected /usr/bin/git to be allowed, got: %v", err) + } +} + +func TestAllowlist_Check_DeniedCommand(t *testing.T) { + al := NewAllowlist([]string{"git", "rm"}, []string{"rm"}) + err := al.Check("rm") + if err == nil { + t.Error("expected rm to be denied") + } + if !errors.Is(err, ErrCommandDenied) { + t.Errorf("expected ErrCommandDenied, got: %v", err) + } +} + +func TestAllowlist_Check_DenyTakesPrecedence(t *testing.T) { + al := NewAllowlist([]string{"rm"}, []string{"rm"}) + err := al.Check("rm") + if err == nil { + t.Error("deny should take precedence over allow") + } +} + +func TestAllowlist_Check_NotInAllowlist(t *testing.T) { + al := NewAllowlist([]string{"git"}, nil) + err := al.Check("curl") + if err == nil { + t.Error("expected curl to be denied when not in allowlist") + } +} + +func TestAllowlist_Check_EmptyAllowlistDeniesAll(t *testing.T) { + al := NewAllowlist(nil, nil) + err := al.Check("git") + 
if err == nil { + t.Error("expected all commands denied when allowlist is empty (fail-closed)") + } +} + +func TestAllowlist_Check_CaseInsensitive(t *testing.T) { + al := NewAllowlist([]string{"Git"}, nil) + if err := al.Check("git"); err != nil { + t.Errorf("expected case-insensitive match, got: %v", err) + } +} diff --git a/pkg/sandbox/config.go b/pkg/sandbox/config.go new file mode 100644 index 00000000..4feea110 --- /dev/null +++ b/pkg/sandbox/config.go @@ -0,0 +1,46 @@ +package sandbox + +import "time" + +// Config holds sandbox configuration, loadable from YAML. +type Config struct { + Enabled bool `json:"enabled" yaml:"enabled"` + Image string `json:"image,omitempty" yaml:"image,omitempty"` + AllowedCommands []string `json:"allowed_commands,omitempty" yaml:"allowed_commands,omitempty"` + DeniedCommands []string `json:"denied_commands,omitempty" yaml:"denied_commands,omitempty"` + PoolSize int `json:"pool_size,omitempty" yaml:"pool_size,omitempty"` + Timeout time.Duration `json:"timeout,omitempty" yaml:"timeout,omitempty"` + MemoryLimit string `json:"memory_limit,omitempty" yaml:"memory_limit,omitempty"` + CPULimit string `json:"cpu_limit,omitempty" yaml:"cpu_limit,omitempty"` + NetworkMode string `json:"network_mode,omitempty" yaml:"network_mode,omitempty"` + MountPaths []MountPath `json:"mount_paths,omitempty" yaml:"mount_paths,omitempty"` +} + +// MountPath represents a bind mount from host to container. +type MountPath struct { + Host string `json:"host" yaml:"host"` + Container string `json:"container" yaml:"container"` + ReadOnly bool `json:"read_only" yaml:"read_only"` +} + +// applyDefaults fills in zero-value fields with sensible defaults. 
+func (c *Config) applyDefaults() { + if c.PoolSize <= 0 { + c.PoolSize = 1 + } + if c.Timeout <= 0 { + c.Timeout = 30 * time.Second + } + if c.MemoryLimit == "" { + c.MemoryLimit = "256m" + } + if c.CPULimit == "" { + c.CPULimit = "0.5" + } + if c.NetworkMode == "" { + c.NetworkMode = "none" + } + if c.Image == "" { + c.Image = "ubuntu:22.04" + } +} diff --git a/pkg/sandbox/docker.go b/pkg/sandbox/docker.go new file mode 100644 index 00000000..922b2050 --- /dev/null +++ b/pkg/sandbox/docker.go @@ -0,0 +1,165 @@ +package sandbox + +import ( + "context" + "fmt" + "os/exec" + "strconv" + "strings" + "time" + + "github.com/Ingenimax/agent-sdk-go/pkg/logging" +) + +// DockerExecutor implements CommandExecutor using Docker containers. +type DockerExecutor struct { + config Config + allowlist *Allowlist + pool *Pool + logger logging.Logger +} + +// Compile-time check that DockerExecutor implements CommandExecutor. +var _ CommandExecutor = (*DockerExecutor)(nil) + +// NewDockerExecutor creates a new DockerExecutor, starts warm containers, and returns the executor. +// Fails fast if Docker is not available or the config is invalid. 
+func NewDockerExecutor(ctx context.Context, config Config, logger logging.Logger) (*DockerExecutor, error) { + if logger == nil { + logger = logging.New() + } + + // Verify Docker is available + dockerPath, err := exec.LookPath("docker") + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrDockerNotFound, err) + } + logger.Debug(ctx, "Docker found", map[string]interface{}{"path": dockerPath}) + + config.applyDefaults() + + allowlist := NewAllowlist(config.AllowedCommands, config.DeniedCommands) + + // Create warm containers + containers, err := createContainers(ctx, config, logger) + if err != nil { + return nil, fmt.Errorf("failed to create sandbox containers: %w", err) + } + + closeFn := func(ctx context.Context, id string) error { + return removeContainer(ctx, id, logger) + } + + return &DockerExecutor{ + config: config, + allowlist: allowlist, + pool: NewPool(containers, closeFn), + logger: logger, + }, nil +} + +// Command creates an exec.Cmd that runs inside a sandbox container via `docker exec`. +func (d *DockerExecutor) Command(ctx context.Context, name string, args ...string) (*exec.Cmd, error) { + if err := d.allowlist.Check(name); err != nil { + return nil, err + } + + container, err := d.pool.Acquire(ctx) + if err != nil { + return nil, err + } + + dockerArgs := make([]string, 0, 4+len(args)) + dockerArgs = append(dockerArgs, "exec", "-i", container.ID, name) + dockerArgs = append(dockerArgs, args...) + + cmd := exec.CommandContext(ctx, "docker", dockerArgs...) + return cmd, nil +} + +// Close stops and removes all sandbox containers. +func (d *DockerExecutor) Close(ctx context.Context) error { + return d.pool.Close(ctx) +} + +// createContainers starts warm containers based on config. 
+func createContainers(ctx context.Context, config Config, logger logging.Logger) ([]Container, error) { + containers := make([]Container, 0, config.PoolSize) + + for i := 0; i < config.PoolSize; i++ { + name := fmt.Sprintf("agent-sandbox-%d-%d", time.Now().UnixNano(), i) + + args := buildContainerArgs(config, name) + + logger.Info(ctx, "Creating sandbox container", map[string]interface{}{ + "name": name, + "image": config.Image, + }) + + cmd := exec.CommandContext(ctx, "docker", args...) + output, err := cmd.Output() + if err != nil { + // Clean up any containers that were created + for _, c := range containers { + _ = removeContainer(ctx, c.ID, logger) + } + return nil, fmt.Errorf("failed to create container %s: %w", name, err) + } + + containerID := strings.TrimSpace(string(output)) + containers = append(containers, Container{ + ID: containerID, + Name: name, + Ready: true, + CreatedAt: time.Now(), + }) + + logger.Info(ctx, "Sandbox container created", map[string]interface{}{ + "name": name, + "id": containerID, + }) + } + + return containers, nil +} + +// buildContainerArgs builds the docker run arguments from config. +func buildContainerArgs(config Config, name string) []string { + args := []string{ + "run", "-d", + "--name", name, + "--memory", config.MemoryLimit, + "--cpus", config.CPULimit, + "--network", config.NetworkMode, + "--read-only", + "--tmpfs", "/tmp:size=64m", + "--security-opt", "no-new-privileges", + "--cap-drop", "ALL", + "--pids-limit", strconv.Itoa(64), + } + + for _, mount := range config.MountPaths { + mountStr := mount.Host + ":" + mount.Container + if mount.ReadOnly { + mountStr += ":ro" + } + args = append(args, "-v", mountStr) + } + + args = append(args, config.Image, "sleep", "infinity") + return args +} + +// removeContainer stops and removes a container by ID. 
+func removeContainer(ctx context.Context, id string, logger logging.Logger) error { + cmd := exec.CommandContext(ctx, "docker", "rm", "-f", id) + if err := cmd.Run(); err != nil { + logger.Warn(ctx, "Failed to remove sandbox container", map[string]interface{}{ + "id": id, + "error": err.Error(), + }) + return err + } + logger.Debug(ctx, "Sandbox container removed", map[string]interface{}{"id": id}) + return nil +} diff --git a/pkg/sandbox/docker_integration_test.go b/pkg/sandbox/docker_integration_test.go new file mode 100644 index 00000000..9b35b708 --- /dev/null +++ b/pkg/sandbox/docker_integration_test.go @@ -0,0 +1,80 @@ +//go:build integration + +package sandbox + +import ( + "context" + "testing" + "time" + + "github.com/Ingenimax/agent-sdk-go/pkg/logging" +) + +func TestDockerExecutor_Integration_CreateAndExecute(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + logger := logging.New() + + executor, err := NewDockerExecutor(ctx, Config{ + Enabled: true, + Image: "alpine:3.19", + AllowedCommands: []string{"echo", "ls", "cat"}, + PoolSize: 1, + Timeout: 10 * time.Second, + }, logger) + if err != nil { + t.Fatalf("failed to create executor: %v", err) + } + defer executor.Close(ctx) + + // Test: execute an allowed command + cmd, err := executor.Command(ctx, "echo", "hello sandbox") + if err != nil { + t.Fatalf("failed to create command: %v", err) + } + output, err := cmd.Output() + if err != nil { + t.Fatalf("failed to run command: %v", err) + } + if got := string(output); got != "hello sandbox\n" { + t.Errorf("expected 'hello sandbox\\n', got %q", got) + } + + // Test: denied command + _, err = executor.Command(ctx, "rm", "-rf", "/") + if err == nil { + t.Error("expected error for denied command 'rm'") + } +} + +func TestDockerExecutor_Integration_ContainerIsolation(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + logger := logging.New() 
+ + executor, err := NewDockerExecutor(ctx, Config{ + Enabled: true, + Image: "alpine:3.19", + AllowedCommands: []string{"cat"}, + PoolSize: 1, + NetworkMode: "none", + }, logger) + if err != nil { + t.Fatalf("failed to create executor: %v", err) + } + defer executor.Close(ctx) + + // Host's /etc/hostname should NOT be accessible as host content + cmd, err := executor.Command(ctx, "cat", "/etc/hostname") + if err != nil { + t.Fatalf("failed to create command: %v", err) + } + output, err := cmd.Output() + if err != nil { + t.Logf("cat /etc/hostname failed (expected in isolated container): %v", err) + return + } + t.Logf("container hostname: %s", string(output)) +} diff --git a/pkg/sandbox/docker_test.go b/pkg/sandbox/docker_test.go new file mode 100644 index 00000000..e11c335b --- /dev/null +++ b/pkg/sandbox/docker_test.go @@ -0,0 +1,134 @@ +package sandbox + +import ( + "context" + "errors" + "testing" +) + +func TestDockerExecutor_Command_DeniedByAllowlist(t *testing.T) { + executor := &DockerExecutor{ + config: Config{Enabled: true, Image: "ubuntu:22.04"}, + allowlist: NewAllowlist([]string{"git"}, nil), + pool: &Pool{ + containers: []Container{{ID: "test", Name: "test", Ready: true}}, + }, + } + + _, err := executor.Command(context.Background(), "rm", "-rf", "/") + if err == nil { + t.Fatal("expected error for denied command") + } + if !errors.Is(err, ErrCommandDenied) { + t.Errorf("expected ErrCommandDenied, got: %v", err) + } +} + +func TestDockerExecutor_Command_AllowedCommand(t *testing.T) { + executor := &DockerExecutor{ + config: Config{Enabled: true, Image: "ubuntu:22.04"}, + allowlist: NewAllowlist([]string{"git"}, nil), + pool: &Pool{ + containers: []Container{{ID: "abc123", Name: "sandbox-0", Ready: true}}, + }, + } + + cmd, err := executor.Command(context.Background(), "git", "status") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cmd == nil { + t.Fatal("expected non-nil cmd") + } + args := cmd.Args + if len(args) < 6 { + 
t.Fatalf("expected at least 6 docker exec args, got: %v", args) + } + if args[1] != "exec" { + t.Errorf("expected 'exec', got %q", args[1]) + } + if args[2] != "-i" { + t.Errorf("expected '-i', got %q", args[2]) + } + if args[3] != "abc123" { + t.Errorf("expected container ID 'abc123', got %q", args[3]) + } + if args[4] != "git" { + t.Errorf("expected command 'git', got %q", args[4]) + } + if args[5] != "status" { + t.Errorf("expected arg 'status', got %q", args[5]) + } +} + +func TestDockerExecutor_Close(t *testing.T) { + var closed []string + closeFn := func(ctx context.Context, id string) error { + closed = append(closed, id) + return nil + } + executor := &DockerExecutor{ + config: Config{Enabled: true}, + pool: &Pool{ + containers: []Container{{ID: "abc", Name: "s-0", Ready: true}}, + closeFn: closeFn, + }, + } + + if err := executor.Close(context.Background()); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(closed) != 1 || closed[0] != "abc" { + t.Errorf("expected container 'abc' to be closed, got: %v", closed) + } +} + +func TestBuildContainerArgs(t *testing.T) { + config := Config{ + Image: "ubuntu:22.04", + MemoryLimit: "256m", + CPULimit: "0.5", + NetworkMode: "none", + MountPaths: []MountPath{ + {Host: "/data", Container: "/mnt/data", ReadOnly: true}, + }, + } + + args := buildContainerArgs(config, "test-container") + + // Check key flags are present + expected := map[string]bool{ + "run": false, "-d": false, "--read-only": false, + "--cap-drop": false, "ALL": false, + "--security-opt": false, "no-new-privileges": false, + } + for _, arg := range args { + if _, ok := expected[arg]; ok { + expected[arg] = true + } + } + for flag, found := range expected { + if !found { + t.Errorf("expected flag %q in args", flag) + } + } + + // Check mount is present with :ro suffix + foundMount := false + for _, arg := range args { + if arg == "/data:/mnt/data:ro" { + foundMount = true + } + } + if !foundMount { + t.Error("expected mount 
/data:/mnt/data:ro in args") + } + + // Last args should be image + sleep infinity + if args[len(args)-3] != "ubuntu:22.04" { + t.Errorf("expected image as third-to-last arg, got %q", args[len(args)-3]) + } + if args[len(args)-2] != "sleep" || args[len(args)-1] != "infinity" { + t.Error("expected 'sleep infinity' as last args") + } +} diff --git a/pkg/sandbox/errors.go b/pkg/sandbox/errors.go new file mode 100644 index 00000000..a807e24d --- /dev/null +++ b/pkg/sandbox/errors.go @@ -0,0 +1,20 @@ +package sandbox + +import "errors" + +var ( + // ErrCommandDenied is returned when a command is not in the allowlist. + ErrCommandDenied = errors.New("sandbox: command not in allowlist") + + // ErrDockerNotFound is returned when the docker binary is not available. + ErrDockerNotFound = errors.New("sandbox: docker binary not found") + + // ErrContainerUnhealthy is returned when no healthy container is available. + ErrContainerUnhealthy = errors.New("sandbox: container not ready") + + // ErrCommandTimeout is returned when a command exceeds the configured timeout. + ErrCommandTimeout = errors.New("sandbox: command execution timed out") + + // ErrSandboxDisabled is returned when sandbox is not enabled but executor is called. + ErrSandboxDisabled = errors.New("sandbox: not enabled") +) diff --git a/pkg/sandbox/pool.go b/pkg/sandbox/pool.go new file mode 100644 index 00000000..39064d71 --- /dev/null +++ b/pkg/sandbox/pool.go @@ -0,0 +1,81 @@ +package sandbox + +import ( + "context" + "fmt" + "sync" + "time" +) + +// Container represents a running sandbox container. +type Container struct { + ID string + Name string + Ready bool + CreatedAt time.Time +} + +// Pool manages a set of warm sandbox containers with round-robin selection. +type Pool struct { + containers []Container + mu sync.Mutex + nextIdx int + closeFn func(ctx context.Context, id string) error +} + +// NewPool creates a pool with pre-created containers. 
+func NewPool(containers []Container, closeFn func(ctx context.Context, id string) error) *Pool { + return &Pool{ + containers: containers, + closeFn: closeFn, + } +} + +// Acquire returns the next available container using round-robin selection. +func (p *Pool) Acquire(ctx context.Context) (*Container, error) { + p.mu.Lock() + defer p.mu.Unlock() + + if len(p.containers) == 0 { + return nil, ErrContainerUnhealthy + } + + c := &p.containers[p.nextIdx] + p.nextIdx = (p.nextIdx + 1) % len(p.containers) + + if !c.Ready { + return nil, fmt.Errorf("%w: container %s is not ready", ErrContainerUnhealthy, c.Name) + } + + return c, nil +} + +// Close stops and removes all containers in the pool. +func (p *Pool) Close(ctx context.Context) error { + p.mu.Lock() + defer p.mu.Unlock() + + var lastErr error + for _, c := range p.containers { + if p.closeFn != nil { + if err := p.closeFn(ctx, c.ID); err != nil { + lastErr = err + } + } + } + p.containers = nil + return lastErr +} + +// MarkUnhealthy marks a container as not ready. 
+func (p *Pool) MarkUnhealthy(id string) { + p.mu.Lock() + defer p.mu.Unlock() + + for i := range p.containers { + if p.containers[i].ID == id { + p.containers[i].Ready = false + break + } + } +} diff --git a/pkg/sandbox/pool_test.go b/pkg/sandbox/pool_test.go new file mode 100644 index 00000000..12ac1b0b --- /dev/null +++ b/pkg/sandbox/pool_test.go @@ -0,0 +1,89 @@ +package sandbox + +import ( + "context" + "testing" +) + +func TestPool_Acquire_RoundRobin(t *testing.T) { + containers := []Container{ + {ID: "abc123", Name: "sandbox-0", Ready: true}, + {ID: "def456", Name: "sandbox-1", Ready: true}, + } + p := &Pool{containers: containers} + + c1, err := p.Acquire(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + c2, err := p.Acquire(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + c3, err := p.Acquire(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if c1.ID != "abc123" { + t.Errorf("expected first container, got %s", c1.ID) + } + if c2.ID != "def456" { + t.Errorf("expected second container, got %s", c2.ID) + } + if c3.ID != "abc123" { + t.Errorf("expected round-robin back to first, got %s", c3.ID) + } +} + +func TestPool_Acquire_EmptyPool(t *testing.T) { + p := &Pool{containers: nil} + _, err := p.Acquire(context.Background()) + if err == nil { + t.Error("expected error for empty pool") + } +} + +func TestPool_Acquire_UnhealthyContainer(t *testing.T) { + containers := []Container{ + {ID: "abc123", Name: "sandbox-0", Ready: false}, + } + p := &Pool{containers: containers} + _, err := p.Acquire(context.Background()) + if err == nil { + t.Error("expected error for unhealthy container") + } +} + +func TestPool_MarkUnhealthy(t *testing.T) { + containers := []Container{ + {ID: "abc123", Name: "sandbox-0", Ready: true}, + } + p := &Pool{containers: containers} + p.MarkUnhealthy("abc123") + + _, err := p.Acquire(context.Background()) + if err == nil { + 
t.Error("expected error after marking container unhealthy") + } +} + +func TestPool_Close(t *testing.T) { + var closed []string + closeFn := func(ctx context.Context, id string) error { + closed = append(closed, id) + return nil + } + containers := []Container{ + {ID: "abc123", Name: "sandbox-0", Ready: true}, + {ID: "def456", Name: "sandbox-1", Ready: true}, + } + p := &Pool{containers: containers, closeFn: closeFn} + + if err := p.Close(context.Background()); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(closed) != 2 { + t.Errorf("expected 2 containers closed, got %d", len(closed)) + } +} diff --git a/pkg/sandbox/sandbox.go b/pkg/sandbox/sandbox.go new file mode 100644 index 00000000..82644f26 --- /dev/null +++ b/pkg/sandbox/sandbox.go @@ -0,0 +1,29 @@ +package sandbox + +import ( + "context" + "os/exec" +) + +// CommandExecutor creates exec.Cmd instances, optionally sandboxed. +// Returns *exec.Cmd so MCP's CommandTransport can attach stdin/stdout pipes. +type CommandExecutor interface { + // Command creates an exec.Cmd for the given command and args. + Command(ctx context.Context, name string, args ...string) (*exec.Cmd, error) + // Close releases sandbox resources (stops containers, etc.). + Close(ctx context.Context) error +} + +// LocalExecutor runs commands directly on the host with no sandboxing. +// This is the default executor when no sandbox is configured. +type LocalExecutor struct{} + +// Command creates an exec.Cmd that runs directly on the host. +func (l *LocalExecutor) Command(ctx context.Context, name string, args ...string) (*exec.Cmd, error) { + return exec.CommandContext(ctx, name, args...), nil +} + +// Close is a no-op for LocalExecutor. 
+func (l *LocalExecutor) Close(_ context.Context) error { + return nil +} diff --git a/pkg/sandbox/sandbox_test.go b/pkg/sandbox/sandbox_test.go new file mode 100644 index 00000000..a338ee1e --- /dev/null +++ b/pkg/sandbox/sandbox_test.go @@ -0,0 +1,37 @@ +package sandbox + +import ( + "context" + "testing" +) + +func TestLocalExecutor_Command(t *testing.T) { + executor := &LocalExecutor{} + cmd, err := executor.Command(context.Background(), "echo", "hello") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cmd == nil { + t.Fatal("expected non-nil cmd") + } + if cmd.Path == "" { + t.Error("expected cmd.Path to be set") + } + output, err := cmd.Output() + if err != nil { + t.Fatalf("failed to run cmd: %v", err) + } + if string(output) != "hello\n" { + t.Errorf("unexpected output: %q", string(output)) + } +} + +func TestLocalExecutor_Close(t *testing.T) { + executor := &LocalExecutor{} + if err := executor.Close(context.Background()); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +// Compile-time check that LocalExecutor implements CommandExecutor +var _ CommandExecutor = (*LocalExecutor)(nil) diff --git a/pkg/tools/websearch/websearch.go b/pkg/tools/websearch/websearch.go index 72482535..a32882a8 100644 --- a/pkg/tools/websearch/websearch.go +++ b/pkg/tools/websearch/websearch.go @@ -173,11 +173,11 @@ func (t *Tool) Run(ctx context.Context, input string) (string, error) { // Format results var sb strings.Builder - sb.WriteString(fmt.Sprintf("Search results for '%s':\n\n", query)) + fmt.Fprintf(&sb, "Search results for '%s':\n\n", query) for i, item := range result.Items { - sb.WriteString(fmt.Sprintf("%d. %s\n", i+1, item.Title)) - sb.WriteString(fmt.Sprintf(" URL: %s\n", item.Link)) - sb.WriteString(fmt.Sprintf(" %s\n\n", item.Snippet)) + fmt.Fprintf(&sb, "%d. %s\n", i+1, item.Title) + fmt.Fprintf(&sb, " URL: %s\n", item.Link) + fmt.Fprintf(&sb, " %s\n\n", item.Snippet) } // Cache result