Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions cmd/ax/internal/cliutil/cliutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,5 +94,15 @@ func NewControllerFromConfig(ctx context.Context, cfg *config.Config) (*controll
}
}

// agent-sandbox backend (kubernetes-sigs/agent-sandbox). Parallel to
// SubstrateAgents above but targets the GA-stable Sandbox CRDs instead
// of Substrate's ate.dev resources, so it works on managed GKE
// (Autopilot or Standard) without privileged DaemonSet requirements.
for _, agentCfg := range cfg.Registry.AgentSandboxAgents {
if err := c.Registry().RegisterAgentSandbox(ctx, agentCfg); err != nil {
return nil, fmt.Errorf("failed to register agent-sandbox agent %s: %w", agentCfg.ID, err)
}
}

return c, nil
}
25 changes: 25 additions & 0 deletions examples/python_sandbox_agent/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Multi-stage build for the python_sandbox_agent gRPC server.
#
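# Stage 1 (golang): cross-compile the Go binary statically (CGO_ENABLED=0)
# so it does not depend on the build image's libc.
# Stage 2 (python:3.12-slim): provides python3 on PATH (installed under
# /usr/local/bin in the official image) for the executor subprocess. A
# distroless base would be smaller, but distroless lacks python.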
#
# Image must be a child of a Python base because the agent shells out to
# python3 to execute user code. The surrounding agent-sandbox Pod (gVisor)
# provides the actual isolation; this container just runs python locally.

# Build stage: compiles the agent binary. The COPY paths below expect the
# repository root as the build context.
FROM golang:1.26 AS build
WORKDIR /src
# Copy only the module files first so the dependency-download layer stays
# cached until go.mod or go.sum change.
COPY go.mod go.sum ./
RUN go mod download
COPY . .
# CGO_ENABLED=0 produces a static binary; -trimpath and -ldflags='-s -w'
# strip local build paths and the symbol/debug tables from it.
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
go build -trimpath -ldflags='-s -w' \
-o /out/agent ./examples/python_sandbox_agent

# Runtime stage: a Python base image so the agent can exec python3.
FROM python:3.12-slim
COPY --from=build /out/agent /usr/local/bin/agent
# Run as a non-root UID:GID.
USER 1000:1000
# Port the agent's gRPC server is documented to listen on.
EXPOSE 8494
ENTRYPOINT ["/usr/local/bin/agent"]
95 changes: 95 additions & 0 deletions examples/python_sandbox_agent/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# python_sandbox_agent

User-code half of the `feat/agent-sandbox-backend` integration. Implements
`proto.AgentService` over gRPC on `:8494` inside a
`kubernetes-sigs/agent-sandbox` Sandbox pod. `Connect()` takes the Python
source from `AgentStart.subagent_prompt` when it is set, and otherwise from
the last user-role message's text; it runs that source as Python 3 and
returns the exit code, stdout, and stderr in a single `AgentResponse`.

## Where it fits

```
AX server (ax serve)
│ AgentSandboxAgent (this branch)
SandboxClaim → Sandbox CR → gVisor pod ← this image runs here
└─ python_sandbox_agent gRPC :8494
└─ python3 (subprocess executor)
```

The surrounding Sandbox (gVisor) provides the kernel-level isolation that
makes it safe to run untrusted LLM-generated code. This binary itself is
small and unprivileged — it just runs `python3 -c <source>`.

## Build

```bash
docker build \
-f examples/python_sandbox_agent/Dockerfile \
-t <registry>/<project>/python-sandbox-agent:<tag> .
```

Push the resulting image to a registry your `SandboxTemplate` can pull from.

## Configure AX to use it

In `ax.yaml`:

```yaml
registry:
agent_sandbox_agents:
- id: py
name: Python Sandbox Agent
namespace: agent-platform
template: python-sandbox-template
port: 8494
protocol: axp
```

And in the `agent-platform` namespace, create a `SandboxTemplate` that
points at this image:

```yaml
apiVersion: extensions.agents.x-k8s.io/v1alpha1
kind: SandboxTemplate
metadata:
name: python-sandbox-template
namespace: agent-platform
spec:
podTemplate:
spec:
runtimeClassName: gvisor
restartPolicy: Never
automountServiceAccountToken: false
containers:
- name: agent
image: <registry>/<project>/python-sandbox-agent:latest
ports: [{ containerPort: 8494 }]
readinessProbe:
tcpSocket: { port: 8494 }
periodSeconds: 2
failureThreshold: 30
resources:
requests: { cpu: 100m, memory: 256Mi }
limits: { cpu: 500m, memory: 512Mi }
```

## Smoke test (no K8s)

```bash
go run ./examples/python_sandbox_agent &
grpcurl -plaintext \
-d '{"start":{"messages":[{"role":"user","content":{"text":{"text":"print(7*6)"}}}]}}' \
localhost:8494 ax.AgentService/Connect
# => exit_code=0\nstdout:\n42\n\nstderr:\n
```

## Tests

```bash
go test ./examples/python_sandbox_agent/...
```

7 tests, all PASS — covering the executor injection contract, last-user
extraction, error paths, exec timeout, and non-zero exit handling.
162 changes: 162 additions & 0 deletions examples/python_sandbox_agent/internal/server/server.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package server implements the proto.AgentService for the
// python_sandbox_agent. Split from package main so it's importable in tests.
package server

import (
"bytes"
"context"
"errors"
"fmt"
"os/exec"
"time"

"google.golang.org/grpc"

"github.com/google/ax/proto"
)

// Server is the AgentService implementation. Each Connect RPC resolves a
// Python source string from the request, hands it to executor, and sends
// back a single AgentResponse whose text carries the exit code, stdout, and
// stderr. Construct it with New so the defaults are populated.
type Server struct {
// Embedding the generated Unimplemented server supplies default
// implementations for any AgentService methods this type does not define.
proto.UnimplementedAgentServiceServer

// executor runs the Python source. It is an interface so tests can inject
// a fake without spawning a real subprocess.
executor pythonExecutor

// execTimeout is the deadline Connect applies (on top of the stream's own
// context) to each executor.Run call.
execTimeout time.Duration
}

// pythonExecutor runs the given source as Python 3 and returns the captured
// stdout and stderr plus the process exit code. Implementations should
// respect ctx for cancellation.
//
// Connect treats a non-nil err as a runner failure and prefixes the response
// body with it; stdout, stderr, and exitCode are included in the body either
// way.
type pythonExecutor interface {
Run(ctx context.Context, source string) (stdout, stderr string, exitCode int, err error)
}

// New returns a Server initialised with the production defaults — a
// subprocess-backed executor that invokes the `python3` binary found on the
// container PATH, and a 60-second per-call timeout — and then applies opts
// in the order given, so a later option overrides an earlier one.
func New(opts ...Option) *Server {
	srv := new(Server)
	srv.executor = &subprocessPythonExecutor{}
	srv.execTimeout = 60 * time.Second

	for i := range opts {
		opts[i](srv)
	}
	return srv
}

// Option configures a Server at construction. New applies options in the
// order they are passed, after the defaults have been set, so a later option
// overrides an earlier one that touches the same field.
type Option func(*Server)

// WithExecutor returns an Option that replaces the Server's executor with e.
// Intended for tests that need to avoid spawning a real python3 process.
func WithExecutor(e pythonExecutor) Option {
	apply := func(srv *Server) {
		srv.executor = e
	}
	return apply
}

// WithExecTimeout returns an Option that sets the per-call execution
// timeout to d, replacing the default chosen by New.
func WithExecTimeout(d time.Duration) Option {
	apply := func(srv *Server) {
		srv.execTimeout = d
	}
	return apply
}

// Connect implements proto.AgentService as a single-turn exchange.
//
// The Python source is taken from AgentStart.subagent_prompt when that is
// non-empty (planner-driven invocations); otherwise it falls back to the text
// found by lastUserText over AgentStart.Messages (direct callers such as
// grpcurl smoke tests). The source is executed under a context bounded by
// execTimeout, and exactly one AgentResponse is sent before returning.
//
// It returns an error when the request has no Start, when no source can be
// resolved, or when sending the response fails. An executor failure is not
// returned as an RPC error; it is reported inside the response body.
func (s *Server) Connect(req *proto.AgentRequest, stream grpc.ServerStreamingServer[proto.AgentResponse]) error {
	start := req.GetStart()
	if start == nil {
		return errors.New("AgentRequest.Start is required")
	}

	// Prefer the typed prompt; fall back to the conversation messages.
	source := start.GetSubagentPrompt()
	if source == "" {
		text, found := lastUserText(start.Messages)
		if !found {
			return errors.New("no subagent_prompt and no user-text Message present in AgentStart")
		}
		source = text
	}

	// Bound the execution by both the caller's stream and our own timeout.
	runCtx, stop := context.WithTimeout(stream.Context(), s.execTimeout)
	defer stop()

	out, errOut, code, runErr := s.executor.Run(runCtx, source)

	report := fmt.Sprintf("exit_code=%d\nstdout:\n%s\nstderr:\n%s", code, out, errOut)
	if runErr != nil {
		// Keep whatever output was captured, but lead with the failure.
		report = fmt.Sprintf("python execution failed: %v\n\n%s", runErr, report)
	}

	reply := &proto.Message{
		Role: "assistant",
		Content: &proto.Content{
			Type: &proto.Content_Text{
				Text: &proto.TextContent{Text: report},
			},
		},
	}
	outputs := &proto.AgentOutputs{Messages: []*proto.Message{reply}}

	sendErr := stream.Send(&proto.AgentResponse{
		ConversationId: req.ConversationId,
		ExecId:         req.ExecId,
		Type:           &proto.AgentResponse_Outputs{Outputs: outputs},
	})
	if sendErr == nil {
		return nil
	}
	return fmt.Errorf("send response: %w", sendErr)
}

// lastUserText returns the text content of the most recent user-role
// message in msgs, scanning from the end of the slice.
//
// Messages that are nil, whose Role is not "user", or whose content is not
// text are skipped. The role check matters because the returned string is
// executed as Python: without it, a trailing assistant or system message
// would be run instead of the user's code.
//
// The boolean result is false when no qualifying message exists.
func lastUserText(msgs []*proto.Message) (string, bool) {
	const userRole = "user"

	for i := len(msgs) - 1; i >= 0; i-- {
		m := msgs[i]
		if m == nil || m.Role != userRole {
			continue
		}
		if t := m.GetContent().GetText(); t != nil {
			return t.Text, true
		}
	}
	return "", false
}

// subprocessPythonExecutor is the production executor; it runs `python3 -c
// <source>` and captures stdio. The surrounding Sandbox/gVisor pod
// provides the actual isolation — this just needs to exec the local
// python3 binary.
type subprocessPythonExecutor struct{}

// Run executes source with `python3 -c` and returns the captured stdout and
// stderr, the process exit code, and a runner error.
//
// A non-zero exit on its own is not a runner error: the exit code is
// returned with a nil error, since failing programs are expected. A non-nil
// error is returned when:
//   - python3 cannot be started (for example, it is not on PATH);
//   - ctx is cancelled or its deadline passes while the program is running,
//     in which case the error wraps ctx.Err() and exitCode is whatever the
//     killed process reported (-1 for a signal on Unix);
//   - the process exits but its stdio pipes are still held open (by a child
//     it spawned) after the drain grace period.
//
// Whatever output was captured is returned in every case.
func (subprocessPythonExecutor) Run(ctx context.Context, source string) (string, string, int, error) {
	// pipeDrainGrace bounds how long Wait lingers after the process has
	// exited or ctx is done. Without it, a grandchild that inherited the
	// stdout/stderr pipes could block Run well past the caller's deadline.
	const pipeDrainGrace = 2 * time.Second

	cmd := exec.CommandContext(ctx, "python3", "-c", source)
	cmd.WaitDelay = pipeDrainGrace

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	err := cmd.Run()
	exitCode := 0

	var exitErr *exec.ExitError
	if errors.As(err, &exitErr) {
		exitCode = exitErr.ExitCode()
		// A non-zero exit is expected for some programs; not a runner error.
		err = nil
		// If the context ended, though, the process was killed rather than
		// exiting on its own. Report that so a timeout is not mistaken for
		// an ordinary failure.
		if ctxErr := ctx.Err(); ctxErr != nil {
			err = fmt.Errorf("python3 interrupted: %w", ctxErr)
		}
	}
	return stdout.String(), stderr.String(), exitCode, err
}
Loading