From 0bbbafbc3c245c58da0b4b54012dd25d7a1ce59b Mon Sep 17 00:00:00 2001 From: Lex Christopherson Date: Fri, 1 May 2026 12:11:46 -0600 Subject: [PATCH] feat: add ambient browser runtime bridge --- go.mod | 2 +- go.sum | 4 +- internal/browser/manager.go | 187 +++++++++++++++--- internal/browser/manager_test.go | 84 +++++++- internal/browser/runtime_probe.go | 152 ++++++++++++++ internal/browser/runtime_probe_test.go | 87 ++++++++ internal/browser/types.go | 24 +++ internal/loop/browser_rpc.go | 184 +++++++++++++++++ internal/loop/daemon.go | 58 ++++-- internal/loop/daemon_test.go | 39 ++++ internal/pi/executor.go | 105 ++++++---- internal/pi/extension/browser-extension.ts | 181 +++++++++++++++++ .../browser-methods.generated.test.mjs | 24 +++ .../pi/extension/browser-methods.generated.ts | 95 +++++++++ internal/pi/extension/browser-methods.ts | 92 +-------- internal/pi/extension/browser-tool.test.mjs | 28 +-- .../pi/extension/gsd-browser-skill/SKILL.md | 26 +++ internal/pi/extension/index.ts | 183 +---------------- internal/relay/conn.go | 24 ++- internal/session/actor.go | 85 +++++--- 20 files changed, 1275 insertions(+), 389 deletions(-) create mode 100644 internal/browser/runtime_probe.go create mode 100644 internal/browser/runtime_probe_test.go create mode 100644 internal/loop/browser_rpc.go create mode 100644 internal/pi/extension/browser-extension.ts create mode 100644 internal/pi/extension/browser-methods.generated.test.mjs create mode 100644 internal/pi/extension/browser-methods.generated.ts create mode 100644 internal/pi/extension/gsd-browser-skill/SKILL.md diff --git a/go.mod b/go.mod index 25fb356..3c4faa1 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.26.2 require ( github.com/coder/websocket v1.8.14 github.com/creack/pty v1.1.24 - github.com/gsd-build/protocol-go v0.33.0 + github.com/gsd-build/protocol-go v0.34.0 github.com/spf13/cobra v1.10.2 gopkg.in/natefinch/lumberjack.v2 v2.2.1 ) diff --git a/go.sum b/go.sum index ac197e2..706f753 100644 --- a/go.sum +++ b/go.sum @@ -3,8 +3,8 @@ github.com/coder/websocket v1.8.14/go.mod h1:NX3SzP+inril6yawo5CQXx8+fk145lPDC6p github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE= -github.com/gsd-build/protocol-go v0.33.0 h1:/UBKhB5bcW7QVvGNDH0h7KZIaVVqvE9/OtYi0uH4RrI= -github.com/gsd-build/protocol-go v0.33.0/go.mod h1:vECSwMFp59Ihu5ZH4aLF5fuW9zJ4a3ZXCYngmzfBn8s= +github.com/gsd-build/protocol-go v0.34.0 h1:Au69NlKq4NULgmUvleNod+PUVa1AVy9HjGyO5o+7TMI= +github.com/gsd-build/protocol-go v0.34.0/go.mod h1:vECSwMFp59Ihu5ZH4aLF5fuW9zJ4a3ZXCYngmzfBn8s= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= diff --git a/internal/browser/manager.go b/internal/browser/manager.go index 6748527..2ee3eda 100644 --- a/internal/browser/manager.go +++ b/internal/browser/manager.go @@ -2,6 +2,7 @@ package browser import ( "context" + "encoding/json" "fmt" "sync" "time" @@ -58,9 +59,49 @@ type Grant struct { GrantID string BrowserID string SessionID string + ChannelID string TaskID string } +func (m *Manager) Ensure(ctx context.Context, req EnsureRequest) (Grant, error) { + if grant, ok := m.GrantForTask(req.TaskID); ok { + return grant, nil + } + if grant, ok := m.GrantForSession(req.SessionID); ok { + return grant, nil + } + if req.GrantID == "" { + return Grant{}, fmt.Errorf("browser grant missing") + } + if req.SessionID == "" { + return Grant{}, fmt.Errorf("browser session missing") + } + if req.ExpiresAt == "" { + return Grant{}, fmt.Errorf("browser grant expiry missing") + } + if err := m.Open(ctx, &protocol.BrowserSessionOpen{ + Type: protocol.MsgTypeBrowserSessionOpen, + RequestID: fmt.Sprintf("browser_lazy_%d", time.Now().UnixNano()), + GrantID: req.GrantID, + SessionID: req.SessionID, + ProjectID: req.ProjectID, + TaskID: req.TaskID, + ChannelID: req.ChannelID, + MachineID: req.MachineID, + Mode: "clean", + ExpiresAt: req.ExpiresAt, + }); err != nil { + return Grant{}, err + } + if grant, ok := m.GrantForTask(req.TaskID); ok { + return grant, nil + } + if grant, ok := m.GrantForSession(req.SessionID); ok { + return grant, nil + } + return Grant{}, fmt.Errorf("browser grant unavailable after open") +} + func (m *Manager) Open(ctx context.Context, msg *protocol.BrowserSessionOpen) error { expiresAt, err := time.Parse(time.RFC3339Nano, msg.ExpiresAt) if err != nil { @@ -278,6 +319,7 @@ func (m *Manager) grantForStateLocked(state *sessionState) (Grant, bool) { GrantID: state.openRequest.GrantID, BrowserID: state.browserID, SessionID: state.openRequest.SessionID, + ChannelID: state.openRequest.ChannelID, TaskID: state.openRequest.TaskID, }, true } @@ -463,36 +505,41 @@ func (m *Manager) removeStateLocked(state *sessionState) { } func (m *Manager) Tool(ctx context.Context, msg *protocol.BrowserToolCall) error { + _, err := m.ToolResult(ctx, msg) + return err +} + +func (m *Manager) ToolResult(ctx context.Context, msg *protocol.BrowserToolCall) (ToolResult, error) { m.mu.Lock() state, ok := m.byID[msg.BrowserID] if !ok { m.mu.Unlock() - return fmt.Errorf("browser session not found") + return ToolResult{}, fmt.Errorf("browser session not found") } if time.Now().After(state.expiresAt) { m.mu.Unlock() - return fmt.Errorf("browser grant expired") + return ToolResult{}, fmt.Errorf("browser grant expired") } if state.owner != OwnerAgent { m.mu.Unlock() - return fmt.Errorf("browser control belongs to %s", state.owner) + return ToolResult{}, fmt.Errorf("browser control belongs to %s", state.owner) } req := state.openRequest risk := classifyBrowserTool(msg.Method, msg.ParamsJSON) - if msg.Method == "vault_save" { + summary := browserToolSummary(msg.Method, risk) + if isCredentialBrowserTool(msg.Method, risk) { m.mu.Unlock() - if err := m.sender.Send(ctx, &protocol.BrowserToolResult{ - Type: protocol.MsgTypeBrowserToolResult, - BrowserID: msg.BrowserID, - GrantID: msg.GrantID, - TaskID: msg.TaskID, - ToolUseID: msg.ToolUseID, + _ = m.sendToolStarted(ctx, msg, req, risk, summary) + _ = m.sendToolUpdated(ctx, msg, req, "rejected", summary, nil) + result := ToolResult{ OK: false, - Error: "agent-initiated vault_save is not allowed", - }); err != nil { - return fmt.Errorf("send browser vault_save rejection: %w", err) + Error: "browser credential methods are not available to agents", + ErrorCode: "feature_not_enabled", + } + if err := m.sendToolResult(ctx, msg, req, result); err != nil { + return result, fmt.Errorf("send browser credential method rejection: %w", err) } - return fmt.Errorf("agent-initiated vault_save is not allowed") + return result, fmt.Errorf("browser credential methods are not available to agents") } if browserRiskRequiresApproval(risk) { previousOwner := state.owner @@ -501,6 +548,8 @@ func (m *Manager) Tool(ctx context.Context, msg *protocol.BrowserToolCall) error state.controlVersion++ nextVersion := state.controlVersion m.mu.Unlock() + _ = m.sendToolStarted(ctx, msg, req, risk, summary) + _ = m.sendToolUpdated(ctx, msg, req, "approval_required", summary, nil) requestID := fmt.Sprintf("browser_sensitive_%d", time.Now().UnixNano()) if err := m.sender.Send(ctx, &protocol.BrowserSensitiveActionRequest{ Type: protocol.MsgTypeBrowserSensitiveActionRequest, @@ -522,27 +571,115 @@ func (m *Manager) Tool(ctx context.Context, msg *protocol.BrowserToolCall) error current.controlVersion = previousVersion } m.mu.Unlock() - return fmt.Errorf("send browser sensitive action request: %w", err) + return ToolResult{}, fmt.Errorf("send browser sensitive action request: %w", err) } - return fmt.Errorf("browser action requires approval: %s", risk) + return ToolResult{OK: false, Error: "browser action requires approval", ErrorCode: "approval_required"}, fmt.Errorf("browser action requires approval: %s", risk) } m.mu.Unlock() + if err := m.sendToolStarted(ctx, msg, req, risk, summary); err != nil { + return ToolResult{}, err + } result, err := m.service.Tool(ctx, msg.BrowserID, msg.Method, msg.ParamsJSON) if err != nil { - return err + result = ToolResult{OK: false, Error: err.Error(), ErrorCode: "browser_tool_failed"} + _ = m.sendToolUpdated(ctx, msg, req, "error", summary, nil) + _ = m.sendToolResult(ctx, msg, req, result) + return result, err + } + status := "ok" + if !result.OK { + status = "error" } + if err := m.sendToolUpdated(ctx, msg, req, status, summary, result.ResultJSON); err != nil { + return result, err + } + return result, m.sendToolResult(ctx, msg, req, result) +} + +func (m *Manager) sendToolStarted(ctx context.Context, msg *protocol.BrowserToolCall, req OpenRequest, risk BrowserRisk, summary string) error { + return m.sender.Send(ctx, &protocol.BrowserToolCallStarted{ + Type: protocol.MsgTypeBrowserToolCallStarted, + BrowserID: msg.BrowserID, + GrantID: msg.GrantID, + SessionID: req.SessionID, + ChannelID: req.ChannelID, + TaskID: msg.TaskID, + ToolUseID: msg.ToolUseID, + Method: msg.Method, + Category: string(risk), + Summary: summary, + Metadata: safeToolMetadata(msg.Method, msg.ParamsJSON), + At: time.Now().UTC().Format(time.RFC3339Nano), + }) +} + +func (m *Manager) sendToolUpdated(ctx context.Context, msg *protocol.BrowserToolCall, req OpenRequest, status string, summary string, metadata json.RawMessage) error { + return m.sender.Send(ctx, &protocol.BrowserToolCallUpdated{ + Type: protocol.MsgTypeBrowserToolCallUpdated, + BrowserID: msg.BrowserID, + GrantID: msg.GrantID, + SessionID: req.SessionID, + ChannelID: req.ChannelID, + TaskID: msg.TaskID, + ToolUseID: msg.ToolUseID, + Status: status, + Summary: summary, + Metadata: metadata, + At: time.Now().UTC().Format(time.RFC3339Nano), + }) +} + +func (m *Manager) sendToolResult(ctx context.Context, msg *protocol.BrowserToolCall, req OpenRequest, result ToolResult) error { return m.sender.Send(ctx, &protocol.BrowserToolResult{ - Type: protocol.MsgTypeBrowserToolResult, - BrowserID: msg.BrowserID, - GrantID: msg.GrantID, - TaskID: msg.TaskID, - ToolUseID: msg.ToolUseID, - OK: result.OK, - ResultJSON: result.ResultJSON, - Error: result.Error, + Type: protocol.MsgTypeBrowserToolResult, + BrowserID: msg.BrowserID, + GrantID: msg.GrantID, + SessionID: req.SessionID, + ChannelID: req.ChannelID, + TaskID: msg.TaskID, + ToolUseID: msg.ToolUseID, + OK: result.OK, + ResultJSON: result.ResultJSON, + Error: result.Error, + ErrorCode: result.ErrorCode, + Sensitivity: "public", + RedactionStatus: "not_needed", }) } func browserApprovalSummary(method string, risk BrowserRisk) string { return fmt.Sprintf("Run browser method %s (%s)", method, risk) } + +func browserToolSummary(method string, risk BrowserRisk) string { + return fmt.Sprintf("Run browser method %s (%s)", method, risk) +} + +func isCredentialBrowserTool(method string, risk BrowserRisk) bool { + if risk == BrowserRiskCredentialAuth { + return true + } + switch method { + case "save_state", "restore_state", "vault_save", "vault_login", "vault_list": + return true + default: + return false + } +} + +func safeToolMetadata(method string, params json.RawMessage) json.RawMessage { + if method != "navigate" || len(params) == 0 { + return nil + } + var payload struct { + URL string `json:"url"` + } + if err := json.Unmarshal(params, &payload); err != nil || payload.URL == "" { + return nil + } + data, err := json.Marshal(map[string]string{"url": payload.URL}) + if err != nil { + return nil + } + return data +} diff --git a/internal/browser/manager_test.go b/internal/browser/manager_test.go index 51a5505..235c89f 100644 --- a/internal/browser/manager_test.go +++ b/internal/browser/manager_test.go @@ -266,7 +266,7 @@ func TestManagerBlocksSensitiveToolUntilApproval(t *testing.T) { GrantID: "grant_1", TaskID: "task_1", ToolUseID: "tool_1", - Method: "vault_login", + Method: "fill_form", }) if err == nil { @@ -283,6 +283,86 @@ func TestManagerBlocksSensitiveToolUntilApproval(t *testing.T) { } } +func TestManagerSendsToolLifecycleEventsAndResultContext(t *testing.T) { + service := &fakeService{} + sender := &recordingSender{} + m := NewManager(ManagerOptions{Service: service, Sender: sender, FrameInterval: time.Hour}) + openBrowserForTest(t, m, "browser_1") + + err := m.Tool(context.Background(), &protocol.BrowserToolCall{ + Type: protocol.MsgTypeBrowserToolCall, + BrowserID: "browser_1", + GrantID: "grant_1", + TaskID: "task_1", + ToolUseID: "tool_1", + Method: "navigate", + ParamsJSON: json.RawMessage(`{"url":"https://example.com"}`), + }) + if err != nil { + t.Fatalf("tool: %v", err) + } + + var started *protocol.BrowserToolCallStarted + var updated *protocol.BrowserToolCallUpdated + var result *protocol.BrowserToolResult + for _, msg := range sender.snapshot() { + switch typed := msg.(type) { + case *protocol.BrowserToolCallStarted: + started = typed + case *protocol.BrowserToolCallUpdated: + updated = typed + case *protocol.BrowserToolResult: + result = typed + } + } + if started == nil || started.SessionID != "session_1" || started.ChannelID != "channel_1" || started.Category != string(BrowserRiskInspection) { + t.Fatalf("started = %+v", started) + } + if updated == nil || updated.Status != "ok" || updated.SessionID != "session_1" || updated.ChannelID != "channel_1" { + t.Fatalf("updated = %+v", updated) + } + if result == nil || !result.OK || result.SessionID != "session_1" || result.ChannelID != "channel_1" || result.RedactionStatus != "not_needed" { + t.Fatalf("result = %+v", result) + } +} + +func TestManagerRejectsCredentialMethodsWithoutApproval(t *testing.T) { + service := &fakeService{} + sender := &recordingSender{} + m := NewManager(ManagerOptions{Service: service, Sender: sender, FrameInterval: time.Hour}) + openBrowserForTest(t, m, "browser_1") + + err := m.Tool(context.Background(), &protocol.BrowserToolCall{ + Type: protocol.MsgTypeBrowserToolCall, + BrowserID: "browser_1", + GrantID: "grant_1", + TaskID: "task_1", + ToolUseID: "tool_1", + Method: "vault_login", + }) + if err == nil { + t.Fatal("expected credential method rejection") + } + if sender.hasType(protocol.MsgTypeBrowserSensitiveActionRequest) { + t.Fatalf("credential method should not request approval") + } + var result *protocol.BrowserToolResult + for _, msg := range sender.snapshot() { + if typed, ok := msg.(*protocol.BrowserToolResult); ok { + result = typed + } + } + if result == nil || result.OK || result.ErrorCode != "feature_not_enabled" { + t.Fatalf("result = %+v", result) + } + service.mu.Lock() + toolCalls := service.toolCalls + service.mu.Unlock() + if toolCalls != 0 { + t.Fatalf("credential method reached service") + } +} + func TestManagerRollsBackApprovalOwnerWhenRequestSendFails(t *testing.T) { service := &fakeService{} m := NewManager(ManagerOptions{Service: service, Sender: &recordingSender{}, FrameInterval: time.Hour}) @@ -295,7 +375,7 @@ func TestManagerRollsBackApprovalOwnerWhenRequestSendFails(t *testing.T) { GrantID: "grant_1", TaskID: "task_1", ToolUseID: "tool_1", - Method: "vault_login", + Method: "fill_form", }) if err == nil { diff --git a/internal/browser/runtime_probe.go b/internal/browser/runtime_probe.go new file mode 100644 index 0000000..b012fef --- /dev/null +++ b/internal/browser/runtime_probe.go @@ -0,0 +1,152 @@ +package browser + +import ( + "context" + "encoding/json" + "os" + "os/exec" + "regexp" + "strings" + "time" +) + +const RequiredRuntimeVersion = "0.1.20" + +type RuntimeStatus struct { + Installed bool + Version string + MinVersion string + MinVersionOK bool + Path string + ErrorCode string + ErrorMessage string + CloudMethodsVersion int + ChromeAvailable bool + Ready bool +} + +type methodManifest struct { + ManifestVersion int `json:"manifestVersion"` +} + +type daemonHealth struct { + OK bool `json:"ok"` + ChromeAvailable bool `json:"chromeAvailable"` + Error string `json:"error"` +} + +func ProbeRuntime(ctx context.Context, binaryPath string) RuntimeStatus { + if binaryPath == "" { + binaryPath = os.Getenv("GSD_BROWSER_PATH") + } + if binaryPath == "" { + binaryPath = "gsd-browser" + } + status := RuntimeStatus{MinVersion: RequiredRuntimeVersion, Path: binaryPath} + versionCtx, cancel := context.WithTimeout(ctx, 3*time.Second) + defer cancel() + + out, err := exec.CommandContext(versionCtx, binaryPath, "--version").CombinedOutput() + if err != nil { + status.ErrorCode = "browser_not_installed" + status.ErrorMessage = strings.TrimSpace(string(out)) + if status.ErrorMessage == "" { + status.ErrorMessage = err.Error() + } + return status + } + + status.Installed = true + status.Version = parseBrowserVersion(string(out)) + status.MinVersionOK = compareSemver(status.Version, RequiredRuntimeVersion) >= 0 + if !status.MinVersionOK { + status.ErrorCode = "version_too_old" + status.ErrorMessage = "gsd-browser " + status.Version + " is older than required " + RequiredRuntimeVersion + return status + } + + manifestCtx, manifestCancel := context.WithTimeout(ctx, 3*time.Second) + defer manifestCancel() + manifestOut, err := exec.CommandContext(manifestCtx, binaryPath, "cloud-methods", "--json").CombinedOutput() + if err != nil { + status.ErrorCode = "manifest_unavailable" + status.ErrorMessage = strings.TrimSpace(string(manifestOut)) + if status.ErrorMessage == "" { + status.ErrorMessage = err.Error() + } + return status + } + + var manifest methodManifest + if err := json.Unmarshal(manifestOut, &manifest); err != nil { + status.ErrorCode = "manifest_invalid" + status.ErrorMessage = err.Error() + return status + } + status.CloudMethodsVersion = manifest.ManifestVersion + + healthCtx, healthCancel := context.WithTimeout(ctx, 5*time.Second) + defer healthCancel() + healthOut, err := exec.CommandContext(healthCtx, binaryPath, "daemon", "health", "--json").CombinedOutput() + if err != nil { + status.ErrorCode = "chrome_missing" + status.ErrorMessage = strings.TrimSpace(string(healthOut)) + if status.ErrorMessage == "" { + status.ErrorMessage = err.Error() + } + return status + } + var health daemonHealth + if err := json.Unmarshal(healthOut, &health); err != nil || !health.OK || !health.ChromeAvailable { + status.ErrorCode = "chrome_missing" + if health.Error != "" { + status.ErrorMessage = health.Error + } else if err != nil { + status.ErrorMessage = err.Error() + } else { + status.ErrorMessage = "Chrome/Chromium is unavailable" + } + return status + } + status.ChromeAvailable = true + status.Ready = true + return status +} + +func parseBrowserVersion(out string) string { + re := regexp.MustCompile(`\d+\.\d+\.\d+`) + if match := re.FindString(out); match != "" { + return match + } + return strings.TrimSpace(out) +} + +func compareSemver(a, b string) int { + parse := func(s string) [3]int { + var out [3]int + parts := strings.Split(s, ".") + for i := 0; i < len(parts) && i < 3; i++ { + for _, ch := range parts[i] { + if ch < '0' || ch > '9' { + break + } + out[i] = out[i]*10 + int(ch-'0') + } + } + return out + } + av := parse(a) + bv := parse(b) + for i := 0; i < 3; i++ { + if av[i] > bv[i] { + return 1 + } + if av[i] < bv[i] { + return -1 + } + } + if a == "" && b != "" { + return -1 + } + return 0 +} diff --git a/internal/browser/runtime_probe_test.go b/internal/browser/runtime_probe_test.go new file mode 100644 index 0000000..8d0c602 --- /dev/null +++ b/internal/browser/runtime_probe_test.go @@ -0,0 +1,87 @@ +package browser + +import ( + "context" + "os" + "path/filepath" + "testing" +) + +func TestProbeRuntimeInstalled(t *testing.T) { + bin := writeFakeBrowser(t, `#!/bin/sh +if [ "$1" = "--version" ]; then echo "gsd-browser 0.1.20"; exit 0; fi +if [ "$1" = "cloud-methods" ]; then echo '{"manifestVersion":1}'; exit 0; fi +if [ "$1" = "daemon" ] && [ "$2" = "health" ]; then echo '{"ok":true,"chromeAvailable":true}'; exit 0; fi +exit 1 +`) + got := ProbeRuntime(context.Background(), bin) + if !got.Ready || !got.Installed || got.Version != "0.1.20" || !got.MinVersionOK || got.CloudMethodsVersion != 1 || !got.ChromeAvailable { + t.Fatalf("ProbeRuntime = %+v", got) + } +} + +func TestProbeRuntimeMissingBinary(t *testing.T) { + got := ProbeRuntime(context.Background(), filepath.Join(t.TempDir(), "missing")) + if got.ErrorCode != "browser_not_installed" || got.Ready { + t.Fatalf("ProbeRuntime = %+v", got) + } +} + +func TestProbeRuntimeOldVersion(t *testing.T) { + bin := writeFakeBrowser(t, `#!/bin/sh +if [ "$1" = "--version" ]; then echo "gsd-browser 0.1.18"; exit 0; fi +exit 1 +`) + got := ProbeRuntime(context.Background(), bin) + if got.ErrorCode != "version_too_old" || got.MinVersionOK || got.Ready { + t.Fatalf("ProbeRuntime = %+v", got) + } +} + +func TestProbeRuntimeMalformedManifest(t *testing.T) { + bin := writeFakeBrowser(t, `#!/bin/sh +if [ "$1" = "--version" ]; then echo "gsd-browser 0.1.20"; exit 0; fi +if [ "$1" = "cloud-methods" ]; then echo '{'; exit 0; fi +exit 1 +`) + got := ProbeRuntime(context.Background(), bin) + if got.ErrorCode != "manifest_invalid" || got.Ready { + t.Fatalf("ProbeRuntime = %+v", got) + } +} + +func TestProbeRuntimeChromeMissing(t *testing.T) { + bin := writeFakeBrowser(t, `#!/bin/sh +if [ "$1" = "--version" ]; then echo "gsd-browser 0.1.20"; exit 0; fi +if [ "$1" = "cloud-methods" ]; then echo '{"manifestVersion":1}'; exit 0; fi +if [ "$1" = "daemon" ] && [ "$2" = "health" ]; then echo '{"ok":false,"chromeAvailable":false,"error":"no chrome"}'; exit 0; fi +exit 1 +`) + got := ProbeRuntime(context.Background(), bin) + if got.ErrorCode != "chrome_missing" || got.Ready { + t.Fatalf("ProbeRuntime = %+v", got) + } +} + +func TestProbeRuntimeHonorsEnvPath(t *testing.T) { + bin := writeFakeBrowser(t, `#!/bin/sh +if [ "$1" = "--version" ]; then echo "gsd-browser 0.1.20"; exit 0; fi +if [ "$1" = "cloud-methods" ]; then echo '{"manifestVersion":1}'; exit 0; fi +if [ "$1" = "daemon" ] && [ "$2" = "health" ]; then echo '{"ok":true,"chromeAvailable":true}'; exit 0; fi +exit 1 +`) + t.Setenv("GSD_BROWSER_PATH", bin) + got := ProbeRuntime(context.Background(), "") + if got.Path != bin || !got.Ready { + t.Fatalf("ProbeRuntime = %+v", got) + } +} + +func writeFakeBrowser(t *testing.T, body string) string { + t.Helper() + path := filepath.Join(t.TempDir(), "gsd-browser") + if err := os.WriteFile(path, []byte(body), 0o755); err != nil { + t.Fatalf("write fake browser: %v", err) + } + return path +} diff --git a/internal/browser/types.go b/internal/browser/types.go index 87062a8..22b5bae 100644 --- a/internal/browser/types.go +++ b/internal/browser/types.go @@ -26,6 +26,16 @@ type OpenRequest struct { ExpiresAt string } +type EnsureRequest struct { + GrantID string + SessionID string + ProjectID string + TaskID string + ChannelID string + MachineID string + ExpiresAt string +} + type OpenResult struct { BrowserID string URL string @@ -67,4 +77,18 @@ type ToolResult struct { OK bool ResultJSON json.RawMessage Error string + ErrorCode string +} + +type ToolRPCRequest struct { + GrantID string `json:"grantId"` + SessionID string `json:"sessionId"` + ProjectID string `json:"projectId"` + TaskID string `json:"taskId"` + ChannelID string `json:"channelId"` + MachineID string `json:"machineId"` + ExpiresAt string `json:"expiresAt"` + ToolUseID string `json:"toolUseId"` + Method string `json:"method"` + Params json.RawMessage `json:"params"` } diff --git a/internal/loop/browser_rpc.go b/internal/loop/browser_rpc.go new file mode 100644 index 0000000..7f54659 --- /dev/null +++ b/internal/loop/browser_rpc.go @@ -0,0 +1,184 @@ +package loop + +import ( + "context" + "encoding/binary" + "encoding/json" + "fmt" + "io" + "log/slog" + "net" + "os" + "path/filepath" + "time" + + "github.com/gsd-build/daemon/internal/browser" + protocol "github.com/gsd-build/protocol-go" +) + +type browserRPCRequest struct { + JSONRPC string `json:"jsonrpc"` + ID json.RawMessage `json:"id,omitempty"` + Method string `json:"method"` + Params browser.ToolRPCRequest `json:"params"` +} + +type browserRPCResponse struct { + JSONRPC string `json:"jsonrpc"` + ID json.RawMessage `json:"id,omitempty"` + Result any `json:"result,omitempty"` + Error *browserRPCError `json:"error,omitempty"` +} + +type browserRPCError struct { + Code int `json:"code"` + Message string `json:"message"` +} + +func (d *Daemon) runBrowserRPC(ctx context.Context) error { + if d.browserManager == nil || d.browserRPCSocket == "" { + return nil + } + if err := os.MkdirAll(filepath.Dir(d.browserRPCSocket), 0o700); err != nil { + return err + } + _ = os.Remove(d.browserRPCSocket) + listener, err := net.Listen("unix", d.browserRPCSocket) + if err != nil { + return err + } + defer listener.Close() + defer os.Remove(d.browserRPCSocket) + _ = os.Chmod(d.browserRPCSocket, 0o600) + + go func() { + <-ctx.Done() + _ = listener.Close() + }() + + for { + conn, err := listener.Accept() + if err != nil { + if ctx.Err() != nil { + return nil + } + return err + } + go d.handleBrowserRPCConn(ctx, conn) + } +} + +func (d *Daemon) handleBrowserRPCConn(ctx context.Context, conn net.Conn) { + defer conn.Close() + deadline := time.Now().Add(35 * time.Second) + _ = conn.SetDeadline(deadline) + payload, err := readBrowserRPCFrame(conn) + if err != nil { + slog.Debug("browser rpc read failed", "err", err) + return + } + var req browserRPCRequest + if err := json.Unmarshal(payload, &req); err != nil { + _ = writeBrowserRPCFrame(conn, browserRPCResponse{ + JSONRPC: "2.0", + Error: &browserRPCError{Code: -32700, Message: "invalid browser rpc request"}, + }) + return + } + resp := d.handleBrowserRPC(ctx, req) + _ = writeBrowserRPCFrame(conn, resp) +} + +func (d *Daemon) handleBrowserRPC(ctx context.Context, req browserRPCRequest) browserRPCResponse { + resp := browserRPCResponse{JSONRPC: "2.0", ID: req.ID} + if req.Method != "browser_tool" { + resp.Error = &browserRPCError{Code: -32601, Message: "unknown browser rpc method"} + return resp + } + grant, err := d.browserManager.Ensure(ctx, browser.EnsureRequest{ + GrantID: req.Params.GrantID, + SessionID: req.Params.SessionID, + ProjectID: req.Params.ProjectID, + TaskID: req.Params.TaskID, + ChannelID: req.Params.ChannelID, + MachineID: req.Params.MachineID, + ExpiresAt: req.Params.ExpiresAt, + }) + if err != nil { + resp.Error = &browserRPCError{Code: -32000, Message: err.Error()} + return resp + } + toolUseID := req.Params.ToolUseID + if toolUseID == "" { + toolUseID = fmt.Sprintf("browser_tool_%d", time.Now().UnixNano()) + } + params := req.Params.Params + if len(params) == 0 { + params = json.RawMessage(`{}`) + } + result, err := d.browserManager.ToolResult(ctx, &protocol.BrowserToolCall{ + Type: protocol.MsgTypeBrowserToolCall, + BrowserID: grant.BrowserID, + GrantID: grant.GrantID, + TaskID: req.Params.TaskID, + ToolUseID: toolUseID, + Method: req.Params.Method, + ParamsJSON: params, + }) + if err != nil { + resp.Error = &browserRPCError{Code: -32001, Message: err.Error()} + return resp + } + if len(result.ResultJSON) == 0 { + resp.Result = map[string]any{"ok": result.OK} + return resp + } + var decoded any + if err := json.Unmarshal(result.ResultJSON, &decoded); err != nil { + resp.Result = string(result.ResultJSON) + return resp + } + resp.Result = decoded + return resp +} + +func readBrowserRPCFrame(r io.Reader) ([]byte, error) { + var header [4]byte + if _, err := io.ReadFull(r, header[:]); err != nil { + return nil, err + } + size := binary.BigEndian.Uint32(header[:]) + if size > 16*1024*1024 { + return nil, fmt.Errorf("browser rpc frame too large: %d", size) + } + buf := make([]byte, size) + _, err := io.ReadFull(r, buf) + return buf, err +} + +func writeBrowserRPCFrame(w io.Writer, resp browserRPCResponse) error { + data, err := json.Marshal(resp) + if err != nil { + return err + } + var header [4]byte + binary.BigEndian.PutUint32(header[:], uint32(len(data))) + if err := writeBrowserRPCFull(w, header[:]); err != nil { + return err + } + return writeBrowserRPCFull(w, data) +} + +func writeBrowserRPCFull(w io.Writer, data []byte) error { + for len(data) > 0 { + n, err := w.Write(data) + if err != nil { + return err + } + if n == 0 { + return io.ErrShortWrite + } + data = data[n:] + } + return nil +} diff --git a/internal/loop/daemon.go b/internal/loop/daemon.go index 05ba619..1fcba79 100644 --- a/internal/loop/daemon.go +++ b/internal/loop/daemon.go @@ -70,6 +70,7 @@ type Daemon struct { previewWS *preview.WebSocketBridge previewWork chan struct{} browserManager *browser.Manager + browserRPCSocket string agentTouchedFiles agentTouchedFileStore runCtxMu sync.RWMutex runCtx context.Context @@ -479,6 +480,7 @@ func NewWithPiBinaryPath(cfg *config.Config, version, piBinaryOverride string) ( } } sockPath := filepath.Join(homeDir, ".gsd-cloud", "daemon.sock") + browserRPCSocket := filepath.Join(homeDir, ".gsd-cloud", "browser-rpc.sock") agentDir := filepath.Join(homeDir, ".gsd-cloud", "agents") subagentAuthSecret, err := generateSubagentAuthSecret() if err != nil { @@ -502,6 +504,10 @@ func NewWithPiBinaryPath(cfg *config.Config, version, piBinaryOverride string) ( if absDir, err := filepath.Abs(browserStateDir); err == nil { browserStateDir = absDir } + browserPath := os.Getenv("GSD_BROWSER_PATH") + if browserPath == "" { + browserPath = "gsd-browser" + } d := &Daemon{ cfg: cfg, @@ -521,6 +527,7 @@ func NewWithPiBinaryPath(cfg *config.Config, version, piBinaryOverride string) ( previewWS: preview.NewWebSocketBridge(previewRegistry, client), previewWork: make(chan struct{}, preview.DefaultMaxActiveStreams), sockPath: sockPath, + browserRPCSocket: browserRPCSocket, subagentAuthSecret: subagentAuthSecret, agentDir: agentDir, subagentStreams: make(map[string]*pi.ChildTranslator), @@ -529,7 +536,7 @@ func NewWithPiBinaryPath(cfg *config.Config, version, piBinaryOverride string) ( subagentRunIDs: make(map[string]string), subagentParentSessions: make(map[string]string), browserManager: browser.NewManager(browser.ManagerOptions{ - Service: browser.LocalService{BinaryPath: "gsd-browser", StateDir: browserStateDir}, + Service: browser.LocalService{BinaryPath: browserPath, StateDir: browserStateDir}, Sender: client, }), } @@ -693,6 +700,11 @@ func (d *Daemon) Run(ctx context.Context) error { slog.Warn("socket API failed", "error", err) } }() + go func() { + if err := d.runBrowserRPC(ctx); err != nil { + slog.Warn("browser RPC socket failed", "error", err) + } + }() go d.runTokenRefreshCheck(ctx) go d.runAgentTouchedFileSweep(ctx) @@ -955,6 +967,8 @@ func (d *Daemon) handleTask(msg *protocol.Task) error { } browserGrantID := "" browserID := "" + browserGrant := msg.BrowserGrant + runtime := browser.ProbeRuntime(ctx, os.Getenv("GSD_BROWSER_PATH")) if d.browserManager != nil { if browserGrant, ok := d.browserManager.GrantForTask(msg.TaskID); ok { browserGrantID = browserGrant.GrantID @@ -975,21 +989,28 @@ func (d *Daemon) handleTask(msg *protocol.Task) error { authToken = d.cfg.AuthToken } actor, err = d.manager.Spawn(ctx, session.Options{ - SessionID: msg.SessionID, - CWD: msg.CWD, - Model: msg.Model, - Effort: msg.Effort, - PermissionMode: msg.PermissionMode, - ResumeSession: msg.ClaudeSessionID, - PiBinaryPath: d.piBinaryPath, - PiExtensionPath: d.piExtensionPath, - ServerURL: serverURL, - MachineID: machineID, - AuthToken: authToken, - DaemonSocketPath: d.sockPath, - AgentDir: d.agentDir, - BrowserGrantID: browserGrantID, - BrowserID: browserID, + SessionID: msg.SessionID, + CWD: msg.CWD, + Model: msg.Model, + Effort: msg.Effort, + PermissionMode: msg.PermissionMode, + ResumeSession: msg.ClaudeSessionID, + PiBinaryPath: d.piBinaryPath, + PiExtensionPath: d.piExtensionPath, + ServerURL: serverURL, + MachineID: machineID, + AuthToken: authToken, + DaemonSocketPath: d.sockPath, + AgentDir: d.agentDir, + BrowserGrantID: browserGrantID, + BrowserID: browserID, + BrowserGrant: browserGrant, + BrowserRuntime: session.BrowserRuntimeSnapshot{ + ErrorCode: runtime.ErrorCode, + ErrorMessage: runtime.ErrorMessage, + Version: runtime.Version, + }, + BrowserRPCSocket: d.browserRPCSocket, RecordTouchedFile: d.recordAgentTouchedFile, }) if err != nil { @@ -1005,6 +1026,11 @@ func (d *Daemon) handleTask(msg *protocol.Task) error { } } else { actor.SetBrowserContext(browserGrantID, browserID) + actor.SetBrowserGrant(browserGrant, session.BrowserRuntimeSnapshot{ + ErrorCode: runtime.ErrorCode, + ErrorMessage: runtime.ErrorMessage, + Version: runtime.Version, + }, d.browserRPCSocket) } // Task execution errors (e.g. claude binary not found, executor not ready) diff --git a/internal/loop/daemon_test.go b/internal/loop/daemon_test.go index c928fd2..75848e8 100644 --- a/internal/loop/daemon_test.go +++ b/internal/loop/daemon_test.go @@ -512,6 +512,45 @@ func TestHandleTaskUsesSessionScopedBrowserGrant(t *testing.T) { } } +func TestBrowserRPCEnsuresSessionAndRunsTool(t *testing.T) { + browserManager := browser.NewManager(browser.ManagerOptions{ + Service: loopBrowserService{}, + Sender: &loopBrowserSender{}, + FrameInterval: time.Hour, + }) + d := &Daemon{browserManager: browserManager} + + resp := d.handleBrowserRPC(context.Background(), browserRPCRequest{ + JSONRPC: "2.0", + Method: "browser_tool", + Params: browser.ToolRPCRequest{ + GrantID: "grant_rpc", + SessionID: "sess-rpc", + TaskID: "task-rpc", + ChannelID: "ch-rpc", + ProjectID: "project-rpc", + MachineID: "machine-rpc", + ExpiresAt: time.Now().Add(time.Hour).Format(time.RFC3339Nano), + ToolUseID: "tool-rpc", + Method: "snapshot", + }, + }) + if resp.Error != nil { + t.Fatalf("rpc error = %+v", resp.Error) + } + result, ok := resp.Result.(map[string]any) + if !ok || result["ok"] != true { + t.Fatalf("rpc result = %#v", resp.Result) + } + grant, ok := browserManager.GrantForTask("task-rpc") + if !ok { + t.Fatal("expected lazy browser grant") + } + if grant.BrowserID != "grant_rpc" || grant.ChannelID != "ch-rpc" { + t.Fatalf("grant = %+v", grant) + } +} + func TestDaemonHandlesPreviewOpen(t *testing.T) { daemon, relayClient := newTestDaemonWithPreview(t) err := daemon.handleMessage(&protocol.Envelope{ diff --git a/internal/pi/executor.go b/internal/pi/executor.go index ce87fcc..ff11b12 100644 --- a/internal/pi/executor.go +++ b/internal/pi/executor.go @@ -16,6 +16,7 @@ import ( "os" "os/exec" "os/user" + "path/filepath" "runtime" "strings" "syscall" @@ -59,32 +60,39 @@ func piExitError(code int, stderr string) error { // Options configures a pi process. type Options struct { - BinaryPath string // pi binary; defaults to "pi" - CWD string - Model string // forwarded as --model - ResumeSession string // forwarded as --session ; empty = --no-session - TaskID string - SessionID string - ChannelID string - Prompt string - CustomInstructions string - ExtensionPath string // forwarded as -e - Provider string // forwarded as --provider - SkillPaths []string - DisableSkills bool - BrowserGrantID string - BrowserID string - BrowserSessionID string - WarmClaudeSDK bool - PlanCapability *protocol.PlanCapability - DaemonSocketPath string - SubagentAuthToken string - ParentSessionID string - AgentDir string - SubagentsPrompt string - AgentToolsSocket string - AgentToolsToken string - ToolProfile string + BinaryPath string // pi binary; defaults to "pi" + CWD string + Model string // forwarded as --model + ResumeSession string // forwarded as --session ; empty = --no-session + TaskID string + SessionID string + ChannelID string + Prompt string + CustomInstructions string + ExtensionPath string // forwarded as -e + Provider string // forwarded as --provider + SkillPaths []string + DisableSkills bool + BrowserGrantID string + BrowserID string + BrowserSessionID string + BrowserProjectID string + BrowserMachineID string + BrowserGrantExpiresAt string + BrowserRPCSocket string + BrowserRuntimeErrorCode string + BrowserRuntimeErrorMessage string + BrowserRuntimeVersion string + WarmClaudeSDK bool + PlanCapability *protocol.PlanCapability + DaemonSocketPath string + SubagentAuthToken string + ParentSessionID string + AgentDir string + SubagentsPrompt string + AgentToolsSocket string + AgentToolsToken string + ToolProfile string } // ProviderOrDefault returns the Pi provider name to use for a task. @@ -236,9 +244,7 @@ func processEnv(ctx context.Context, base []string, opts Options) []string { ), opts, ), - opts.BrowserGrantID, - opts.BrowserID, - opts.BrowserSessionID, + opts, ), opts.PlanCapability, ), @@ -729,21 +735,50 @@ func serviceManagerEnv(ctx context.Context, key string) string { return "" } -func browserEnv(base []string, grantID string, browserID string, sessionID string) []string { - env := make([]string, 0, len(base)+3) +func browserEnv(base []string, opts Options) []string { + env := make([]string, 0, len(base)+10) for _, entry := range base { if strings.HasPrefix(entry, "GSD_BROWSER_") { continue } + if strings.HasPrefix(entry, "GSD_DAEMON_BROWSER_RPC_SOCKET=") { + continue + } env = append(env, entry) } - if grantID != "" && browserID != "" && sessionID != "" { + if opts.BrowserGrantID != "" && opts.BrowserSessionID != "" { env = append(env, - "GSD_BROWSER_GRANT_ID="+grantID, - "GSD_BROWSER_ID="+browserID, - "GSD_BROWSER_SESSION_ID="+sessionID, + "GSD_BROWSER_GRANT_ID="+opts.BrowserGrantID, + "GSD_BROWSER_SESSION_ID="+opts.BrowserSessionID, ) } + if opts.BrowserID != "" { + env = append(env, "GSD_BROWSER_ID="+opts.BrowserID) + } + if opts.BrowserProjectID != "" { + env = append(env, "GSD_PROJECT_ID="+opts.BrowserProjectID) + } + if opts.BrowserMachineID != "" { + env = append(env, "GSD_MACHINE_ID="+opts.BrowserMachineID) + } + if opts.BrowserGrantExpiresAt != "" { + env = append(env, "GSD_BROWSER_GRANT_EXPIRES_AT="+opts.BrowserGrantExpiresAt) + } + if opts.BrowserRPCSocket != "" { + env = append(env, "GSD_DAEMON_BROWSER_RPC_SOCKET="+opts.BrowserRPCSocket) + } + if opts.BrowserRuntimeErrorCode != "" { + env = append(env, "GSD_BROWSER_RUNTIME_ERROR_CODE="+opts.BrowserRuntimeErrorCode) + } + if opts.BrowserRuntimeErrorMessage != "" { + env = append(env, "GSD_BROWSER_RUNTIME_ERROR_MESSAGE="+opts.BrowserRuntimeErrorMessage) + } + if opts.BrowserRuntimeVersion != "" { + env = append(env, "GSD_BROWSER_RUNTIME_VERSION="+opts.BrowserRuntimeVersion) + } + if opts.ExtensionPath != "" { + env = append(env, "GSD_BROWSER_SKILL_DIR="+filepath.Join(filepath.Dir(opts.ExtensionPath), "gsd-browser-skill")) + } return env } diff --git a/internal/pi/extension/browser-extension.ts b/internal/pi/extension/browser-extension.ts new file mode 100644 index 0000000..a61485a --- /dev/null +++ b/internal/pi/extension/browser-extension.ts @@ -0,0 +1,181 @@ +import net from "node:net"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; +import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; +import { Type } from "@sinclair/typebox"; +import { + BROWSER_METHOD_CATEGORY, + BROWSER_TOOL_CATEGORIES, + BROWSER_TOOL_METHODS, + BrowserToolCategorySchema, + BrowserToolMethodSchema, + type BrowserToolMethod, +} from "./browser-methods.generated.js"; + +const extensionDir = dirname(fileURLToPath(import.meta.url)); + +export type BrowserGrant = { + grantId: string; + sessionId: string; + taskId: string; + channelId: string; + projectId: string; + machineId: string; + expiresAt: string; +}; + +const BrowserToolParams = Type.Object({ + method: BrowserToolMethodSchema, + category: Type.Optional(BrowserToolCategorySchema), + intent: Type.Optional(Type.String()), + params: Type.Optional(Type.Record(Type.String(), Type.Any())), +}); + +export function browserGrantFromEnv(): BrowserGrant | undefined { + const grantId = process.env.GSD_BROWSER_GRANT_ID; + const sessionId = process.env.GSD_BROWSER_SESSION_ID; + const taskId = process.env.GSD_TASK_ID; + const channelId = process.env.GSD_CHANNEL_ID; + const projectId = process.env.GSD_PROJECT_ID; + const machineId = process.env.GSD_MACHINE_ID; + const expiresAt = process.env.GSD_BROWSER_GRANT_EXPIRES_AT; + if (!grantId || !sessionId || !taskId || !channelId || !projectId || !machineId || !expiresAt) { + return undefined; + } + return { grantId, sessionId, taskId, channelId, projectId, machineId, expiresAt }; +} + +export function browserToolDefinition() { + return { + name: "gsd_browser", + label: "GSD Browser", + description: + "Use the active task-scoped GSD shared browser for browser automation, rendered UI verification, navigation, snapshots, ref-based interaction, screenshots, console/network inspection, visual diffs, traces, and artifacts. Use snapshot refs before clicking/filling. Use bare method names such as navigate, snapshot, click_ref, console, and visual_diff.", + promptSnippet: + "GSD Browser is available for website interaction, rendered UI evidence, screenshots, console/network checks, auth flows, and responsive testing. Load the gsd-browser skill when browser behavior matters.", + promptGuidelines: [ + "Use gsd_browser proactively when rendered browser behavior is evidence.", + "Run snapshot before ref-based interaction and re-snapshot after page changes.", + "State intent before multi-step browser work.", + "Request approval for credential, payment, destructive, external-effect, and network-mutation actions.", + ], + parameters: BrowserToolParams, + input_schema: { + type: "object", + additionalProperties: false, + properties: { + method: { type: "string", enum: BROWSER_TOOL_METHODS }, + category: { type: "string", enum: BROWSER_TOOL_CATEGORIES }, + intent: { type: "string" }, + params: { type: "object", additionalProperties: true }, + }, + required: ["method"], + }, + }; +} + +export function browserMethodCategory(method: string) { + return BROWSER_METHOD_CATEGORY[method as BrowserToolMethod] ?? "inspection"; +} + +export function browserActionSummary(method: string, params: Record = {}) { + if (method === "navigate" && typeof params.url === "string") return `Navigate to ${params.url}`; + if (method === "snapshot") return "Snapshot page"; + if (method === "click_ref" && typeof params.ref === "string") return `Click ${params.ref}`; + if (method === "fill_ref" && typeof params.ref === "string") return `Fill ${params.ref}`; + if (method === "console") return "Check console"; + if (method === "network") return "Check network"; + return method.replaceAll("_", " "); +} + +async function browserRpc(grant: BrowserGrant, toolCallId: string, method: string, params: unknown, signal?: AbortSignal) { + const socketPath = process.env.GSD_DAEMON_BROWSER_RPC_SOCKET; + if (!socketPath) throw new Error("daemon browser RPC socket is unavailable"); + const payload = Buffer.from(JSON.stringify({ + jsonrpc: "2.0", + id: Date.now(), + method: "browser_tool", + params: { ...grant, toolUseId: toolCallId, method, params: params ?? {} }, + })); + const header = Buffer.alloc(4); + header.writeUInt32BE(payload.length, 0); + + return await new Promise((resolve, reject) => { + const socket = net.createConnection(socketPath); + const chunks: Buffer[] = []; + let expected = 0; + let settled = false; + const rejectOnce = (err: Error) => { + if (settled) return; + settled = true; + socket.destroy(); + reject(err); + }; + const resolveOnce = (value: unknown) => { + if (settled) return; + settled = true; + socket.end(); + resolve(value); + }; + if (signal?.aborted) return rejectOnce(new Error("browser tool aborted")); + signal?.addEventListener("abort", () => rejectOnce(new Error("browser tool aborted")), { once: true }); + socket.setTimeout(30_000, () => rejectOnce(new Error("browser tool timed out"))); + socket.on("connect", () => socket.write(Buffer.concat([header, payload]))); + socket.on("data", (chunk) => { + chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)); + const all = Buffer.concat(chunks); + if (expected === 0 && all.length >= 4) expected = all.readUInt32BE(0); + if (expected > 16 * 1024 * 1024) return rejectOnce(new Error(`browser rpc frame too large: ${expected}`)); + if (expected > 0 && all.length >= expected + 4) { + const response = JSON.parse(all.subarray(4, expected + 4).toString("utf8")); + if (response.error) rejectOnce(new Error(response.error.message ?? "browser tool failed")); + else resolveOnce(response.result ?? {}); + } + }); + socket.on("error", rejectOnce); + socket.on("close", () => { + if (!settled) rejectOnce(new Error("browser rpc socket closed before response")); + }); + }); +} + +export function registerBrowserExtension(pi: ExtensionAPI) { + if (typeof (pi as any).on === "function") { + (pi as any).on("resources_discover", () => ({ + skillPaths: [join(extensionDir, "gsd-browser-skill", "SKILL.md")], + })); + } + + pi.registerTool({ + ...browserToolDefinition(), + async execute(toolCallId: string, params: any, signal?: AbortSignal) { + const grant = browserGrantFromEnv(); + const method = params.method as string; + const category = params.category ?? browserMethodCategory(method); + const summary = browserActionSummary(method, params.params ?? {}); + if (!grant) { + const code = process.env.GSD_BROWSER_RUNTIME_ERROR_CODE || "browser_context_unavailable"; + const message = process.env.GSD_BROWSER_RUNTIME_ERROR_MESSAGE || "GSD Browser runtime is unavailable for this task."; + return { + content: [{ type: "text", text: message }], + isError: true, + details: { toolCallId, method, category, summary, code }, + }; + } + try { + const result = await browserRpc(grant, toolCallId, method, params.params ?? {}, signal); + return { + content: [{ type: "text", text: JSON.stringify(result) }], + isError: false, + details: { ...grant, toolCallId, method, category, summary, safeResult: result }, + }; + } catch (err) { + return { + content: [{ type: "text", text: err instanceof Error ? err.message : String(err) }], + isError: true, + details: { ...grant, toolCallId, method, category, summary }, + }; + } + }, + } as any); +} diff --git a/internal/pi/extension/browser-methods.generated.test.mjs b/internal/pi/extension/browser-methods.generated.test.mjs new file mode 100644 index 0000000..ca9a14f --- /dev/null +++ b/internal/pi/extension/browser-methods.generated.test.mjs @@ -0,0 +1,24 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import { + BROWSER_METHOD_CATEGORY, + BROWSER_TOOL_METHODS, +} from "./browser-methods.generated.ts"; + +describe("browser methods registry", () => { + it("keeps credential and vault methods policy-visible but tool-disabled", () => { + assert.equal(BROWSER_METHOD_CATEGORY.vault_login, "credential_auth"); + assert.equal(BROWSER_METHOD_CATEGORY.vault_save, "credential_auth"); + assert.equal(BROWSER_METHOD_CATEGORY.save_state, "credential_auth"); + assert.equal(BROWSER_TOOL_METHODS.includes("vault_login"), false); + assert.equal(BROWSER_TOOL_METHODS.includes("vault_save"), false); + assert.equal(BROWSER_TOOL_METHODS.includes("save_state"), false); + }); + + it("includes inspection and artifact methods used by the ambient loop", () => { + assert.equal(BROWSER_TOOL_METHODS.includes("navigate"), true); + assert.equal(BROWSER_TOOL_METHODS.includes("snapshot"), true); + assert.equal(BROWSER_TOOL_METHODS.includes("click_ref"), true); + assert.equal(BROWSER_TOOL_METHODS.includes("visual_diff"), true); + }); +}); diff --git a/internal/pi/extension/browser-methods.generated.ts b/internal/pi/extension/browser-methods.generated.ts new file mode 100644 index 0000000..1247d66 --- /dev/null +++ b/internal/pi/extension/browser-methods.generated.ts @@ -0,0 +1,95 @@ +import { Type } from "@sinclair/typebox"; + +export const BROWSER_TOOL_CATEGORIES = [ + "navigation", + "interaction", + "artifact_generation", + "inspection", + "external_effect", + "network_mutation", + "credential_auth", + "composite", +] as const; + +export type BrowserToolCategory = (typeof BROWSER_TOOL_CATEGORIES)[number]; + +export const BROWSER_METHOD_CATEGORY = { + navigate: "navigation", + back: "navigation", + forward: "navigation", + reload: "navigation", + list_pages: "navigation", + switch_page: "navigation", + close_page: "navigation", + list_frames: "navigation", + select_frame: "navigation", + click: "interaction", + type: "interaction", + press: "interaction", + hover: "interaction", + scroll: "interaction", + select_option: "interaction", + set_checked: "interaction", + drag: "interaction", + set_viewport: "interaction", + click_ref: "interaction", + hover_ref: "interaction", + fill_ref: "interaction", + emulate_device: "interaction", + upload_file: "artifact_generation", + debug_bundle: "artifact_generation", + screenshot: "artifact_generation", + zoom_region: "artifact_generation", + save_pdf: "artifact_generation", + visual_diff: "artifact_generation", + generate_test: "artifact_generation", + har_export: "artifact_generation", + trace_start: "artifact_generation", + trace_stop: "artifact_generation", + snapshot: "inspection", + get_ref: "inspection", + accessibility_tree: "inspection", + find: "inspection", + page_source: "inspection", + assert: "inspection", + diff: "inspection", + wait_for: "inspection", + analyze_form: "inspection", + find_best: "inspection", + console: "inspection", + network: "inspection", + dialog: "inspection", + timeline: "inspection", + session_summary: "inspection", + extract: "inspection", + action_cache: "inspection", + check_injection: "inspection", + eval: "external_effect", + fill_form: "external_effect", + act: "external_effect", + mock_route: "network_mutation", + block_urls: "network_mutation", + clear_routes: "network_mutation", + save_state: "credential_auth", + restore_state: "credential_auth", + vault_save: "credential_auth", + vault_login: "credential_auth", + vault_list: "credential_auth", + batch: "composite", +} as const satisfies Record; + +const DISABLED_BROWSER_METHOD_CATEGORIES = new Set(["credential_auth"]); + +export const BROWSER_TOOL_METHODS = Object.entries(BROWSER_METHOD_CATEGORY) + .filter(([, category]) => !DISABLED_BROWSER_METHOD_CATEGORIES.has(category)) + .map(([method]) => method) as BrowserToolMethod[]; + +export type BrowserToolMethod = keyof typeof BROWSER_METHOD_CATEGORY; + +export const BrowserToolMethodSchema = Type.Union( + BROWSER_TOOL_METHODS.map((method) => Type.Literal(method)) as any, +); + +export const BrowserToolCategorySchema = Type.Union( + BROWSER_TOOL_CATEGORIES.map((category) => Type.Literal(category)) as any, +); diff --git a/internal/pi/extension/browser-methods.ts b/internal/pi/extension/browser-methods.ts index bf40f3d..b654238 100644 --- a/internal/pi/extension/browser-methods.ts +++ b/internal/pi/extension/browser-methods.ts @@ -1,91 +1 @@ -import { Type } from "@sinclair/typebox"; - -export const BROWSER_TOOL_CATEGORIES = [ - "navigation", - "interaction", - "artifact_generation", - "inspection", - "external_effect", - "network_mutation", - "credential_auth", - "composite", -] as const; - -export type BrowserToolCategory = (typeof BROWSER_TOOL_CATEGORIES)[number]; - -export const BROWSER_METHOD_CATEGORY = { - navigate: "navigation", - back: "navigation", - forward: "navigation", - reload: "navigation", - list_pages: "navigation", - switch_page: "navigation", - close_page: "navigation", - list_frames: "navigation", - select_frame: "navigation", - click: "interaction", - type: "interaction", - press: "interaction", - hover: "interaction", - scroll: "interaction", - select_option: "interaction", - set_checked: "interaction", - drag: "interaction", - set_viewport: "interaction", - click_ref: "interaction", - hover_ref: "interaction", - fill_ref: "interaction", - emulate_device: "interaction", - upload_file: "artifact_generation", - debug_bundle: "artifact_generation", - screenshot: "artifact_generation", - zoom_region: "artifact_generation", - save_pdf: "artifact_generation", - visual_diff: "artifact_generation", - generate_test: "artifact_generation", - har_export: "artifact_generation", - trace_start: "artifact_generation", - trace_stop: "artifact_generation", - snapshot: "inspection", - get_ref: "inspection", - accessibility_tree: "inspection", - find: "inspection", - page_source: "inspection", - assert: "inspection", - diff: "inspection", - wait_for: "inspection", - analyze_form: "inspection", - find_best: "inspection", - console: "inspection", - network: "inspection", - dialog: "inspection", - timeline: "inspection", - session_summary: "inspection", - extract: "inspection", - action_cache: "inspection", - check_injection: "inspection", - eval: "external_effect", - fill_form: "external_effect", - act: "external_effect", - mock_route: "network_mutation", - block_urls: "network_mutation", - clear_routes: "network_mutation", - save_state: "credential_auth", - restore_state: "credential_auth", - vault_save: "credential_auth", - vault_login: "credential_auth", - vault_list: "credential_auth", - batch: "composite", -} as const satisfies Record; - -export const BROWSER_TOOL_METHODS = Object.keys(BROWSER_METHOD_CATEGORY) as BrowserToolMethod[]; - -export type BrowserToolMethod = keyof typeof BROWSER_METHOD_CATEGORY; - -export const BrowserToolMethodSchema = Type.Union( - BROWSER_TOOL_METHODS.map((method) => Type.Literal(method)) as any, -); - -export const BrowserToolCategorySchema = Type.Union( - BROWSER_TOOL_CATEGORIES.map((category) => Type.Literal(category)) as any, -); +export * from "./browser-methods.generated.js"; diff --git a/internal/pi/extension/browser-tool.test.mjs b/internal/pi/extension/browser-tool.test.mjs index 32df071..301598b 100644 --- a/internal/pi/extension/browser-tool.test.mjs +++ b/internal/pi/extension/browser-tool.test.mjs @@ -9,8 +9,12 @@ import { const browserGrant = { grantId: "grant_1", - browserId: "browser_1", sessionId: "session_1", + taskId: "task_1", + channelId: "channel_1", + projectId: "project_1", + machineId: "machine_1", + expiresAt: "2026-05-01T12:00:00Z", }; describe("browser tool registration", () => { @@ -19,20 +23,21 @@ describe("browser tool registration", () => { assert.equal(tools.some((tool) => tool.name === "gsd_browser"), true); }); - it("does not surface gsd_browser without a browser grant", () => { + it("surfaces gsd_browser even without a browser grant", () => { const tools = buildClaudeCliBrowserTools({}); - assert.equal(tools.some((tool) => tool.name === "gsd_browser"), false); + assert.equal(tools.some((tool) => tool.name === "gsd_browser"), true); }); - it("describes only supported bare browser methods", () => { - const [tool] = buildClaudeCliBrowserTools({ browserGrant }); + it("describes the bundled skill routing behavior", () => { + const [tool] = buildClaudeCliBrowserTools({}); assert.ok(tool); assert.deepEqual(tool.input_schema.properties.method.enum.includes("navigate"), true); assert.deepEqual(tool.input_schema.properties.method.enum.includes("visual_diff"), true); - assert.deepEqual(tool.input_schema.properties.method.enum.includes("vault_login"), true); + assert.deepEqual(tool.input_schema.properties.method.enum.includes("vault_login"), false); assert.deepEqual(tool.input_schema.properties.method.enum.includes("browser.navigate"), false); assert.deepEqual(tool.input_schema.properties.category.enum, BROWSER_TOOL_CATEGORIES); - assert.match(tool.description, /do not prefix/i); + assert.match(tool.description, /rendered UI/i); + assert.match(tool.promptSnippet, /Load the gsd-browser skill/i); }); it("keeps browser method registry and categories explicit", () => { @@ -93,11 +98,6 @@ describe("browser tool registration", () => { "mock_route", "block_urls", "clear_routes", - "save_state", - "restore_state", - "vault_save", - "vault_login", - "vault_list", "batch", ]); assert.equal(BROWSER_METHOD_CATEGORY.eval, "external_effect"); @@ -127,13 +127,13 @@ describe("browser tool registration", () => { assert.equal(tools.some((tool) => tool.name === "ask_human"), true); }); - it("filters a pi-registered gsd_browser when no browser grant exists", () => { + it("adds the ambient gsd_browser when no browser grant exists", () => { const tools = mergeClaudeCliTools([ { name: "gsd_browser", description: "Registered browser tool", parameters: {} }, { name: "ask_human", description: "Ask", parameters: {} }, ], undefined); - assert.equal(tools.some((tool) => tool.name === "gsd_browser"), false); + assert.equal(tools.some((tool) => tool.name === "gsd_browser"), true); assert.equal(tools.some((tool) => tool.name === "ask_human"), true); }); }); diff --git a/internal/pi/extension/gsd-browser-skill/SKILL.md b/internal/pi/extension/gsd-browser-skill/SKILL.md new file mode 100644 index 0000000..3ff9c57 --- /dev/null +++ b/internal/pi/extension/gsd-browser-skill/SKILL.md @@ -0,0 +1,26 @@ +--- +name: gsd-browser +description: Use when a task needs browser automation, website interaction, rendered UI verification, navigation, form filling, screenshots, responsive testing, login flows, console/network inspection, visual diffs, prompt injection checks, or live browser observation. +--- + +# GSD Browser + +Use `gsd_browser` when rendered browser behavior is evidence. Prefer normal code and terminal tools for source-code inspection. + +## Workflow + +1. Navigate with `method: "navigate"`. +2. Snapshot with `method: "snapshot"` before clicking or filling. +3. Use refs from snapshots for interactions. +4. Re-snapshot after navigation, submission, DOM changes, or modal changes. +5. Capture meaningful screenshots or snapshots when they help future review. +6. Check console and network when validating web app behavior. +7. State intent before multi-step browser work. + +## Risk + +Ask for approval before login submission, payment, checkout, destructive account changes, credential auth, external-effect actions, network mutation, or vault operations. + +## Recovery + +On stale refs, run a new snapshot. On navigation failure, inspect console/network and retry only when safe. On missing runtime, explain that the local machine needs `gsd-browser` installed or updated. diff --git a/internal/pi/extension/index.ts b/internal/pi/extension/index.ts index b457781..1e4ac1b 100644 --- a/internal/pi/extension/index.ts +++ b/internal/pi/extension/index.ts @@ -8,9 +8,6 @@ */ import crypto from "node:crypto"; -import net from "node:net"; -import os from "node:os"; -import path from "node:path"; import { createSdkMcpServer, query, @@ -45,11 +42,11 @@ import { registerOpenRouterProvider } from "./openrouter-provider.js"; import { WarmClaudeSdkWorker } from "./claude-sdk-worker.js"; import { registerSubagentTool } from "./subagent.js"; import { - BROWSER_TOOL_CATEGORIES, - BROWSER_TOOL_METHODS, - BrowserToolCategorySchema, - BrowserToolMethodSchema, -} from "./browser-methods.js"; + browserGrantFromEnv, + browserToolDefinition, + registerBrowserExtension, + type BrowserGrant, +} from "./browser-extension.js"; import { filterToolsByPolicy, hasSubagentToolPolicy, @@ -91,12 +88,6 @@ type ActiveToolCall = { jsonAcc: string; }; -type BrowserGrant = { - grantId: string; - browserId: string; - sessionId: string; -}; - function isWriteEpipe(err: unknown) { return isRecord(err) && err.code === "EPIPE" && err.syscall === "write"; } @@ -113,44 +104,8 @@ function installClaudeSdkPipeGuard() { installClaudeSdkPipeGuard(); -const BrowserToolParams = Type.Object({ - method: BrowserToolMethodSchema, - category: Type.Optional(BrowserToolCategorySchema), - params: Type.Optional(Type.Record(Type.String(), Type.Any())), -}); - -function browserToolDefinition() { - return { - name: "gsd_browser", - label: "GSD Browser", - description: - "Use the active task-scoped GSD shared browser session for page navigation, inspection, ref-based interaction, screenshots, network controls, auth state, traces, and artifacts. Prefer snapshot with refs before interacting. Pass bare method names such as navigate, snapshot, click_ref, visual_diff, or vault_login; do not prefix methods with browser.", - parameters: BrowserToolParams, - input_schema: { - type: "object", - additionalProperties: false, - properties: { - method: { - type: "string", - enum: BROWSER_TOOL_METHODS, - description: - "Bare browser operation name. Use navigate, not browser.navigate.", - }, - category: { - type: "string", - enum: BROWSER_TOOL_CATEGORIES, - description: - "Optional method classification for UI and policy. The daemon executes method and params.", - }, - params: { type: "object", additionalProperties: true }, - }, - required: ["method"], - }, - }; -} - export function buildClaudeCliBrowserTools(context: { browserGrant?: BrowserGrant }) { - return context.browserGrant ? [browserToolDefinition()] : []; + return [browserToolDefinition()]; } function piToolName(toolDef: PiTool | ReturnType) { @@ -302,14 +257,6 @@ function registerVisibleTool( return registered; } -function browserGrantFromEnv() { - const grantId = process.env.GSD_BROWSER_GRANT_ID; - const browserId = process.env.GSD_BROWSER_ID; - const sessionId = process.env.GSD_BROWSER_SESSION_ID; - if (!grantId || !browserId || !sessionId) return undefined; - return { grantId, browserId, sessionId }; -} - function warmClaudeOptionsKey(model: Model, context: Context) { const tools = ((context.tools as PiTool[] | undefined) ?? []).map((toolDef) => toolDef.name).sort(); return JSON.stringify({ @@ -326,82 +273,6 @@ function resetWarmClaudeWorker() { warmClaudeOptionsSignature = ""; } -async function browserRpc(browserId: string, method: string, params: unknown, signal?: AbortSignal) { - const socketPath = path.join(os.homedir(), ".gsd-browser", "sessions", browserId, "daemon.sock"); - const payload = Buffer.from(JSON.stringify({ - jsonrpc: "2.0", - id: Date.now(), - method: "cloud_tool", - params: { method, params: params ?? {} }, - })); - const header = Buffer.alloc(4); - header.writeUInt32BE(payload.length, 0); - - return await new Promise((resolve, reject) => { - const socket = net.createConnection(socketPath); - const chunks: Buffer[] = []; - let expected = 0; - let settled = false; - - const cleanup = () => { - signal?.removeEventListener("abort", onAbort); - socket.removeAllListeners("timeout"); - }; - const rejectOnce = (err: Error) => { - if (settled) return; - settled = true; - cleanup(); - socket.destroy(); - reject(err); - }; - const resolveOnce = (value: unknown) => { - if (settled) return; - settled = true; - cleanup(); - socket.end(); - resolve(value); - }; - const onAbort = () => rejectOnce(new Error("browser tool aborted")); - - if (signal?.aborted) { - rejectOnce(new Error("browser tool aborted")); - return; - } - signal?.addEventListener("abort", onAbort, { once: true }); - socket.setTimeout(30_000, () => rejectOnce(new Error("browser tool timed out"))); - socket.on("connect", () => { - socket.write(Buffer.concat([header, payload]), (err) => { - if (err) rejectOnce(err); - }); - }); - socket.on("data", (chunk) => { - chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)); - const all = Buffer.concat(chunks); - if (expected === 0 && all.length >= 4) expected = all.readUInt32BE(0); - if (expected > 16 * 1024 * 1024) { - rejectOnce(new Error(`browser rpc frame too large: ${expected}`)); - return; - } - if (expected > 0 && all.length >= expected + 4) { - try { - const response = JSON.parse(all.subarray(4, expected + 4).toString("utf8")); - if (response.error) rejectOnce(new Error(response.error.message ?? "browser tool failed")); - else resolveOnce(response.result ?? {}); - } catch (err) { - rejectOnce(err instanceof Error ? err : new Error(String(err))); - } - } - }); - socket.on("end", () => rejectOnce(new Error("browser rpc ended before response"))); - socket.on("close", (hadError) => { - if (!settled) { - rejectOnce(new Error(hadError ? "browser rpc socket closed after error" : "browser rpc socket closed before response")); - } - }); - socket.on("error", (err) => rejectOnce(err)); - }); -} - function isRecord(value: unknown): value is Record { return typeof value === "object" && value !== null && !Array.isArray(value); } @@ -1016,51 +887,15 @@ function registerAskHumanTool(pi: ExtensionAPI, registeredTools?: ToolRegistrati registeredTools?.push(describeRegisteredTool("human", definition)); } -function registerBrowserTool(pi: ExtensionAPI, registeredTools?: ToolRegistrationDiagnostic[]) { - const definition = browserToolDefinition(); - const registeredDefinition = { - name: definition.name, - label: definition.label, - description: definition.description, - parameters: definition.parameters, - async execute(_toolCallId: string, params: any, signal?: AbortSignal) { - const browserGrant = browserGrantFromEnv(); - if (!browserGrant) { - return { - content: [{ type: "text", text: "No task-scoped browser grant is active." }], - isError: true, - details: {}, - }; - } - try { - const result = await browserRpc(browserGrant.browserId, params.method, params.params ?? {}, signal); - return { - content: [{ type: "text", text: JSON.stringify(result) }], - isError: false, - details: { browserId: browserGrant.browserId, grantId: browserGrant.grantId }, - }; - } catch (err) { - return { - content: [{ type: "text", text: err instanceof Error ? err.message : String(err) }], - isError: true, - details: { browserId: browserGrant.browserId, grantId: browserGrant.grantId }, - }; - } - }, - }; - pi.registerTool(registeredDefinition as any); - registeredTools?.push(describeRegisteredTool("browser", registeredDefinition)); -} - export default function (pi: ExtensionAPI) { const subagentAllowedTools = parseAllowedTools(process.env.GSD_SUBAGENT_ALLOWED_TOOLS); - const browserGrant = browserGrantFromEnv(); const profile = toolProfile(); const registeredTools: ToolRegistrationDiagnostic[] = []; if (!isMinimalToolProfile()) { registerAskHumanTool(pi, registeredTools); - if (browserGrant && (isToolAllowed("browser", subagentAllowedTools) || !hasSubagentToolPolicy())) { - registerBrowserTool(pi, registeredTools); + if (isToolAllowed("browser", subagentAllowedTools) || !hasSubagentToolPolicy()) { + registerBrowserExtension(pi); + registeredTools.push(describeRegisteredTool("browser", browserToolDefinition())); } registerTrackedTool(pi, registeredTools, "human", askUserQuestionsTool as any); for (const backgroundTool of backgroundTools) { diff --git a/internal/relay/conn.go b/internal/relay/conn.go index 5909c1b..dd6b01e 100644 --- a/internal/relay/conn.go +++ b/internal/relay/conn.go @@ -9,11 +9,13 @@ import ( "log/slog" "math/rand" "net/http" + "os" "sync" "sync/atomic" "time" "github.com/coder/websocket" + "github.com/gsd-build/daemon/internal/browser" "github.com/gsd-build/daemon/internal/preview" protocol "github.com/gsd-build/protocol-go" ) @@ -98,6 +100,9 @@ func (c *Client) Connect(ctx context.Context, activeTasks []string) (*protocol.W } conn.SetReadLimit(1 << 20) // 1 MB + browserPath := os.Getenv("GSD_BROWSER_PATH") + runtime := browser.ProbeRuntime(ctx, browserPath) + // Send Hello hello := protocol.Hello{ Type: protocol.MsgTypeHello, @@ -117,12 +122,21 @@ func (c *Client) Connect(ctx context.Context, activeTasks []string) (*protocol.W PreviewWebSocketProtocols: true, LocalServerDetection: true, Skills: true, - BrowserSessions: true, - BrowserFrameStream: true, - BrowserUserControl: true, - BrowserIdentities: true, - BrowserSensitiveActionApproval: true, + BrowserSessions: runtime.Ready, + BrowserFrameStream: runtime.Ready, + BrowserUserControl: runtime.Ready, + BrowserIdentities: runtime.Ready, + BrowserSensitiveActionApproval: runtime.Ready, BrowserMaxFrameBytes: 262144, + BrowserRuntimeInstalled: runtime.Installed, + BrowserRuntimeVersion: runtime.Version, + BrowserRuntimeMinVersion: runtime.MinVersion, + BrowserRuntimeMinVersionOK: runtime.MinVersionOK, + BrowserRuntimePath: runtime.Path, + BrowserRuntimeErrorCode: runtime.ErrorCode, + BrowserRuntimeErrorMessage: runtime.ErrorMessage, + BrowserCloudMethodsVersion: runtime.CloudMethodsVersion, + BrowserChromeAvailable: runtime.ChromeAvailable, }, } buf, err := json.Marshal(hello) diff --git a/internal/session/actor.go b/internal/session/actor.go index 34e6e77..62c5e38 100644 --- a/internal/session/actor.go +++ b/internal/session/actor.go @@ -60,6 +60,9 @@ type Options struct { Uploader ImageUploader // nil = image upload disabled BrowserGrantID string BrowserID string + BrowserGrant *protocol.BrowserGrantContext + BrowserRuntime BrowserRuntimeSnapshot + BrowserRPCSocket string RecordTouchedFile func(channelID string, cwd string, path string) OnTaskIdle func() ProjectID string @@ -160,6 +163,9 @@ type taskContext struct { DisableSkills bool BrowserGrantID string BrowserID string + BrowserGrant *protocol.BrowserGrantContext + BrowserRuntime BrowserRuntimeSnapshot + BrowserRPCSocket string PlanCapability *protocol.PlanCapability PlanRuntime *planRuntimeReporter } @@ -352,6 +358,23 @@ func (a *Actor) SetBrowserContext(grantID string, browserID string) { a.opts.BrowserID = browserID } +type BrowserRuntimeSnapshot struct { + ErrorCode string + ErrorMessage string + Version string +} + +func (a *Actor) SetBrowserGrant(grant *protocol.BrowserGrantContext, runtime BrowserRuntimeSnapshot, rpcSocket string) { + a.taskMu.Lock() + defer a.taskMu.Unlock() + a.opts.BrowserGrant = grant + a.opts.BrowserRuntime = runtime + a.opts.BrowserRPCSocket = rpcSocket + if grant != nil { + a.opts.BrowserGrantID = grant.GrantID + } +} + // SendTask queues a task for execution. Non-blocking if the channel has capacity. func (a *Actor) SendTask(task protocol.Task) error { select { @@ -643,6 +666,9 @@ func (a *Actor) executeTask(ctx context.Context, task protocol.Task) error { DisableSkills: task.DisableSkills, BrowserGrantID: a.opts.BrowserGrantID, BrowserID: a.opts.BrowserID, + BrowserGrant: a.opts.BrowserGrant, + BrowserRuntime: a.opts.BrowserRuntime, + BrowserRPCSocket: a.opts.BrowserRPCSocket, PlanCapability: task.PlanCapability, } tc.PlanRuntime = newPlanRuntimeReporter(tc.TaskID, tc.PlanCapability) @@ -928,30 +954,41 @@ func (a *Actor) runPiExecutor(actorCtx context.Context, taskCtx context.Context, } opts := pi.Options{ - BinaryPath: binaryPath, - CWD: a.opts.CWD, - Model: model, - ResumeSession: sessionFile, - TaskID: tc.TaskID, - SessionID: a.opts.SessionID, - ChannelID: tc.ChannelID, - Prompt: prompt, - CustomInstructions: tc.CustomInstructions, - ExtensionPath: a.opts.PiExtensionPath, - Provider: provider, - SkillPaths: skillPaths, - DisableSkills: tc.DisableSkills, - BrowserGrantID: tc.BrowserGrantID, - BrowserID: tc.BrowserID, - BrowserSessionID: a.opts.SessionID, - WarmClaudeSDK: a.opts.WarmClaudeSDK, - PlanCapability: tc.PlanCapability, - DaemonSocketPath: a.opts.DaemonSocketPath, - SubagentAuthToken: subagentAuthToken, - ParentSessionID: a.opts.SessionID, - AgentDir: a.opts.AgentDir, - SubagentsPrompt: subagentsPrompt, - ToolProfile: inferToolProfile(prompt), + BinaryPath: binaryPath, + CWD: a.opts.CWD, + Model: model, + ResumeSession: sessionFile, + TaskID: tc.TaskID, + SessionID: a.opts.SessionID, + ChannelID: tc.ChannelID, + Prompt: prompt, + CustomInstructions: tc.CustomInstructions, + ExtensionPath: a.opts.PiExtensionPath, + Provider: provider, + SkillPaths: skillPaths, + DisableSkills: tc.DisableSkills, + BrowserGrantID: tc.BrowserGrantID, + BrowserID: tc.BrowserID, + BrowserSessionID: a.opts.SessionID, + BrowserRPCSocket: tc.BrowserRPCSocket, + BrowserRuntimeErrorCode: tc.BrowserRuntime.ErrorCode, + BrowserRuntimeErrorMessage: tc.BrowserRuntime.ErrorMessage, + BrowserRuntimeVersion: tc.BrowserRuntime.Version, + WarmClaudeSDK: a.opts.WarmClaudeSDK, + PlanCapability: tc.PlanCapability, + DaemonSocketPath: a.opts.DaemonSocketPath, + SubagentAuthToken: subagentAuthToken, + ParentSessionID: a.opts.SessionID, + AgentDir: a.opts.AgentDir, + SubagentsPrompt: subagentsPrompt, + ToolProfile: inferToolProfile(prompt), + } + if tc.BrowserGrant != nil { + opts.BrowserGrantID = tc.BrowserGrant.GrantID + opts.BrowserSessionID = tc.BrowserGrant.SessionID + opts.BrowserProjectID = tc.BrowserGrant.ProjectID + opts.BrowserMachineID = tc.BrowserGrant.MachineID + opts.BrowserGrantExpiresAt = tc.BrowserGrant.ExpiresAt } if a.opts.AgentTools != nil { control, err := a.opts.AgentTools.StartTask(taskCtx, agentterminal.TaskScope{