From 7c54fe66ad0807cd17fb053d84e2d3305b4902c0 Mon Sep 17 00:00:00 2001 From: Mohit Sahoo Date: Mon, 25 May 2026 02:59:25 +0530 Subject: [PATCH 1/3] feat(appshots): support Linux capture - Enable AppShots availability and hotkey routing on Linux - Capture focused Linux windows through the Computer Use backend - Add screenshot CLI fallbacks plus Rust and patcher tests - Document Linux AppShots usage and backend commands --- CHANGELOG.md | 8 + README.md | 10 +- computer-use-linux/src/appshot.rs | 409 ++++++++++++++++++ computer-use-linux/src/bare_modifier.rs | 331 ++++++++++++++ computer-use-linux/src/main.rs | 44 +- computer-use-linux/src/screenshot.rs | 175 +++++++- scripts/patch-linux-window-ui.js | 10 + scripts/patch-linux-window-ui.test.js | 110 +++++ scripts/patches/appshots.js | 198 +++++++++ .../all-linux/main-process/appshots/patch.js | 23 + .../core/all-linux/webview/appshots/patch.js | 29 ++ 11 files changed, 1341 insertions(+), 6 deletions(-) create mode 100644 computer-use-linux/src/appshot.rs create mode 100644 computer-use-linux/src/bare_modifier.rs create mode 100644 scripts/patches/appshots.js create mode 100644 scripts/patches/core/all-linux/main-process/appshots/patch.js create mode 100644 scripts/patches/core/all-linux/webview/appshots/patch.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 52fa3471..4d5214b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/). forces native Wayland with GPU compositing enabled and skips forced renderer accessibility by default for Wayland desktops where XWayland or software rendering is unstable. +- Linux AppShots now use the bundled Computer Use backend to attach the focused + Linux window with metadata, AT-SPI text, and a screenshot. The ASAR patcher + exposes the upstream AppShots composer control and global double-modifier + hotkey on Linux and routes capture updates through the same renderer event + contract as macOS. 
+- Linux screenshot capture now falls back to common desktop tools (`grim`, + `gnome-screenshot`, `spectacle`, ImageMagick `import`) when GNOME Shell DBus + and XDG Desktop Portal capture are unavailable. - New opt-in Linux feature `read-aloud-mcp` that stages a standalone Rust Read Aloud MCP plugin with `doctor`, `read_aloud`, and `stop` tools. The MCP server reuses the Kokoro runner/model configuration from the Read Aloud UI feature diff --git a/README.md b/README.md index a52bf77b..7aaf3749 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ Anything systemd-based should work for the optional auto-updater service (`syste | Multi-instance launcher | 🧪 opt-in | `--new-instance` or `CODEX_MULTI_LAUNCH=1` allocates a bounded webview port and isolated Electron profile | | GUI install prompts (`kdialog` / `zenity`) | ✅ if installed | Falls back to interactive terminal prompt | | Linux browser annotations | ✅ always | Stored-anchor screenshots, isolated marker rendering | +| Linux AppShots | ✅ always | Global hotkey and composer capture attach the focused Linux window with screenshot and AT-SPI text through the bundled Computer Use backend | | Chrome plugin native host | ✅ always | Auto-installs the upstream Chrome plugin plus Linux native-messaging support for Chrome, Brave, and Chromium | | Linux Computer Use | ⚠️ opt-in | MCP backend registers by default; the in-app UI is opt-in. Supports screenshots, accessibility, window targeting, and input synthesis | | Linux Read Aloud | 🧪 opt-in experiment | `linux-features/read-aloud` adds an explicit response speaker button; `linux-features/read-aloud-mcp` stages a separate MCP plugin so the agent can read text aloud on request | @@ -210,7 +211,7 @@ The scheduled `Populate Cachix` workflow builds the default Codex Desktop packag Linux Computer Use is an **opt-in** plugin that lets Codex inspect and control desktop apps on Linux through a native Rust MCP backend (`codex-computer-use-linux`). 
It is designed and maintained by [@avifenesh](https://github.com/avifenesh) and supports: - app listing and accessibility trees via AT-SPI -- screenshots through GNOME Shell DBus or XDG Desktop Portal +- screenshots through GNOME Shell DBus, XDG Desktop Portal, or CLI fallbacks such as `grim`, `gnome-screenshot`, `spectacle`, and ImageMagick `import` - window listing and focusing on GNOME, KWin/Plasma, Hyprland, and i3 - keyboard, text, click, scroll, and drag input through a uinput absolute pointer, the XDG Desktop Portal RemoteDesktop session, or `ydotool` @@ -263,9 +264,16 @@ You can also invoke the backend binary directly: ./codex-app/resources/plugins/openai-bundled/plugins/computer-use/bin/codex-computer-use-linux setup # enables GNOME accessibility ./codex-app/resources/plugins/openai-bundled/plugins/computer-use/bin/codex-computer-use-linux apps # lists running apps via AT-SPI ./codex-app/resources/plugins/openai-bundled/plugins/computer-use/bin/codex-computer-use-linux windows # lists targetable windows +./codex-app/resources/plugins/openai-bundled/plugins/computer-use/bin/codex-computer-use-linux focused-window ./codex-app/resources/plugins/openai-bundled/plugins/computer-use/bin/codex-computer-use-linux screenshot +./codex-app/resources/plugins/openai-bundled/plugins/computer-use/bin/codex-computer-use-linux appshot [APP_NAME|pid:PID] ``` +For the full AppShots UI path, use the AppShots hotkey in the running app. On +Linux the default is pressing both Shift keys at once. The AppShots settings +page also offers both Alt keys, plus `Ctrl+Alt+A` as a normal-accelerator +fallback. + ### Enabling Computer Use UI By default the MCP backend registers, but the Codex Desktop sidebar does not surface the Computer Use controls. 
If you want to use it through the in-app UI, opt in by setting one of: diff --git a/computer-use-linux/src/appshot.rs b/computer-use-linux/src/appshot.rs new file mode 100644 index 00000000..e79e6f55 --- /dev/null +++ b/computer-use-linux/src/appshot.rs @@ -0,0 +1,409 @@ +use crate::atspi_tree::{snapshot_tree, AccessibilityNode}; +use crate::screenshot::{capture_screenshot, ScreenshotCapture}; +use crate::windows::{focused_window, WindowBounds, WindowInfo}; +use anyhow::{Context, Result}; +use base64::{engine::general_purpose::STANDARD, Engine}; +use image::{DynamicImage, ImageFormat}; +use serde::Serialize; +use std::io::Cursor; + +const MAX_APP_FILTER_CHARS: usize = 256; +const MAX_AX_TEXT_CHARS: usize = 60_000; +const MAX_FIELD_CHARS: usize = 500; +const SNAPSHOT_NODE_LIMIT: usize = 220; +const SNAPSHOT_DEPTH_LIMIT: u32 = 12; + +#[derive(Debug, Clone, Serialize)] +pub struct AppshotCapture { + pub focused_window: Option, + pub focused_window_error: Option, + pub screenshot: Option, + pub screenshot_error: Option, + pub accessibility_nodes: Vec, + pub accessibility_error: Option, + pub accessibility_text: String, +} + +pub async fn capture_appshot(app_filter: Option<&str>) -> AppshotCapture { + let (focused_window, focused_window_error) = match focused_window().await { + Ok(window) => (window, None), + Err(error) => (None, Some(format!("{error:#}"))), + }; + + let (screenshot, screenshot_error) = match capture_screenshot().await { + Ok(capture) => ( + Some( + crop_capture_to_window(capture, focused_window.as_ref()) + .unwrap_or_else(|capture| capture), + ), + None, + ), + Err(error) => (None, Some(format!("{error:#}"))), + }; + + let selector = accessibility_selector(app_filter, focused_window.as_ref()); + let (accessibility_nodes, accessibility_error) = if selector.app_filter.is_none() + && selector.target_pid.is_none() + { + ( + Vec::new(), + Some("no focused window or app filter was available for AT-SPI capture".to_string()), + ) + } else { + match 
snapshot_tree( + selector.app_filter.as_deref(), + selector.target_pid, + SNAPSHOT_NODE_LIMIT, + SNAPSHOT_DEPTH_LIMIT, + ) + .await + { + Ok(nodes) => (nodes, None), + Err(error) => (Vec::new(), Some(format!("{error:#}"))), + } + }; + + let accessibility_text = appshot_accessibility_text( + focused_window.as_ref(), + &accessibility_nodes, + accessibility_error.as_deref(), + ); + + AppshotCapture { + focused_window, + focused_window_error, + screenshot, + screenshot_error, + accessibility_nodes, + accessibility_error, + accessibility_text, + } +} + +fn crop_capture_to_window( + capture: ScreenshotCapture, + focused_window: Option<&WindowInfo>, +) -> std::result::Result { + let Some(bounds) = focused_window.and_then(|window| window.bounds.as_ref()) else { + return Err(capture); + }; + let Ok(cropped) = crop_capture_to_bounds(&capture, bounds) else { + return Err(capture); + }; + Ok(cropped) +} + +fn crop_capture_to_bounds( + capture: &ScreenshotCapture, + bounds: &WindowBounds, +) -> Result { + let Some((x, y, width, height)) = crop_rect(capture.width, capture.height, bounds) else { + return Ok(capture.clone()); + }; + + if x == 0 && y == 0 && width == capture.width && height == capture.height { + return Ok(capture.clone()); + } + + let image = decode_png_data_url(&capture.data_url)?; + let cropped = image.crop_imm(x, y, width, height); + let mut bytes = Cursor::new(Vec::new()); + cropped + .write_to(&mut bytes, ImageFormat::Png) + .context("failed to encode cropped screenshot PNG")?; + let encoded = STANDARD.encode(bytes.into_inner()); + + Ok(ScreenshotCapture { + mime_type: "image/png".to_string(), + data_url: format!("data:image/png;base64,{encoded}"), + source: format!("{}:window-crop", capture.source), + width, + height, + }) +} + +fn decode_png_data_url(data_url: &str) -> Result { + let encoded = data_url + .strip_prefix("data:image/png;base64,") + .context("screenshot data URL was not a PNG data URL")?; + let bytes = STANDARD + .decode(encoded) + 
.context("failed to decode screenshot data URL")?; + image::load_from_memory_with_format(&bytes, ImageFormat::Png) + .context("failed to decode screenshot PNG") +} + +fn crop_rect( + image_width: u32, + image_height: u32, + bounds: &WindowBounds, +) -> Option<(u32, u32, u32, u32)> { + let x = bounds.x.unwrap_or(0).max(0) as u32; + let y = bounds.y.unwrap_or(0).max(0) as u32; + if x >= image_width || y >= image_height { + return None; + } + + let width = bounds.width.min(image_width - x); + let height = bounds.height.min(image_height - y); + if width == 0 || height == 0 { + return None; + } + + Some((x, y, width, height)) +} + +struct AccessibilitySelector { + app_filter: Option, + target_pid: Option, +} + +fn accessibility_selector( + app_filter: Option<&str>, + focused_window: Option<&WindowInfo>, +) -> AccessibilitySelector { + let explicit_filter = app_filter + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(truncate_filter); + let explicit_pid = explicit_filter + .as_deref() + .and_then(|value| value.strip_prefix("pid:")) + .and_then(|value| value.parse::().ok()); + let app_filter = explicit_filter + .filter(|value| !value.starts_with("pid:") && !value.starts_with("window:")) + .or_else(|| { + focused_window.and_then(|window| { + first_non_empty([ + window.app_id.as_deref(), + window.wm_class.as_deref(), + window.title.as_deref(), + ]) + .map(truncate_filter) + }) + }); + + AccessibilitySelector { + app_filter, + target_pid: explicit_pid.or_else(|| focused_window.and_then(|window| window.pid)), + } +} + +pub fn appshot_accessibility_text( + focused_window: Option<&WindowInfo>, + nodes: &[AccessibilityNode], + accessibility_error: Option<&str>, +) -> String { + let mut output = String::new(); + output.push_str("Linux AppShot accessibility snapshot\n"); + if let Some(window) = focused_window { + if let Some(app_name) = + first_non_empty([window.app_id.as_deref(), window.wm_class.as_deref()]) + { + push_capped_line( + &mut output, + 
&format!("Application: {}", normalize_field(app_name)), + ); + } + if let Some(title) = window + .title + .as_deref() + .filter(|value| !value.trim().is_empty()) + { + let app_name = first_non_empty([window.app_id.as_deref(), window.wm_class.as_deref()]) + .map(normalize_field) + .unwrap_or_else(|| "Linux app".to_string()); + push_capped_line( + &mut output, + &format!( + "Window: \"{}\", App: {}", + normalize_field(title).replace('"', "'"), + app_name + ), + ); + } + if let Some(pid) = window.pid { + push_capped_line(&mut output, &format!("Process ID: {pid}")); + } + } + if let Some(error) = accessibility_error.filter(|value| !value.trim().is_empty()) { + push_capped_line( + &mut output, + &format!("Accessibility error: {}", normalize_field(error)), + ); + } + if nodes.is_empty() { + push_capped_line(&mut output, "No accessible UI elements were captured."); + return output; + } + + output.push('\n'); + output.push_str("Elements:\n"); + for node in nodes { + if output.len() >= MAX_AX_TEXT_CHARS { + output.push_str("...\n"); + break; + } + push_capped_line(&mut output, &node_line(node)); + } + + output +} + +fn node_line(node: &AccessibilityNode) -> String { + let indent = " ".repeat(node.depth.min(12) as usize); + let mut parts = vec![node.role.clone()]; + if let Some(name) = node.name.as_deref().map(normalize_field) { + parts.push(format!("name=\"{}\"", name.replace('"', "'"))); + } + if let Some(description) = node.description.as_deref().map(normalize_field) { + parts.push(format!("description=\"{}\"", description.replace('"', "'"))); + } + if let Some(text) = node + .text + .as_ref() + .and_then(|text| text.content.as_deref()) + .map(normalize_field) + { + parts.push(format!("text=\"{}\"", text.replace('"', "'"))); + } + if let Some(value) = node + .value + .as_ref() + .and_then(|value| value.text.as_deref()) + .map(normalize_field) + { + parts.push(format!("value=\"{}\"", value.replace('"', "'"))); + } + if let Some(bounds) = node.bounds.as_ref() { + 
parts.push(format!( + "bounds={}x{}+{}+{}", + bounds.width, bounds.height, bounds.x, bounds.y + )); + } + if !node.states.is_empty() { + parts.push(format!("states={}", node.states.join(","))); + } + + format!("{indent}- {}", parts.join(" ")) +} + +fn first_non_empty<'a, const N: usize>(values: [Option<&'a str>; N]) -> Option<&'a str> { + values + .into_iter() + .flatten() + .map(str::trim) + .find(|value| !value.is_empty()) +} + +fn truncate_filter(value: &str) -> String { + truncate_chars(value, MAX_APP_FILTER_CHARS) +} + +fn normalize_field(value: &str) -> String { + truncate_chars(&collapse_whitespace(value), MAX_FIELD_CHARS) +} + +fn collapse_whitespace(value: &str) -> String { + value.split_whitespace().collect::>().join(" ") +} + +fn truncate_chars(value: &str, max_chars: usize) -> String { + let mut iter = value.chars(); + let truncated = iter.by_ref().take(max_chars).collect::(); + if iter.next().is_some() { + format!("{truncated}...") + } else { + truncated + } +} + +fn push_capped_line(output: &mut String, line: &str) { + if output.len() >= MAX_AX_TEXT_CHARS { + return; + } + let remaining = MAX_AX_TEXT_CHARS - output.len(); + if line.len() + 1 <= remaining { + output.push_str(line); + output.push('\n'); + return; + } + + let slice = truncate_chars(line, remaining.saturating_sub(4)); + output.push_str(&slice); + output.push_str("...\n"); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::windows::{WindowBounds, GNOME_SHELL_INTROSPECT_BACKEND}; + + fn window() -> WindowInfo { + WindowInfo { + window_id: 42, + title: Some("Codex Desktop".to_string()), + app_id: Some("codex-desktop".to_string()), + wm_class: Some("codex-desktop".to_string()), + pid: Some(1234), + bounds: Some(WindowBounds { + x: Some(10), + y: Some(20), + width: 800, + height: 600, + }), + workspace: None, + focused: true, + hidden: false, + client_type: Some("wayland".to_string()), + backend: GNOME_SHELL_INTROSPECT_BACKEND.to_string(), + terminal: None, + } + } + + #[test] + fn 
accessibility_selector_prefers_explicit_pid() { + let selector = accessibility_selector(Some("pid:999"), Some(&window())); + + assert_eq!(selector.app_filter, Some("codex-desktop".to_string())); + assert_eq!(selector.target_pid, Some(999)); + } + + #[test] + fn accessibility_text_includes_renderer_window_title_hint() { + let text = appshot_accessibility_text(Some(&window()), &[], None); + + assert!(text.contains("Application: codex-desktop")); + assert!(text.contains("Window: \"Codex Desktop\", App: codex-desktop")); + assert!(text.contains("No accessible UI elements were captured.")); + } + + #[test] + fn crop_rect_clamps_window_bounds_to_screenshot() { + let rect = crop_rect( + 100, + 80, + &WindowBounds { + x: Some(10), + y: Some(20), + width: 200, + height: 200, + }, + ); + + assert_eq!(rect, Some((10, 20, 90, 60))); + assert_eq!( + crop_rect( + 100, + 80, + &WindowBounds { + x: Some(200), + y: Some(0), + width: 10, + height: 10, + }, + ), + None + ); + } +} diff --git a/computer-use-linux/src/bare_modifier.rs b/computer-use-linux/src/bare_modifier.rs new file mode 100644 index 00000000..68a93405 --- /dev/null +++ b/computer-use-linux/src/bare_modifier.rs @@ -0,0 +1,331 @@ +use anyhow::{bail, Context, Result}; +use std::collections::HashMap; +use std::io::{BufRead, BufReader, Write}; +use std::process::{Command, Stdio}; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum EventKind { + Press, + Release, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum TriggerMode { + Press, + Release, +} + +#[derive(Debug)] +struct KeyPair { + canonical: &'static str, + left_symbols: &'static [&'static str], + right_symbols: &'static [&'static str], + fallback_left: u16, + fallback_right: u16, +} + +#[derive(Debug)] +struct MonitorState { + left_down: bool, + right_down: bool, + armed: bool, + trigger_mode: TriggerMode, +} + +pub fn run(args: I) -> Result<()> +where + I: IntoIterator, +{ + let (key, trigger_mode) = parse_args(args)?; + let pair = + 
key_pair(&key).with_context(|| format!("unsupported bare modifier hotkey '{key}'"))?; + let (left_code, right_code) = resolve_keycodes(pair)?; + monitor_xinput(left_code, right_code, trigger_mode) +} + +fn parse_args(args: I) -> Result<(String, TriggerMode)> +where + I: IntoIterator, +{ + let mut key = None; + let mut trigger_mode = TriggerMode::Press; + let mut args = args.into_iter(); + + while let Some(arg) = args.next() { + match arg.as_str() { + "--key" => { + key = args.next(); + if key.is_none() { + bail!("--key requires a value"); + } + } + "--immediate" => trigger_mode = TriggerMode::Press, + "--trigger-on-release" => trigger_mode = TriggerMode::Release, + _ if key.is_none() && !arg.starts_with('-') => key = Some(arg), + _ => bail!("unknown bare modifier monitor argument '{arg}'"), + } + } + + let key = key.context("missing --key")?; + Ok((key, trigger_mode)) +} + +fn key_pair(key: &str) -> Option<&'static KeyPair> { + let normalized = normalize_key(key); + KEY_PAIRS.iter().find(|pair| { + normalize_key(pair.canonical) == normalized + || pair + .aliases() + .iter() + .any(|alias| normalize_key(alias) == normalized) + }) +} + +fn normalize_key(key: &str) -> String { + key.chars() + .filter(|ch| !ch.is_ascii_whitespace() && *ch != '-' && *ch != '_') + .flat_map(char::to_lowercase) + .collect() +} + +fn resolve_keycodes(pair: &KeyPair) -> Result<(u16, u16)> { + let xmodmap = read_xmodmap_keycodes().unwrap_or_default(); + let left = find_keysym_code(&xmodmap, pair.left_symbols).unwrap_or(pair.fallback_left); + let right = find_keysym_code(&xmodmap, pair.right_symbols).unwrap_or(pair.fallback_right); + if left == right { + bail!( + "left and right {} keys resolved to the same keycode", + pair.canonical + ); + } + Ok((left, right)) +} + +fn read_xmodmap_keycodes() -> Result> { + let output = Command::new("xmodmap") + .arg("-pke") + .output() + .context("failed to run xmodmap")?; + if !output.status.success() { + bail!("xmodmap -pke failed"); + } + 
Ok(parse_xmodmap(&String::from_utf8_lossy(&output.stdout))) +} + +fn parse_xmodmap(output: &str) -> HashMap { + let mut keycodes = HashMap::new(); + for line in output.lines() { + let Some((prefix, symbols)) = line.split_once('=') else { + continue; + }; + let mut prefix_parts = prefix.split_whitespace(); + if prefix_parts.next() != Some("keycode") { + continue; + } + let Some(code) = prefix_parts + .next() + .and_then(|part| part.parse::().ok()) + else { + continue; + }; + for symbol in symbols.split_whitespace() { + keycodes.entry(symbol.to_string()).or_insert(code); + } + } + keycodes +} + +fn find_keysym_code(keycodes: &HashMap, symbols: &[&str]) -> Option { + symbols + .iter() + .find_map(|symbol| keycodes.get(*symbol).copied()) +} + +fn monitor_xinput(left_code: u16, right_code: u16, trigger_mode: TriggerMode) -> Result<()> { + let mut child = Command::new("xinput") + .args(["test-xi2", "--root"]) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .spawn() + .context("failed to start xinput test-xi2 --root")?; + let stdout = child.stdout.take().context("xinput stdout unavailable")?; + println!("ready"); + std::io::stdout().flush().ok(); + + let mut state = MonitorState { + left_down: false, + right_down: false, + armed: false, + trigger_mode, + }; + let mut pending_event = None; + + for line in BufReader::new(stdout).lines() { + let line = line?; + let trimmed = line.trim(); + if trimmed.contains("(RawKeyPress)") { + pending_event = Some(EventKind::Press); + continue; + } + if trimmed.contains("(RawKeyRelease)") { + pending_event = Some(EventKind::Release); + continue; + } + if let Some(kind) = pending_event { + if let Some(code) = parse_detail(trimmed) { + handle_event(&mut state, kind, code, left_code, right_code); + pending_event = None; + } + } + } + + let _ = child.kill(); + Ok(()) +} + +fn parse_detail(line: &str) -> Option { + let detail = line.strip_prefix("detail:")?.trim(); + detail.parse().ok() +} + +fn handle_event( + state: &mut MonitorState, + 
kind: EventKind, + code: u16, + left_code: u16, + right_code: u16, +) { + let was_chord_down = state.left_down && state.right_down; + + match kind { + EventKind::Press if code == left_code => state.left_down = true, + EventKind::Press if code == right_code => state.right_down = true, + EventKind::Release if code == left_code => state.left_down = false, + EventKind::Release if code == right_code => state.right_down = false, + _ => return, + } + + let chord_down = state.left_down && state.right_down; + if chord_down && !state.armed { + state.armed = true; + if state.trigger_mode == TriggerMode::Press { + println!("down"); + std::io::stdout().flush().ok(); + } + } + + if was_chord_down && !chord_down && state.armed { + if state.trigger_mode == TriggerMode::Release { + println!("down"); + } + println!("up"); + std::io::stdout().flush().ok(); + state.armed = false; + } +} + +impl KeyPair { + fn aliases(&self) -> &'static [&'static str] { + match self.canonical { + "DoubleShift" => &["Shift+Shift", "LeftShift+RightShift"], + "DoubleAlt" => &[ + "Alt+Alt", + "Option+Option", + "DoubleOption", + "LeftAlt+RightAlt", + "LeftOption+RightOption", + ], + "DoubleSuper" => &[ + "Super+Super", + "Meta+Meta", + "Command+Command", + "DoubleCommand", + "DoubleMeta", + "LeftSuper+RightSuper", + "LeftCommand+RightCommand", + "LeftMeta+RightMeta", + ], + "DoubleControl" => &[ + "Control+Control", + "Ctrl+Ctrl", + "DoubleCtrl", + "LeftControl+RightControl", + "LeftCtrl+RightCtrl", + ], + _ => &[], + } + } +} + +static KEY_PAIRS: &[KeyPair] = &[ + KeyPair { + canonical: "DoubleShift", + left_symbols: &["Shift_L"], + right_symbols: &["Shift_R"], + fallback_left: 50, + fallback_right: 62, + }, + KeyPair { + canonical: "DoubleAlt", + left_symbols: &["Alt_L", "Meta_L"], + right_symbols: &["Alt_R", "ISO_Level3_Shift", "Meta_R"], + fallback_left: 64, + fallback_right: 108, + }, + KeyPair { + canonical: "DoubleSuper", + left_symbols: &["Super_L", "Meta_L", "Hyper_L"], + right_symbols: 
&["Super_R", "Meta_R", "Hyper_R"], + fallback_left: 133, + fallback_right: 134, + }, + KeyPair { + canonical: "DoubleControl", + left_symbols: &["Control_L"], + right_symbols: &["Control_R"], + fallback_left: 37, + fallback_right: 105, + }, +]; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_xmodmap_keycodes() { + let parsed = parse_xmodmap( + "keycode 50 = Shift_L ISO_Prev_Group Shift_L\nkeycode 62 = Shift_R ISO_Next_Group Shift_R\n", + ); + assert_eq!(parsed.get("Shift_L"), Some(&50)); + assert_eq!(parsed.get("Shift_R"), Some(&62)); + } + + #[test] + fn normalizes_modifier_aliases() { + assert_eq!(key_pair("Shift + Shift").unwrap().canonical, "DoubleShift"); + assert_eq!(key_pair("DoubleOption").unwrap().canonical, "DoubleAlt"); + assert_eq!( + key_pair("left_meta + right_meta").unwrap().canonical, + "DoubleSuper" + ); + } + + #[test] + fn emits_press_and_release_once_per_chord() { + let mut state = MonitorState { + left_down: false, + right_down: false, + armed: false, + trigger_mode: TriggerMode::Press, + }; + handle_event(&mut state, EventKind::Press, 50, 50, 62); + assert!(!state.armed); + handle_event(&mut state, EventKind::Press, 62, 50, 62); + assert!(state.armed); + handle_event(&mut state, EventKind::Press, 62, 50, 62); + assert!(state.armed); + handle_event(&mut state, EventKind::Release, 50, 50, 62); + assert!(!state.armed); + } +} diff --git a/computer-use-linux/src/main.rs b/computer-use-linux/src/main.rs index 0735b2c9..52f70c98 100644 --- a/computer-use-linux/src/main.rs +++ b/computer-use-linux/src/main.rs @@ -1,5 +1,7 @@ mod abs_pointer; +mod appshot; mod atspi_tree; +mod bare_modifier; mod cosmic_helper; mod diagnostics; mod gnome_extension; @@ -93,6 +95,44 @@ async fn main() -> Result<()> { ); Ok(()) } + Some("focused-window") => { + let report = match windows::focused_window().await { + Ok(window) => { + let backend = window + .as_ref() + .map(|window| window.backend.as_str()) + 
.unwrap_or(windows::GNOME_SHELL_INTROSPECT_BACKEND); + serde_json::json!({ + "backend": backend, + "focused_window": window, + "error": null, + "permissions_hint": null, + }) + } + Err(error) => { + let error = format!("{error:#}"); + serde_json::json!({ + "backend": "unavailable", + "focused_window": null, + "error": error, + "permissions_hint": windows::window_permission_hint(&error), + }) + } + }; + println!("{}", serde_json::to_string_pretty(&report)?); + Ok(()) + } + Some("appshot") => { + let app_filter = std::env::args().nth(2); + let capture = appshot::capture_appshot(app_filter.as_deref()).await; + println!( + "{}", + serde_json::to_string_pretty(&capture) + .context("failed to serialize AppShot capture")? + ); + Ok(()) + } + Some("bare-modifier-monitor") => bare_modifier::run(std::env::args().skip(2)), Some("windows") => { let report = match windows::list_windows().await { Ok(windows) => { @@ -135,7 +175,7 @@ async fn main() -> Result<()> { } Some(command) => { anyhow::bail!( - "unknown command '{command}'. Expected one of: mcp, doctor, setup, apps, state, screenshot, windows, setup-window-targeting" + "unknown command '{command}'. 
Expected one of: mcp, doctor, setup, apps, state, screenshot, focused-window, appshot, bare-modifier-monitor, windows, setup-window-targeting" ); } None => { @@ -147,6 +187,6 @@ async fn main() -> Result<()> { fn print_help() { println!( - "codex-computer-use-linux\n\nUsage:\n codex-computer-use-linux mcp\n codex-computer-use-linux doctor\n codex-computer-use-linux setup\n codex-computer-use-linux setup-window-targeting\n codex-computer-use-linux apps\n codex-computer-use-linux state [APP_NAME]\n codex-computer-use-linux screenshot\n codex-computer-use-linux windows" + "codex-computer-use-linux\n\nUsage:\n codex-computer-use-linux mcp\n codex-computer-use-linux doctor\n codex-computer-use-linux setup\n codex-computer-use-linux setup-window-targeting\n codex-computer-use-linux apps\n codex-computer-use-linux state [APP_NAME]\n codex-computer-use-linux screenshot\n codex-computer-use-linux focused-window\n codex-computer-use-linux appshot [APP_NAME|pid:PID]\n codex-computer-use-linux bare-modifier-monitor --key DoubleShift [--immediate]\n codex-computer-use-linux windows" ); } diff --git a/computer-use-linux/src/screenshot.rs b/computer-use-linux/src/screenshot.rs index bfac45ca..b54684c9 100644 --- a/computer-use-linux/src/screenshot.rs +++ b/computer-use-linux/src/screenshot.rs @@ -7,7 +7,10 @@ use serde::Serialize; use std::{ collections::HashMap, fs, + io::Read, path::{Path, PathBuf}, + process::{Command, Stdio}, + thread, time::{Duration, SystemTime, UNIX_EPOCH}, }; use zbus::{ @@ -41,9 +44,12 @@ pub async fn capture_screenshot() -> Result { Ok(capture) => Ok(capture), Err(gnome_error) => match capture_with_portal().await { Ok(capture) => Ok(capture), - Err(portal_error) => Err(anyhow!( - "GNOME Shell screenshot failed: {gnome_error}; XDG portal screenshot failed: {portal_error}" - )), + Err(portal_error) => match capture_with_cli_fallback().await { + Ok(capture) => Ok(capture), + Err(cli_error) => Err(anyhow!( + "GNOME Shell screenshot failed: {gnome_error}; 
XDG portal screenshot failed: {portal_error}; CLI screenshot fallback failed: {cli_error}" + )), + }, }, } } @@ -135,6 +141,144 @@ async fn capture_with_portal() -> Result { read_png_as_capture(path, "xdg-desktop-portal", ScreenshotCleanup::Preserve).await } +async fn capture_with_cli_fallback() -> Result { + let mut attempts = Vec::new(); + for candidate in screenshot_command_candidates() { + if !command_exists(candidate.program) { + attempts.push(format!("{} not found", candidate.program)); + continue; + } + + let path = temp_png_path(candidate.source); + let result = run_screenshot_command(&candidate, &path) + .and_then(|_| read_png_as_capture_inner(&path, candidate.source)); + cleanup_gnome_requested_path(&path); + + match result { + Ok(capture) => return Ok(capture), + Err(error) => attempts.push(format!("{} failed: {error:#}", candidate.program)), + } + } + + bail!("{}", attempts.join("; ")) +} + +#[derive(Debug, Clone, Copy)] +struct ScreenshotCommand { + source: &'static str, + program: &'static str, + args: &'static [&'static str], + output_path_arg: OutputPathArg, +} + +#[derive(Debug, Clone, Copy)] +enum OutputPathArg { + Append, + After(&'static str), +} + +fn screenshot_command_candidates() -> Vec { + vec![ + ScreenshotCommand { + source: "grim", + program: "grim", + args: &[], + output_path_arg: OutputPathArg::Append, + }, + ScreenshotCommand { + source: "gnome-screenshot", + program: "gnome-screenshot", + args: &[], + output_path_arg: OutputPathArg::After("-f"), + }, + ScreenshotCommand { + source: "spectacle", + program: "spectacle", + args: &["-b", "-n"], + output_path_arg: OutputPathArg::After("-o"), + }, + ScreenshotCommand { + source: "imagemagick-import", + program: "import", + args: &["-window", "root"], + output_path_arg: OutputPathArg::Append, + }, + ] +} + +fn run_screenshot_command(candidate: &ScreenshotCommand, path: &Path) -> Result<()> { + let path = path + .to_str() + .context("temporary screenshot path is not valid UTF-8")?; + let 
args = screenshot_command_args(candidate, path); + let mut child = Command::new(candidate.program) + .args(&args) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::piped()) + .spawn() + .with_context(|| format!("failed to spawn {}", candidate.program))?; + let started_at = std::time::Instant::now(); + + loop { + if let Some(status) = child + .try_wait() + .with_context(|| format!("failed to wait for {}", candidate.program))? + { + let mut stderr = String::new(); + if let Some(mut stream) = child.stderr.take() { + let _ = stream.read_to_string(&mut stderr); + } + if status.success() { + return Ok(()); + } + let stderr = stderr.trim(); + if stderr.is_empty() { + bail!("{} exited with status {status}", candidate.program); + } + bail!( + "{} exited with status {status}: {stderr}", + candidate.program + ); + } + + if started_at.elapsed() >= Duration::from_secs(10) { + let _ = child.kill(); + let _ = child.wait(); + bail!("{} timed out", candidate.program); + } + thread::sleep(Duration::from_millis(50)); + } +} + +fn screenshot_command_args(candidate: &ScreenshotCommand, output_path: &str) -> Vec { + let mut args = candidate + .args + .iter() + .map(|arg| (*arg).to_string()) + .collect::>(); + match candidate.output_path_arg { + OutputPathArg::Append => args.push(output_path.to_string()), + OutputPathArg::After(flag) => { + args.push(flag.to_string()); + args.push(output_path.to_string()); + } + } + args +} + +fn command_exists(program: &str) -> bool { + let program_path = Path::new(program); + if program_path.components().count() > 1 { + return program_path.is_file(); + } + + std::env::var_os("PATH") + .into_iter() + .flat_map(|paths| std::env::split_paths(&paths).collect::>()) + .any(|dir| dir.join(program).is_file()) +} + async fn portal_response_stream(connection: &zbus::Connection) -> Result { let response_rule = MatchRule::builder() .msg_type(MessageType::Signal) @@ -291,6 +435,31 @@ mod tests { png } + #[test] + fn builds_cli_screenshot_args() { + 
let import = ScreenshotCommand { + source: "imagemagick-import", + program: "import", + args: &["-window", "root"], + output_path_arg: OutputPathArg::Append, + }; + assert_eq!( + screenshot_command_args(&import, "/tmp/shot.png"), + vec!["-window", "root", "/tmp/shot.png"] + ); + + let spectacle = ScreenshotCommand { + source: "spectacle", + program: "spectacle", + args: &["-b", "-n"], + output_path_arg: OutputPathArg::After("-o"), + }; + assert_eq!( + screenshot_command_args(&spectacle, "/tmp/shot.png"), + vec!["-b", "-n", "-o", "/tmp/shot.png"] + ); + } + #[test] fn decodes_file_uri_percent_escapes() { assert_eq!( diff --git a/scripts/patch-linux-window-ui.js b/scripts/patch-linux-window-ui.js index fc8a259e..045a39d1 100644 --- a/scripts/patch-linux-window-ui.js +++ b/scripts/patch-linux-window-ui.js @@ -30,6 +30,12 @@ const { applyAutomationScheduleMultiTimePatch, patchAutomationScheduleAssets, } = require("./patches/automation-schedule.js"); +const { + applyLinuxAppshotAvailabilityPatch, + applyLinuxAppshotHotkeyPatch, + applyLinuxAppshotMainProcessPatch, + applyLinuxAppshotSettingsHotkeyPatch, +} = require("./patches/appshots.js"); const { applyLinuxChromePluginAutoInstallPatch, } = require("./patches/chrome-plugin.js"); @@ -155,6 +161,10 @@ module.exports = { applyKeybindsSettingsSectionsPatch, applyKeybindsSettingsSharedPatch, applyLinuxAppSunsetPatch, + applyLinuxAppshotAvailabilityPatch, + applyLinuxAppshotHotkeyPatch, + applyLinuxAppshotMainProcessPatch, + applyLinuxAppshotSettingsHotkeyPatch, applyLinuxAppUpdaterBridgePatch, applyLinuxAppUpdaterMenuPatch, applyLinuxAvatarOverlayMousePassthroughPatch, diff --git a/scripts/patch-linux-window-ui.test.js b/scripts/patch-linux-window-ui.test.js index 5de91127..77f4e408 100644 --- a/scripts/patch-linux-window-ui.test.js +++ b/scripts/patch-linux-window-ui.test.js @@ -14,6 +14,10 @@ const { COMPUTER_USE_UI_SETTINGS_KEY, applyAutomationScheduleMultiTimePatch, applyKeybindsSettingsIndexPatch, + 
applyLinuxAppshotAvailabilityPatch, + applyLinuxAppshotHotkeyPatch, + applyLinuxAppshotMainProcessPatch, + applyLinuxAppshotSettingsHotkeyPatch, applyLinuxComputerUseFeaturePatch, applyLinuxComputerUseInstallFlowPatch, applyLinuxComputerUsePluginGatePatch, @@ -497,6 +501,8 @@ test("default core patch descriptors are grouped and unique", () => { "linux-single-instance", "linux-computer-use-ui-feature", "linux-computer-use-plugin-gate", + "linux-appshots-main-process", + "linux-appshots-hotkey", "linux-chrome-plugin-auto-install", "browser-use-node-repl-approval", "linux-chrome-extension-status", @@ -514,6 +520,8 @@ test("default core patch descriptors are grouped and unique", () => { "opaque-window-default-webview-index", "opaque-window-default-resolved-theme", "subagent-nickname-metadata-shape", + "linux-appshots-availability", + "linux-appshots-settings-hotkey", "linux-computer-use-ui-availability", "linux-computer-use-install-flow", "linux-app-updater-bridge", @@ -579,6 +587,37 @@ function computerUseGateBundleFixture() { ].join(""); } +function appshotAvailabilityBundleFixture() { + return [ + "import{o as e}from\"./statsig-CpJRDK88.js\";import{t}from\"./use-platform-ByMJlQVq.js\";", + "function n(){let{platform:n}=t(),r=e(`1304276663`);return n===`macOS`&&r}export{n as t};", + ].join(""); +} + +function appshotMainProcessBundleFixture() { + return [ + "var F=`codex_desktop:message-for-view`;function nS(e,t){e.isDestroyed()||e.send(F,t)}", + "\"computer-use-frontmost-window\":async()=>process.platform===`darwin`?Xo():null,", + "\"computer-use-start-capture\":async({animationDestination:e,bundleIdentifier:t,origin:n,requestId:r})=>{if(process.platform!==`darwin`||this.requestComputerUseCaptureWorker==null||this.subscribeComputerUseCaptureWorkerEvent==null)return null;let i=GO({backgroundColor:e.backgroundColor,cornerRadius:e.cornerRadius,primaryTextColor:e.primaryTextColor,viewportFrame:e.viewportFrame,webContents:n});return 
i==null?null:eS({animationTarget:i,bundleIdentifier:t,origin:n,requestComputerUseCaptureWorker:this.requestComputerUseCaptureWorker,requestId:r,subscribeComputerUseCaptureWorkerEvent:this.subscribeComputerUseCaptureWorkerEvent})}", + ].join(""); +} + +function appshotHotkeyMainBundleFixture() { + return [ + "var uG=`DoubleCommand`,dG=6e4;", + "function rw(e,t=process.platform){return t===`darwin`&&aw(e)!=null}", + "function bw(e,t,r){if(iw(e))return rw(e)?QC(e,t,r?.bareModifierTrigger):null;let i=Ew(e),a=()=>{t.onPressed()},o=n.globalShortcut.register(i,a);return o?{handlesRelease:!1,unregister:()=>{n.globalShortcut.unregister(i)}}:null}", + "function fG({globalState:e,windowManager:n,enabled:r}){let i=e.get(`appshotHotkey`)??uG,a=null,o=()=>({supported:r&&process.platform===`darwin`,configuredHotkey:i,isActive:a!=null}),s=()=>{if(a?.unregister(),a=null,!r||process.platform!==`darwin`||i==null){t.$r().info(`Appshot hotkey inactive`,{safe:{enabled:r,platform:process.platform,configured:i!=null},sensitive:{}});return}if(t.$r().info(`Registering appshot hotkey`,{safe:{hotkey:i},sensitive:{}}),Cw(i,{bareModifierTrigger:`immediatePress`}),a=bw(i,{onPressed:()=>{t.$r().info(`Appshot hotkey pressed`,{safe:{hotkey:i},sensitive:{}});let e=n.getPrimaryWindow();if(e==null||e.isDestroyed()){return}let r=n.wasPrimaryWindowFocusedWithin(e,dG);r||n.sendMessageToWindow(e,{type:`navigate-to-route`,path:`/`,state:{focusComposerNonce:Date.now()}}),n.sendMessageToWindow(e,{type:`appshot-shortcut`})}},{bareModifierTrigger:`immediatePress`}),a==null)throw Error(`Unable to register shortcut: ${i}`);t.$r().info(`Registered appshot hotkey`,{safe:{hotkey:i},sensitive:{}})},c=n=>{if(!r||process.platform!==`darwin`)return{success:!1,error:`Not supported.`,state:o()};if(n!=null){let e=Sw(n);if(e!=null)return{success:!1,error:e,state:o()}}let a=i;i=n;try{s()}catch(e){i=a;return{success:!1,error:e instanceof Error?e.message:String(e),state:o()}}return e.set(`appshotHotkey`,i??void 
0),{success:!0,state:o()}};try{s()}catch(e){}return{getState:o,setHotkey:c,dispose:()=>{a?.unregister(),a=null}}}", + ].join(""); +} + +function appshotSettingsBundleFixture() { + return [ + "var O=d(),A=e(t(),1),j=n(),M=[{hotkey:`DoubleCommand`,label:`\\u2318 + \\u2318`},{hotkey:`DoubleOption`,label:`\\u2325 + \\u2325`},{hotkey:`DoubleShift`,label:`\\u21e7 + \\u21e7`}];", + "function N(){let{data:h}=l(`appshot-hotkey-state`,{queryConfig:{enabled:t}}),x=c(`appshot-set-hotkey`);let w=h?.configuredHotkey??null,E=M.find(e=>e.hotkey===w)??null;return E}", + ].join(""); +} + function currentPluginGateBundleFixture() { return [ "var lt=`browser-use`,ut=`chrome`,dt=`chrome-internal`,xt=`chrome-dev`,ft=`computer-use`,pt=`latex-tectonic`;", @@ -1692,6 +1731,77 @@ test("allows bundled Computer Use on Linux as well as macOS", () => { assert.doesNotMatch(patched, /t===`darwin`&&e\.computerUse/); }); +test("enables AppShots availability on Linux", () => { + const patched = applyPatchTwice( + applyLinuxAppshotAvailabilityPatch, + appshotAvailabilityBundleFixture(), + ); + + assert.match(patched, /return n===`linux`\|\|n===`macOS`&&r/); +}); + +test("routes AppShots capture through Linux Computer Use backend", () => { + const patched = applyPatchTwice( + applyLinuxAppshotMainProcessPatch, + appshotMainProcessBundleFixture(), + ); + + assert.match( + patched, + /process\.platform===`linux`\?codexLinuxAppshotFrontmostWindow\(\):process\.platform===`darwin`\?Xo\(\):null/, + ); + assert.match( + patched, + /if\(process\.platform===`linux`\)return codexLinuxAppshotStartCapture\(\{origin:n,requestId:r,bundleIdentifier:t\}\);/, + ); + assert.match(patched, /function codexLinuxAppshotBackendPath/); + assert.match(patched, /type:`metadata`,app:\{bundleIdentifier:a\.bundleIdentifier/); + assert.match(patched, /type:`axText`,text:i\.accessibility_text/); + assert.match(patched, /type:`screenshot`,screenshotDataURL:o/); + assert.match(patched, /type:`completed`,transitionSnapshotDataURL:o/); 
+}); + +test("enables the AppShots global hotkey on Linux", () => { + const patched = applyPatchTwice( + applyLinuxAppshotHotkeyPatch, + appshotHotkeyMainBundleFixture(), + ); + + assert.match( + patched, + /e\.get\(`appshotHotkey`\)\?\?\(process\.platform===`linux`\?`DoubleShift`:uG\)/, + ); + assert.match( + patched, + /process\.platform===`linux`&&typeof codexLinuxAppshotBareModifierHotkey==`function`&&codexLinuxAppshotBareModifierHotkey\(e\)/, + ); + assert.match( + patched, + /codexLinuxAppshotRegisterBareModifierHotkey\(e,t,r\?\.bareModifierTrigger\)/, + ); + assert.match( + patched, + /supported:r&&\(process\.platform===`darwin`\|\|process\.platform===`linux`\)/, + ); + assert.match(patched, /process\.platform!==`darwin`&&process\.platform!==`linux`/); + assert.match(patched, /type:`appshot-shortcut`/); +}); + +test("shows Linux AppShots hotkey choices in settings", () => { + const patched = applyPatchTwice( + applyLinuxAppshotSettingsHotkeyPatch, + appshotSettingsBundleFixture(), + ); + + assert.match( + patched, + /typeof navigator!=`undefined`&&navigator\.userAgent\.includes\(`Linux`\)\?\[\{hotkey:`DoubleShift`,label:`Shift \+ Shift`\}/, + ); + assert.match(patched, /\{hotkey:`DoubleAlt`,label:`Alt \+ Alt`\}/); + assert.match(patched, /\{hotkey:`Ctrl\+Alt\+A`,label:`Ctrl \+ Alt \+ A`\}/); + assert.match(patched, /:\[\{hotkey:`DoubleCommand`/); +}); + test("adds Keybinds settings route after upstream minified variable drift", () => { const patched = applyPatchTwice(applyKeybindsSettingsIndexPatch, keybindsIndexBundleFixture()); diff --git a/scripts/patches/appshots.js b/scripts/patches/appshots.js new file mode 100644 index 00000000..9013fde3 --- /dev/null +++ b/scripts/patches/appshots.js @@ -0,0 +1,198 @@ +"use strict"; + +const APPSHOT_HELPER_MARKER = "codexLinuxAppshotStartCapture"; +const LINUX_APPSHOT_DEFAULT_HOTKEY = "DoubleShift"; +const LINUX_APPSHOT_FALLBACK_HOTKEY = "Ctrl+Alt+A"; + +function applyLinuxAppshotAvailabilityPatch(currentSource) { + 
if (currentSource.includes("n===`linux`||n===`macOS`&&r")) { + return currentSource; + } + + let changed = false; + const patchedSource = currentSource.replace( + /return ([A-Za-z_$][\w$]*)===`macOS`&&([A-Za-z_$][\w$]*)/g, + (match, platformVar, flagVar) => { + changed = true; + return `return ${platformVar}===\`linux\`||${platformVar}===\`macOS\`&&${flagVar}`; + }, + ); + + if (changed) { + return patchedSource; + } + + if (currentSource.includes("1304276663") || currentSource.includes("macOS")) { + console.warn("WARN: Could not find AppShots availability gate โ€” skipping Linux AppShots availability patch"); + } + return currentSource; +} + +function applyLinuxAppshotMainProcessPatch(currentSource) { + if (currentSource.includes(APPSHOT_HELPER_MARKER)) { + return currentSource; + } + + let changed = false; + let patchedSource = currentSource.replace( + /"computer-use-frontmost-window":async\(\)=>process\.platform===`darwin`\?([A-Za-z_$][\w$]*)\(\):null/g, + (match, macFrontmostFn) => { + changed = true; + return `"computer-use-frontmost-window":async()=>process.platform===\`linux\`?codexLinuxAppshotFrontmostWindow():process.platform===\`darwin\`?${macFrontmostFn}():null`; + }, + ); + + patchedSource = patchedSource.replace( + /"computer-use-start-capture":async\(\{animationDestination:([A-Za-z_$][\w$]*),bundleIdentifier:([A-Za-z_$][\w$]*),origin:([A-Za-z_$][\w$]*),requestId:([A-Za-z_$][\w$]*)\}\)=>\{if\(process\.platform!==`darwin`\|\|this\.requestComputerUseCaptureWorker==null\|\|this\.subscribeComputerUseCaptureWorkerEvent==null\)return null;/g, + (match, animationDestinationVar, bundleIdentifierVar, originVar, requestIdVar) => { + changed = true; + return `"computer-use-start-capture":async({animationDestination:${animationDestinationVar},bundleIdentifier:${bundleIdentifierVar},origin:${originVar},requestId:${requestIdVar}})=>{if(process.platform===\`linux\`)return 
codexLinuxAppshotStartCapture({origin:${originVar},requestId:${requestIdVar},bundleIdentifier:${bundleIdentifierVar}});if(process.platform!==\`darwin\`||this.requestComputerUseCaptureWorker==null||this.subscribeComputerUseCaptureWorkerEvent==null)return null;`; + }, + ); + + if (!changed) { + if (currentSource.includes("computer-use-frontmost-window") || currentSource.includes("computer-use-start-capture")) { + console.warn("WARN: Could not find AppShots main-process capture handlers โ€” skipping Linux AppShots main-process patch"); + } + return currentSource; + } + + const sendMessageFn = findMessageForViewSendFunction(currentSource); + if (sendMessageFn == null) { + console.warn("WARN: Could not find renderer message sender โ€” skipping Linux AppShots main-process patch"); + return currentSource; + } + + return appendLinuxAppshotHelper(patchedSource, sendMessageFn); +} + +function applyLinuxAppshotHotkeyPatch(currentSource) { + if (currentSource.includes("process.platform===`linux`?`DoubleShift`")) { + return currentSource; + } + + let changed = false; + let patchedSource = currentSource.replace( + /let ([A-Za-z_$][\w$]*)=([A-Za-z_$][\w$]*)\.get\(`appshotHotkey`\)\?\?([A-Za-z_$][\w$]*),([A-Za-z_$][\w$]*)=null,([A-Za-z_$][\w$]*)=\(\)=>\(\{supported:([A-Za-z_$][\w$]*)&&process\.platform===`darwin`,configuredHotkey:\1,isActive:\4!=null\}\),([A-Za-z_$][\w$]*)=\(\)=>\{if\(\4\?\.unregister\(\),\4=null,!\6\|\|process\.platform!==`darwin`\|\|\1==null\)\{/, + ( + match, + configuredVar, + globalStateVar, + defaultHotkeyVar, + registrationVar, + stateFnVar, + enabledVar, + reconcileFnVar, + ) => { + changed = true; + return `let ${configuredVar}=${globalStateVar}.get(\`appshotHotkey\`)??(process.platform===\`linux\`?\`${LINUX_APPSHOT_DEFAULT_HOTKEY}\`:${defaultHotkeyVar});let 
${registrationVar}=null,${stateFnVar}=()=>({supported:${enabledVar}&&(process.platform===\`darwin\`||process.platform===\`linux\`),configuredHotkey:${configuredVar},isActive:${registrationVar}!=null}),${reconcileFnVar}=()=>{if(${registrationVar}?.unregister(),${registrationVar}=null,!${enabledVar}||process.platform!==\`darwin\`&&process.platform!==\`linux\`||${configuredVar}==null){`; + }, + ); + + patchedSource = patchedSource.replace( + /function ([A-Za-z_$][\w$]*)\(([A-Za-z_$][\w$]*),([A-Za-z_$][\w$]*)=process\.platform\)\{return \3===`darwin`&&([A-Za-z_$][\w$]*)\(\2\)!=null\}/, + (match, supportedFn, hotkeyVar, platformVar, canonicalBareModifierFn) => { + changed = true; + return `function ${supportedFn}(${hotkeyVar},${platformVar}=process.platform){return ${platformVar}===\`darwin\`&&${canonicalBareModifierFn}(${hotkeyVar})!=null||${platformVar}===\`linux\`&&typeof codexLinuxAppshotBareModifierHotkey==\`function\`&&codexLinuxAppshotBareModifierHotkey(${hotkeyVar})}`; + }, + ); + + patchedSource = patchedSource.replace( + /if\(([A-Za-z_$][\w$]*)\(([A-Za-z_$][\w$]*)\)\)return ([A-Za-z_$][\w$]*)\(\2\)\?([A-Za-z_$][\w$]*)\(\2,([A-Za-z_$][\w$]*),([A-Za-z_$][\w$]*)\?\.(bareModifierTrigger)\):null;let /, + ( + match, + isBareModifierFn, + hotkeyVar, + supportedFn, + macRegisterFn, + handlersVar, + optionsVar, + triggerProperty, + ) => { + changed = true; + return `if(${isBareModifierFn}(${hotkeyVar}))return process.platform===\`linux\`&&typeof codexLinuxAppshotBareModifierHotkey==\`function\`&&codexLinuxAppshotBareModifierHotkey(${hotkeyVar})?codexLinuxAppshotRegisterBareModifierHotkey(${hotkeyVar},${handlersVar},${optionsVar}?.${triggerProperty}):${supportedFn}(${hotkeyVar})?${macRegisterFn}(${hotkeyVar},${handlersVar},${optionsVar}?.${triggerProperty}):null;let `; + }, + ); + + patchedSource = patchedSource.replace( + /if\(!([A-Za-z_$][\w$]*)\|\|process\.platform!==`darwin`\)return\{success:!1,error:`Not 
supported\.`,state:([A-Za-z_$][\w$]*)\(\)\};if\(([A-Za-z_$][\w$]*)!=null\)\{/, + (match, enabledVar, stateFnVar, nextHotkeyVar) => { + changed = true; + return `if(!${enabledVar}||process.platform!==\`darwin\`&&process.platform!==\`linux\`)return{success:!1,error:\`Not supported.\`,state:${stateFnVar}()};if(${nextHotkeyVar}!=null){`; + }, + ); + + if (changed) { + return patchedSource; + } + + if (currentSource.includes("appshotHotkey") || currentSource.includes("appshot-hotkey-state")) { + console.warn("WARN: Could not find AppShots hotkey controller โ€” skipping Linux AppShots hotkey patch"); + } + return currentSource; +} + +function applyLinuxAppshotSettingsHotkeyPatch(currentSource) { + if (currentSource.includes("DoubleAlt") && currentSource.includes("Ctrl+Alt+A")) { + return currentSource; + } + + let changed = false; + const patchedSource = currentSource.replace( + /((?:var\s+|,)([A-Za-z_$][\w$]*)=)(\[\{hotkey:`DoubleCommand`,label:`[^`]+`\},\{hotkey:`DoubleOption`,label:`[^`]+`\},\{hotkey:`DoubleShift`,label:`[^`]+`\}\])(?=;)/, + (match, declarationPrefix, optionsVar, macOptions) => { + changed = true; + return `${declarationPrefix}typeof navigator!=\`undefined\`&&navigator.userAgent.includes(\`Linux\`)?[{hotkey:\`${LINUX_APPSHOT_DEFAULT_HOTKEY}\`,label:\`Shift + Shift\`},{hotkey:\`DoubleAlt\`,label:\`Alt + Alt\`},{hotkey:\`${LINUX_APPSHOT_FALLBACK_HOTKEY}\`,label:\`Ctrl + Alt + A\`}]:${macOptions}`; + }, + ); + + if (changed) { + return patchedSource; + } + + if (currentSource.includes("appshot-hotkey-state") || currentSource.includes("DoubleCommand")) { + console.warn("WARN: Could not find AppShots settings hotkey options โ€” skipping Linux AppShots settings patch"); + } + return currentSource; +} + +function findMessageForViewSendFunction(source) { + const channelVar = source.match(/(?:var|let|const)\s+([A-Za-z_$][\w$]*)=`codex_desktop:message-for-view`/)?.[1]; + if (channelVar == null) { + return source.includes("function nS(") ? 
"nS" : null; + } + + const escapedChannelVar = escapeRegExp(channelVar); + const sendFnMatch = source.match(new RegExp( + String.raw`function\s+([A-Za-z_$][\w$]*)\(([A-Za-z_$][\w$]*),([A-Za-z_$][\w$]*)\)\{\2\.isDestroyed\(\)\|\|\2\.send\(${escapedChannelVar},\3\)\}`, + )); + return sendFnMatch?.[1] ?? (source.includes("function nS(") ? "nS" : null); +} + +function appendLinuxAppshotHelper(source, sendMessageFn) { + return `${source} +;function codexLinuxAppshotRequire(e){return require(e)} +function codexLinuxAppshotBackendPath(){let e=codexLinuxAppshotRequire(\`node:fs\`),t=codexLinuxAppshotRequire(\`node:path\`),n=codexLinuxAppshotRequire(\`node:os\`),r=process.env.CODEX_ELECTRON_RESOURCES_PATH||process.resourcesPath,i=process.env.CODEX_HOME||(process.env.HOME?t.join(process.env.HOME,\`.codex\`):t.join(n.homedir(),\`.codex\`)),a=[process.env.CODEX_LINUX_COMPUTER_USE_BACKEND_SOURCE,r&&t.join(r,\`plugins\`,\`openai-bundled\`,\`plugins\`,\`computer-use\`,\`bin\`,\`codex-computer-use-linux\`),i&&t.join(i,\`plugins\`,\`cache\`,\`openai-bundled\`,\`computer-use\`,\`latest\`,\`bin\`,\`codex-computer-use-linux\`)];for(let t of a){if(typeof t!=\`string\`||t.length===0)continue;try{if(e.existsSync(t))return t}catch{}}return null} +function codexLinuxAppshotBackendJson(e,t=45000){let n=codexLinuxAppshotBackendPath();if(n==null)return Promise.reject(Error(\`Linux Computer Use backend is not installed\`));let r=codexLinuxAppshotRequire(\`node:child_process\`);return new Promise((i,a)=>{r.execFile(n,e,{timeout:t,maxBuffer:67108864},(e,t,n)=>{if(e!=null){a(Error((n||e.message||\`Linux Computer Use backend failed\`).trim()));return}try{i(JSON.parse(t))}catch(e){a(Error(\`Linux Computer Use backend returned invalid JSON\`))}})})} +function codexLinuxAppshotNormalizeBareModifier(e){let 
t=String(e??\`\`).replace(/[\\s_-]/g,\`\`).toLowerCase();if(t===\`doubleshift\`||t===\`leftshift+rightshift\`)return\`DoubleShift\`;if(t===\`doublealt\`||t===\`doubleoption\`||t===\`leftalt+rightalt\`||t===\`leftoption+rightoption\`)return\`DoubleAlt\`;if(t===\`doublesuper\`||t===\`doublemeta\`||t===\`doublecommand\`||t===\`leftmeta+rightmeta\`||t===\`leftcommand+rightcommand\`)return\`DoubleSuper\`;return null} +function codexLinuxAppshotBareModifierHotkey(e){return codexLinuxAppshotNormalizeBareModifier(e)!=null} +function codexLinuxAppshotRegisterBareModifierHotkey(e,t,n=\`press\`){let r=codexLinuxAppshotNormalizeBareModifier(e),i=codexLinuxAppshotBackendPath();if(r==null||i==null)return null;let a=codexLinuxAppshotRequire(\`node:child_process\`),o=[\`bare-modifier-monitor\`,\`--key\`,r];n===\`immediatePress\`?o.push(\`--immediate\`):n===\`release\`&&o.push(\`--trigger-on-release\`);let s=a.spawn(i,o,{stdio:[\`ignore\`,\`pipe\`,\`ignore\`]}),c=!1,l=!1,u=e=>{switch(e){case\`ready\`:return;case\`down\`:t.onPressed();return;case\`up\`:t.onReleased?.();return;case\`permission-denied\`:return;case\`\`:return;default:return}},d=\`\`;return s.stdout?.on(\`data\`,e=>{d+=e.toString(\`utf8\`);let t=d.indexOf(\`\\n\`);for(;t!==-1;)u(d.slice(0,t).trim()),d=d.slice(t+1),t=d.indexOf(\`\\n\`)}),s.once(\`error\`,()=>{l||s.kill()}),s.once(\`exit\`,()=>{c||l||(l=!0)}),{handlesRelease:!0,unregister:()=>{c=!0,l=!0,s.kill()}}} +function codexLinuxAppshotFirstString(...e){for(let t of e)if(typeof t==\`string\`&&t.trim().length>0)return t.trim();return null} +function codexLinuxAppshotWindowForRenderer(e){if(e==null||typeof e!=\`object\`)return null;let t=codexLinuxAppshotFirstString(e.app_id,e.wm_class,e.title,\`Linux 
app\`),n=codexLinuxAppshotFirstString(e.app_id,e.wm_class,e.pid!=null?\`pid:\${e.pid}\`:null,e.window_id!=null?\`window:\${e.window_id}\`:null,t),r=codexLinuxAppshotFirstString(e.title);return{name:t,appName:t,bundleIdentifier:n,windowTitle:r,iconSmallDataURL:null,appIconDataUrl:null}} +async function codexLinuxAppshotFrontmostWindow(){if(process.platform!==\`linux\`)return null;try{let e=await codexLinuxAppshotBackendJson([\`focused-window\`],5000);return codexLinuxAppshotWindowForRenderer(e.focused_window)}catch{return null}} +function codexLinuxAppshotSend(e,t,n){try{${sendMessageFn}(e,{requestId:t,type:\`computer-use-capture-updated\`,update:n})}catch{}} +function codexLinuxAppshotStartCapture({origin:e,requestId:t,bundleIdentifier:n}){if(process.platform!==\`linux\`)return null;setTimeout(()=>{codexLinuxAppshotCapture({origin:e,requestId:t,bundleIdentifier:n}).catch(()=>codexLinuxAppshotSend(e,t,{type:\`failed\`}))},0);return{animationDuration:null,transitionSnapshotHeight:null,transitionSpringDampingFraction:null,transitionSpringResponse:null}} +async function codexLinuxAppshotCapture({origin:e,requestId:t,bundleIdentifier:n}){let r=typeof n==\`string\`&&n.trim().length>0?n.trim():null,i=await codexLinuxAppshotBackendJson(r==null?[\`appshot\`]:[\`appshot\`,r]),a=codexLinuxAppshotWindowForRenderer(i.focused_window);a!=null&&codexLinuxAppshotSend(e,t,{type:\`metadata\`,app:{bundleIdentifier:a.bundleIdentifier,name:a.name,windowTitle:a.windowTitle,iconSmallDataURL:null}});typeof i.accessibility_text==\`string\`&&i.accessibility_text.length>0&&codexLinuxAppshotSend(e,t,{type:\`axText\`,text:i.accessibility_text});let o=i.screenshot?.data_url;if(typeof o!=\`string\`||o.length===0){codexLinuxAppshotSend(e,t,{type:\`failed\`});return}codexLinuxAppshotSend(e,t,{type:\`screenshot\`,screenshotDataURL:o});codexLinuxAppshotSend(e,t,{type:\`completed\`,transitionSnapshotDataURL:o})} +`; +} + +function escapeRegExp(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, 
"\\$&"); +} + +module.exports = { + applyLinuxAppshotAvailabilityPatch, + applyLinuxAppshotHotkeyPatch, + applyLinuxAppshotMainProcessPatch, + applyLinuxAppshotSettingsHotkeyPatch, +}; diff --git a/scripts/patches/core/all-linux/main-process/appshots/patch.js b/scripts/patches/core/all-linux/main-process/appshots/patch.js new file mode 100644 index 00000000..be266907 --- /dev/null +++ b/scripts/patches/core/all-linux/main-process/appshots/patch.js @@ -0,0 +1,23 @@ +"use strict"; + +const { + applyLinuxAppshotHotkeyPatch, + applyLinuxAppshotMainProcessPatch, +} = require("../../../../appshots.js"); + +module.exports = [ + { + id: "linux-appshots-main-process", + phase: "main-bundle", + order: 142, + ciPolicy: "required-upstream", + apply: applyLinuxAppshotMainProcessPatch, + }, + { + id: "linux-appshots-hotkey", + phase: "main-bundle", + order: 143, + ciPolicy: "required-upstream", + apply: applyLinuxAppshotHotkeyPatch, + }, +]; diff --git a/scripts/patches/core/all-linux/webview/appshots/patch.js b/scripts/patches/core/all-linux/webview/appshots/patch.js new file mode 100644 index 00000000..7809922e --- /dev/null +++ b/scripts/patches/core/all-linux/webview/appshots/patch.js @@ -0,0 +1,29 @@ +"use strict"; + +const { + applyLinuxAppshotAvailabilityPatch, + applyLinuxAppshotSettingsHotkeyPatch, +} = require("../../../../appshots.js"); + +module.exports = [ + { + id: "linux-appshots-availability", + phase: "webview-asset", + order: 1090, + ciPolicy: "required-upstream", + pattern: /^use-is-appshot-available-.*\.js$/, + missingDescription: "AppShots availability bundle", + skipDescription: "Linux AppShots availability patch", + apply: applyLinuxAppshotAvailabilityPatch, + }, + { + id: "linux-appshots-settings-hotkey", + phase: "webview-asset", + order: 1091, + ciPolicy: "required-upstream", + pattern: /^appshots-settings-.*\.js$/, + missingDescription: "AppShots settings bundle", + skipDescription: "Linux AppShots settings hotkey patch", + apply: 
applyLinuxAppshotSettingsHotkeyPatch, + }, +]; From 61804b1185d3b820a38398e8b17d16ffd2257ca0 Mon Sep 17 00:00:00 2001 From: Mohit Sahoo Date: Mon, 25 May 2026 03:09:22 +0530 Subject: [PATCH 2/3] fix(appshots): satisfy clippy - Elide redundant AppShot helper lifetimes - Use direct length comparison for capped lines --- computer-use-linux/src/appshot.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/computer-use-linux/src/appshot.rs b/computer-use-linux/src/appshot.rs index e79e6f55..40089136 100644 --- a/computer-use-linux/src/appshot.rs +++ b/computer-use-linux/src/appshot.rs @@ -288,7 +288,7 @@ fn node_line(node: &AccessibilityNode) -> String { format!("{indent}- {}", parts.join(" ")) } -fn first_non_empty<'a, const N: usize>(values: [Option<&'a str>; N]) -> Option<&'a str> { +fn first_non_empty(values: [Option<&str>; N]) -> Option<&str> { values .into_iter() .flatten() @@ -323,7 +323,7 @@ fn push_capped_line(output: &mut String, line: &str) { return; } let remaining = MAX_AX_TEXT_CHARS - output.len(); - if line.len() + 1 <= remaining { + if line.len() < remaining { output.push_str(line); output.push('\n'); return; From f8b2919a2fa4ed623d5a78fe1617985692ff7111 Mon Sep 17 00:00:00 2001 From: Mohit Sahoo Date: Mon, 25 May 2026 12:14:31 +0530 Subject: [PATCH 3/3] fix(appshots): make Linux UI opt-in - Move AppShots patch descriptors into linux-features/appshots - Fail closed without focused-window crop or portal-backed capture - Disable the Linux global hotkey default and document opt-in use --- CHANGELOG.md | 9 +- README.md | 10 +-- computer-use-linux/src/appshot.rs | 83 +++++++++++++++---- computer-use-linux/src/screenshot.rs | 13 ++- linux-features/appshots/README.md | 30 +++++++ linux-features/appshots/feature.json | 9 ++ .../appshots/patch.js | 24 +++++- linux-features/appshots/test.js | 55 ++++++++++++ scripts/patch-linux-window-ui.test.js | 6 +- scripts/patches/appshots.js | 8 +- .../all-linux/main-process/appshots/patch.js | 23 ----- 11 
files changed, 207 insertions(+), 63 deletions(-) create mode 100644 linux-features/appshots/README.md create mode 100644 linux-features/appshots/feature.json rename {scripts/patches/core/all-linux/webview => linux-features}/appshots/patch.js (61%) create mode 100644 linux-features/appshots/test.js delete mode 100644 scripts/patches/core/all-linux/main-process/appshots/patch.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d5214b9..6bfa3aae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,11 +11,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/). forces native Wayland with GPU compositing enabled and skips forced renderer accessibility by default for Wayland desktops where XWayland or software rendering is unstable. -- Linux AppShots now use the bundled Computer Use backend to attach the focused - Linux window with metadata, AT-SPI text, and a screenshot. The ASAR patcher - exposes the upstream AppShots composer control and global double-modifier - hotkey on Linux and routes capture updates through the same renderer event - contract as macOS. +- New opt-in Linux feature `appshots` uses the bundled Computer Use backend to + attach the focused Linux window with metadata, AT-SPI text, and a screenshot. + The feature exposes the upstream AppShots composer control on Linux and routes + capture updates through the same renderer event contract as macOS. - Linux screenshot capture now falls back to common desktop tools (`grim`, `gnome-screenshot`, `spectacle`, ImageMagick `import`) when GNOME Shell DBus and XDG Desktop Portal capture are unavailable. 
diff --git a/README.md b/README.md index 7aaf3749..7faf8cfa 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ Anything systemd-based should work for the optional auto-updater service (`syste | Multi-instance launcher | 🧪 opt-in | `--new-instance` or `CODEX_MULTI_LAUNCH=1` allocates a bounded webview port and isolated Electron profile | | GUI install prompts (`kdialog` / `zenity`) | ✅ if installed | Falls back to interactive terminal prompt | | Linux browser annotations | ✅ always | Stored-anchor screenshots, isolated marker rendering | -| Linux AppShots | ✅ always | Global hotkey and composer capture attach the focused Linux window with screenshot and AT-SPI text through the bundled Computer Use backend | +| Linux AppShots | 🧪 opt-in experiment | `linux-features/appshots` exposes the upstream AppShots composer control on Linux and attaches the focused window screenshot plus AT-SPI text through the bundled Computer Use backend | | Chrome plugin native host | ✅ always | Auto-installs the upstream Chrome plugin plus Linux native-messaging support for Chrome, Brave, and Chromium | | Linux Computer Use | ⚠️ opt-in | MCP backend registers by default; the in-app UI is opt-in. Supports screenshots, accessibility, window targeting, and input synthesis | | Linux Read Aloud | 🧪 opt-in experiment | `linux-features/read-aloud` adds an explicit response speaker button; `linux-features/read-aloud-mcp` stages a separate MCP plugin so the agent can read text aloud on request | @@ -269,10 +269,10 @@ You can also invoke the backend binary directly: ./codex-app/resources/plugins/openai-bundled/plugins/computer-use/bin/codex-computer-use-linux appshot [APP_NAME|pid:PID] ``` -For the full AppShots UI path, use the AppShots hotkey in the running app. On -Linux the default is pressing both Shift keys at once. The AppShots settings -page also offers both Alt keys, plus `Ctrl+Alt+A` as a normal-accelerator -fallback.
+For the full AppShots UI path, enable `linux-features/appshots` before building. +The feature exposes the upstream AppShots composer control on Linux. Global +hotkeys are disabled by default; after opting in, configure one from the +AppShots settings page. ### Enabling Computer Use UI diff --git a/computer-use-linux/src/appshot.rs b/computer-use-linux/src/appshot.rs index 40089136..1ba3662f 100644 --- a/computer-use-linux/src/appshot.rs +++ b/computer-use-linux/src/appshot.rs @@ -1,5 +1,5 @@ use crate::atspi_tree::{snapshot_tree, AccessibilityNode}; -use crate::screenshot::{capture_screenshot, ScreenshotCapture}; +use crate::screenshot::{capture_screenshot_without_cli_fallback, ScreenshotCapture}; use crate::windows::{focused_window, WindowBounds, WindowInfo}; use anyhow::{Context, Result}; use base64::{engine::general_purpose::STANDARD, Engine}; @@ -30,15 +30,26 @@ pub async fn capture_appshot(app_filter: Option<&str>) -> AppshotCapture { Err(error) => (None, Some(format!("{error:#}"))), }; - let (screenshot, screenshot_error) = match capture_screenshot().await { - Ok(capture) => ( + let (screenshot, screenshot_error) = match focused_window.as_ref() { + Some(window) => match capture_screenshot_without_cli_fallback().await { + Ok(capture) => match crop_capture_to_window(capture, window) { + Ok(capture) => (Some(capture), None), + Err(error) => (None, Some(format!("{error:#}"))), + }, + Err(error) => (None, Some(format!("{error:#}"))), + }, + None => ( + None, Some( - crop_capture_to_window(capture, focused_window.as_ref()) - .unwrap_or_else(|capture| capture), + focused_window_error + .as_deref() + .map(|error| format!("focused window unavailable; refusing full-screen AppShot screenshot: {error}")) + .unwrap_or_else(|| { + "focused window unavailable; refusing full-screen AppShot screenshot" + .to_string() + }), ), - None, ), - Err(error) => (None, Some(format!("{error:#}"))), }; let selector = accessibility_selector(app_filter, focused_window.as_ref()); @@ -82,15 
+93,12 @@ pub async fn capture_appshot(app_filter: Option<&str>) -> AppshotCapture { fn crop_capture_to_window( capture: ScreenshotCapture, - focused_window: Option<&WindowInfo>, -) -> std::result::Result { - let Some(bounds) = focused_window.and_then(|window| window.bounds.as_ref()) else { - return Err(capture); - }; - let Ok(cropped) = crop_capture_to_bounds(&capture, bounds) else { - return Err(capture); + focused_window: &WindowInfo, +) -> Result { + let Some(bounds) = focused_window.bounds.as_ref() else { + anyhow::bail!("focused window bounds unavailable; refusing full-screen AppShot screenshot"); }; - Ok(cropped) + crop_capture_to_bounds(&capture, bounds) } fn crop_capture_to_bounds( @@ -98,7 +106,9 @@ fn crop_capture_to_bounds( bounds: &WindowBounds, ) -> Result { let Some((x, y, width, height)) = crop_rect(capture.width, capture.height, bounds) else { - return Ok(capture.clone()); + anyhow::bail!( + "focused window bounds did not intersect screenshot; refusing full-screen AppShot screenshot" + ); }; if x == 0 && y == 0 && width == capture.width && height == capture.height { @@ -406,4 +416,45 @@ mod tests { None ); } + + #[test] + fn crop_capture_to_window_requires_bounds() { + let mut window = window(); + window.bounds = None; + let capture = ScreenshotCapture { + mime_type: "image/png".to_string(), + data_url: "data:image/png;base64,".to_string(), + source: "xdg-desktop-portal".to_string(), + width: 100, + height: 100, + }; + + let error = crop_capture_to_window(capture, &window).unwrap_err(); + + assert!(error + .to_string() + .contains("focused window bounds unavailable")); + } + + #[test] + fn crop_capture_to_window_rejects_non_intersecting_bounds() { + let mut window = window(); + window.bounds = Some(WindowBounds { + x: Some(200), + y: Some(200), + width: 20, + height: 20, + }); + let capture = ScreenshotCapture { + mime_type: "image/png".to_string(), + data_url: "data:image/png;base64,".to_string(), + source: "xdg-desktop-portal".to_string(), + 
width: 100, + height: 100, + }; + + let error = crop_capture_to_window(capture, &window).unwrap_err(); + + assert!(error.to_string().contains("did not intersect screenshot")); + } } diff --git a/computer-use-linux/src/screenshot.rs b/computer-use-linux/src/screenshot.rs index b54684c9..8a2dfcb7 100644 --- a/computer-use-linux/src/screenshot.rs +++ b/computer-use-linux/src/screenshot.rs @@ -38,18 +38,29 @@ enum ScreenshotCleanup { } pub async fn capture_screenshot() -> Result { + capture_screenshot_inner(true).await +} + +pub async fn capture_screenshot_without_cli_fallback() -> Result { + capture_screenshot_inner(false).await +} + +async fn capture_screenshot_inner(allow_cli_fallback: bool) -> Result { hydrate_session_bus_env(); match capture_with_gnome_shell().await { Ok(capture) => Ok(capture), Err(gnome_error) => match capture_with_portal().await { Ok(capture) => Ok(capture), - Err(portal_error) => match capture_with_cli_fallback().await { + Err(portal_error) if allow_cli_fallback => match capture_with_cli_fallback().await { Ok(capture) => Ok(capture), Err(cli_error) => Err(anyhow!( "GNOME Shell screenshot failed: {gnome_error}; XDG portal screenshot failed: {portal_error}; CLI screenshot fallback failed: {cli_error}" )), }, + Err(portal_error) => Err(anyhow!( + "GNOME Shell screenshot failed: {gnome_error}; XDG portal screenshot failed: {portal_error}; CLI screenshot fallback disabled" + )), }, } } diff --git a/linux-features/appshots/README.md b/linux-features/appshots/README.md new file mode 100644 index 00000000..0e0aa8fc --- /dev/null +++ b/linux-features/appshots/README.md @@ -0,0 +1,30 @@ +# Linux AppShots + +`linux-features/appshots` exposes the upstream AppShots UI on Linux. It routes +capture requests to the bundled Linux Computer Use backend and attaches the +focused window screenshot plus AT-SPI text to the composer. + +This feature is disabled by default. 
Enable it before building: + +```json +{ + "enabled": [ + "appshots" + ] +} +``` + +The backend refuses to create an AppShot when it cannot identify focused-window +bounds, when the window bounds do not intersect the screenshot, or when GNOME +Shell / XDG Desktop Portal screenshot capture fails. It does not use the CLI +screenshot fallback used by the standalone `screenshot` command. + +Global hotkeys are disabled by default on Linux. After opting into the feature, +use the AppShots settings page to configure a hotkey such as both Shift keys, +both Alt keys, or `Ctrl+Alt+A`. + +Backend-only test: + +```bash +./codex-app/resources/plugins/openai-bundled/plugins/computer-use/bin/codex-computer-use-linux appshot +``` diff --git a/linux-features/appshots/feature.json b/linux-features/appshots/feature.json new file mode 100644 index 00000000..653e41dd --- /dev/null +++ b/linux-features/appshots/feature.json @@ -0,0 +1,9 @@ +{ + "id": "appshots", + "title": "Linux AppShots", + "description": "Opt-in AppShots UI integration that captures the focused Linux window through the bundled Computer Use backend.", + "defaultEnabled": false, + "entrypoints": { + "patchDescriptors": "./patch.js" + } +} diff --git a/scripts/patches/core/all-linux/webview/appshots/patch.js b/linux-features/appshots/patch.js similarity index 61% rename from scripts/patches/core/all-linux/webview/appshots/patch.js rename to linux-features/appshots/patch.js index 7809922e..67ba00af 100644 --- a/scripts/patches/core/all-linux/webview/appshots/patch.js +++ b/linux-features/appshots/patch.js @@ -2,15 +2,28 @@ const { applyLinuxAppshotAvailabilityPatch, + applyLinuxAppshotHotkeyPatch, + applyLinuxAppshotMainProcessPatch, applyLinuxAppshotSettingsHotkeyPatch, -} = require("../../../../appshots.js"); +} = require("../../scripts/patches/appshots.js"); -module.exports = [ +const descriptors = [ + { + id: "linux-appshots-main-process", + phase: "main-bundle", + order: 142, + apply: 
applyLinuxAppshotMainProcessPatch, + }, + { + id: "linux-appshots-hotkey", + phase: "main-bundle", + order: 143, + apply: applyLinuxAppshotHotkeyPatch, + }, { id: "linux-appshots-availability", phase: "webview-asset", order: 1090, - ciPolicy: "required-upstream", pattern: /^use-is-appshot-available-.*\.js$/, missingDescription: "AppShots availability bundle", skipDescription: "Linux AppShots availability patch", @@ -20,10 +33,13 @@ module.exports = [ id: "linux-appshots-settings-hotkey", phase: "webview-asset", order: 1091, - ciPolicy: "required-upstream", pattern: /^appshots-settings-.*\.js$/, missingDescription: "AppShots settings bundle", skipDescription: "Linux AppShots settings hotkey patch", apply: applyLinuxAppshotSettingsHotkeyPatch, }, ]; + +module.exports = { + descriptors, +}; diff --git a/linux-features/appshots/test.js b/linux-features/appshots/test.js new file mode 100644 index 00000000..1aabbdd6 --- /dev/null +++ b/linux-features/appshots/test.js @@ -0,0 +1,55 @@ +#!/usr/bin/env node +"use strict"; + +const assert = require("node:assert/strict"); +const fs = require("node:fs"); +const os = require("node:os"); +const path = require("node:path"); +const test = require("node:test"); + +const { + loadLinuxFeaturePatchDescriptors, +} = require("../../scripts/lib/linux-features.js"); +const { + descriptors, +} = require("./patch.js"); + +test("appshots stays disabled until listed in features.json", () => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "appshots-feature-")); + const configPath = path.join(tempDir, "features.json"); + const featuresRoot = path.resolve(__dirname, ".."); + const originalConfig = process.env.CODEX_LINUX_FEATURES_CONFIG; + + try { + process.env.CODEX_LINUX_FEATURES_CONFIG = configPath; + fs.writeFileSync(configPath, '{"enabled":[]}\n'); + assert.deepEqual(loadLinuxFeaturePatchDescriptors({ featuresRoot }), []); + + fs.writeFileSync(configPath, '{"enabled":["appshots"]}\n'); + const loaded = 
loadLinuxFeaturePatchDescriptors({ featuresRoot }); + + assert.equal(loaded.length, 4); + assert.deepEqual( + loaded.map((descriptor) => descriptor.id).sort(), + [ + "feature:appshots:linux-appshots-availability", + "feature:appshots:linux-appshots-hotkey", + "feature:appshots:linux-appshots-main-process", + "feature:appshots:linux-appshots-settings-hotkey", + ].sort(), + ); + assert.ok(loaded.every((descriptor) => descriptor.ciPolicy === "optional")); + } finally { + if (originalConfig == null) { + delete process.env.CODEX_LINUX_FEATURES_CONFIG; + } else { + process.env.CODEX_LINUX_FEATURES_CONFIG = originalConfig; + } + fs.rmSync(tempDir, { recursive: true, force: true }); + } +}); + +test("appshots feature descriptors are optional", () => { + assert.equal(descriptors.length, 4); + assert.ok(descriptors.every((descriptor) => descriptor.ciPolicy == null)); +}); diff --git a/scripts/patch-linux-window-ui.test.js b/scripts/patch-linux-window-ui.test.js index 77f4e408..6ecc2f62 100644 --- a/scripts/patch-linux-window-ui.test.js +++ b/scripts/patch-linux-window-ui.test.js @@ -501,8 +501,6 @@ test("default core patch descriptors are grouped and unique", () => { "linux-single-instance", "linux-computer-use-ui-feature", "linux-computer-use-plugin-gate", - "linux-appshots-main-process", - "linux-appshots-hotkey", "linux-chrome-plugin-auto-install", "browser-use-node-repl-approval", "linux-chrome-extension-status", @@ -520,8 +518,6 @@ test("default core patch descriptors are grouped and unique", () => { "opaque-window-default-webview-index", "opaque-window-default-resolved-theme", "subagent-nickname-metadata-shape", - "linux-appshots-availability", - "linux-appshots-settings-hotkey", "linux-computer-use-ui-availability", "linux-computer-use-install-flow", "linux-app-updater-bridge", @@ -1769,7 +1765,7 @@ test("enables the AppShots global hotkey on Linux", () => { assert.match( patched, - /e\.get\(`appshotHotkey`\)\?\?\(process\.platform===`linux`\?`DoubleShift`:uG\)/, + 
/e\.get\(`appshotHotkey`\)\?\?\(process\.platform===`linux`\?null:uG\)/, ); assert.match( patched, diff --git a/scripts/patches/appshots.js b/scripts/patches/appshots.js index 9013fde3..bf698718 100644 --- a/scripts/patches/appshots.js +++ b/scripts/patches/appshots.js @@ -1,7 +1,7 @@ "use strict"; const APPSHOT_HELPER_MARKER = "codexLinuxAppshotStartCapture"; -const LINUX_APPSHOT_DEFAULT_HOTKEY = "DoubleShift"; +const LINUX_APPSHOT_RECOMMENDED_BARE_HOTKEY = "DoubleShift"; const LINUX_APPSHOT_FALLBACK_HOTKEY = "Ctrl+Alt+A"; function applyLinuxAppshotAvailabilityPatch(currentSource) { @@ -67,7 +67,7 @@ function applyLinuxAppshotMainProcessPatch(currentSource) { } function applyLinuxAppshotHotkeyPatch(currentSource) { - if (currentSource.includes("process.platform===`linux`?`DoubleShift`")) { + if (currentSource.includes("process.platform===`linux`?null")) { return currentSource; } @@ -85,7 +85,7 @@ function applyLinuxAppshotHotkeyPatch(currentSource) { reconcileFnVar, ) => { changed = true; - return `let ${configuredVar}=${globalStateVar}.get(\`appshotHotkey\`)??(process.platform===\`linux\`?\`${LINUX_APPSHOT_DEFAULT_HOTKEY}\`:${defaultHotkeyVar});let ${registrationVar}=null,${stateFnVar}=()=>({supported:${enabledVar}&&(process.platform===\`darwin\`||process.platform===\`linux\`),configuredHotkey:${configuredVar},isActive:${registrationVar}!=null}),${reconcileFnVar}=()=>{if(${registrationVar}?.unregister(),${registrationVar}=null,!${enabledVar}||process.platform!==\`darwin\`&&process.platform!==\`linux\`||${configuredVar}==null){`; + return `let ${configuredVar}=${globalStateVar}.get(\`appshotHotkey\`)??(process.platform===\`linux\`?null:${defaultHotkeyVar});let 
${registrationVar}=null,${stateFnVar}=()=>({supported:${enabledVar}&&(process.platform===\`darwin\`||process.platform===\`linux\`),configuredHotkey:${configuredVar},isActive:${registrationVar}!=null}),${reconcileFnVar}=()=>{if(${registrationVar}?.unregister(),${registrationVar}=null,!${enabledVar}||process.platform!==\`darwin\`&&process.platform!==\`linux\`||${configuredVar}==null){`; }, ); @@ -142,7 +142,7 @@ function applyLinuxAppshotSettingsHotkeyPatch(currentSource) { /((?:var\s+|,)([A-Za-z_$][\w$]*)=)(\[\{hotkey:`DoubleCommand`,label:`[^`]+`\},\{hotkey:`DoubleOption`,label:`[^`]+`\},\{hotkey:`DoubleShift`,label:`[^`]+`\}\])(?=;)/, (match, declarationPrefix, optionsVar, macOptions) => { changed = true; - return `${declarationPrefix}typeof navigator!=\`undefined\`&&navigator.userAgent.includes(\`Linux\`)?[{hotkey:\`${LINUX_APPSHOT_DEFAULT_HOTKEY}\`,label:\`Shift + Shift\`},{hotkey:\`DoubleAlt\`,label:\`Alt + Alt\`},{hotkey:\`${LINUX_APPSHOT_FALLBACK_HOTKEY}\`,label:\`Ctrl + Alt + A\`}]:${macOptions}`; + return `${declarationPrefix}typeof navigator!=\`undefined\`&&navigator.userAgent.includes(\`Linux\`)?[{hotkey:\`${LINUX_APPSHOT_RECOMMENDED_BARE_HOTKEY}\`,label:\`Shift + Shift\`},{hotkey:\`DoubleAlt\`,label:\`Alt + Alt\`},{hotkey:\`${LINUX_APPSHOT_FALLBACK_HOTKEY}\`,label:\`Ctrl + Alt + A\`}]:${macOptions}`; }, ); diff --git a/scripts/patches/core/all-linux/main-process/appshots/patch.js b/scripts/patches/core/all-linux/main-process/appshots/patch.js deleted file mode 100644 index be266907..00000000 --- a/scripts/patches/core/all-linux/main-process/appshots/patch.js +++ /dev/null @@ -1,23 +0,0 @@ -"use strict"; - -const { - applyLinuxAppshotHotkeyPatch, - applyLinuxAppshotMainProcessPatch, -} = require("../../../../appshots.js"); - -module.exports = [ - { - id: "linux-appshots-main-process", - phase: "main-bundle", - order: 142, - ciPolicy: "required-upstream", - apply: applyLinuxAppshotMainProcessPatch, - }, - { - id: "linux-appshots-hotkey", - phase: 
"main-bundle", - order: 143, - ciPolicy: "required-upstream", - apply: applyLinuxAppshotHotkeyPatch, - }, -];