Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions crates/crw-browse/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ struct Cli {
/// those tools return `NOT_IMPLEMENTED`.
#[arg(long, env = "CRW_BROWSE_CHROME_WS_URL")]
chrome_ws_url: Option<String>,

/// Directory where screenshot(path=...) may create image artifacts.
/// If unset, screenshot output must be returned inline as base64.
#[arg(long, env = "CRW_BROWSE_SCREENSHOT_DIR")]
screenshot_dir: Option<std::path::PathBuf>,
}

#[tokio::main]
Expand All @@ -41,6 +46,7 @@ async fn main() -> Result<()> {
ws_url: cli.ws_url,
page_timeout: Duration::from_millis(cli.page_timeout_ms),
chrome_ws_url: cli.chrome_ws_url,
screenshot_dir: cli.screenshot_dir,
};

tracing::info!(ws_url = %config.ws_url, "starting crw-browse");
Expand Down
5 changes: 5 additions & 0 deletions crates/crw-browse/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//! tool call. Multi-session + `session.new`/`session.close` tools land later
//! in Phase 2 (see ROADMAP).

use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;

Expand Down Expand Up @@ -34,6 +35,9 @@ pub struct BrowseConfig {
/// v0.2.9). Tools that require Chrome return `NOT_IMPLEMENTED` when
/// this is `None`.
pub chrome_ws_url: Option<String>,
/// Optional directory where screenshot `path` outputs may be created.
/// When unset, screenshots must be returned inline as base64.
pub screenshot_dir: Option<PathBuf>,
}

impl Default for BrowseConfig {
Expand All @@ -42,6 +46,7 @@ impl Default for BrowseConfig {
ws_url: "ws://localhost:9222".to_string(),
page_timeout: Duration::from_secs(30),
chrome_ws_url: None,
screenshot_dir: None,
}
}
}
Expand Down
19 changes: 19 additions & 0 deletions crates/crw-browse/src/session.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ pub struct BrowserSession {
/// Listener task handle — aborted on session close so it can't outlive the
/// connection. `None` until `ensure_attached` runs for the first time.
event_listener: Mutex<Option<JoinHandle<()>>>,
/// CDP Fetch guard task. It must live for the full session lifetime because
/// navigations can be triggered by clicks, scripts, redirects, and SPA code
/// after the original `goto` has returned.
outbound_guard: Mutex<Option<JoinHandle<()>>>,
}

impl BrowserSession {
Expand All @@ -155,6 +159,7 @@ impl BrowserSession {
network_buffer: Arc::new(Mutex::new(VecDeque::with_capacity(NETWORK_BUFFER_CAP))),
network_event_count: Arc::new(AtomicU64::new(0)),
event_listener: Mutex::new(None),
outbound_guard: Mutex::new(None),
}
}

Expand Down Expand Up @@ -377,6 +382,17 @@ impl BrowserSession {
)
.await?;
}
crate::tools::goto::enable_outbound_guard(&self.conn, &cdp_session_id, timeout).await?;
let mut outbound_guard = self.outbound_guard.lock().await;
if outbound_guard.as_ref().is_none_or(|h| h.is_finished()) {
let rx = self.conn.subscribe();
let conn = self.conn.clone();
let sid = cdp_session_id.clone();
*outbound_guard = Some(tokio::spawn(async move {
crate::tools::goto::run_outbound_guard(conn, rx, &sid).await;
}));
}
drop(outbound_guard);

*self.target_id.write().await = Some(target_id);
*self.cdp_session_id.write().await = Some(cdp_session_id.clone());
Expand All @@ -403,6 +419,9 @@ impl BrowserSession {
if let Some(h) = self.event_listener.lock().await.take() {
h.abort();
}
if let Some(h) = self.outbound_guard.lock().await.take() {
h.abort();
}
self.conn.close().await;
}
}
Expand Down
115 changes: 114 additions & 1 deletion crates/crw-browse/src/tools/goto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ pub async fn handle(server: &CrwBrowse, input: GotoInput) -> Result<CallToolResu
)));
}
};

// Subscribe before navigating so we don't miss Page.loadEventFired.
let events_rx = session.conn.subscribe();

Expand Down Expand Up @@ -134,9 +133,111 @@ pub(crate) fn validate_goto_url(url: &str) -> Result<(), String> {
"scheme {scheme:?} not allowed — goto accepts http or https only"
));
}
crw_core::url_safety::validate_safe_url(&parsed)?;
Ok(())
}

/// Runs the synchronous `validate_goto_url` checks on `url`, then the
/// resolved-address check from `crw_core::url_safety`, capped at two seconds.
///
/// # Errors
///
/// Returns the error string from `validate_goto_url`, an `invalid url: …`
/// message if the URL fails to parse, `DNS validation timed out` if the
/// resolved check does not finish within the cap, or whatever error the
/// resolved check itself reports.
async fn validate_goto_url_resolved(url: &str) -> Result<(), String> {
    validate_goto_url(url)?;

    let target = match url::Url::parse(url) {
        Ok(target) => target,
        Err(e) => return Err(format!("invalid url: {e}")),
    };

    // NOTE(review): presumably this performs a DNS lookup (the timeout message
    // says so); bound it so a slow resolver cannot stall the caller.
    let resolved_check = crw_core::url_safety::validate_safe_url_resolved(&target);
    match tokio::time::timeout(Duration::from_secs(2), resolved_check).await {
        Ok(verdict) => verdict,
        Err(_) => Err("DNS validation timed out".to_string()),
    }
}

/// Sends `Fetch.enable` on the given CDP session with a single catch-all
/// pattern (`urlPattern: "*"`) at the `Request` stage.
///
/// The reply payload is discarded; only success or failure of the command is
/// reported to the caller.
///
/// # Errors
///
/// Propagates whatever error `send_recv` returns for the command.
pub(crate) async fn enable_outbound_guard(
    conn: &crw_renderer::cdp_conn::CdpConnection,
    cdp_session_id: &str,
    timeout: Duration,
) -> crw_core::error::CrwResult<()> {
    let intercept_everything = serde_json::json!({
        "patterns": [
            { "urlPattern": "*", "requestStage": "Request" }
        ]
    });

    conn.send_recv(
        "Fetch.enable",
        intercept_everything,
        Some(cdp_session_id),
        timeout,
    )
    .await?;
    Ok(())
}

/// Event loop that answers every `Fetch.requestPaused` event for one CDP
/// session.
///
/// For each paused request the URL is checked with
/// `validate_goto_url_resolved` in a spawned task; the request is then either
/// continued (`Fetch.continueRequest`) or failed (`Fetch.failRequest` with
/// `BlockedByClient`). At most 32 checks run at once; when no slot is free the
/// request is failed immediately from the loop itself.
///
/// The loop returns when the event channel reports `Closed`. `Lagged` errors
/// are skipped, and events for other sessions or other methods are ignored.
/// Failures of the reply commands themselves are deliberately discarded.
pub(crate) async fn run_outbound_guard(
    conn: std::sync::Arc<crw_renderer::cdp_conn::CdpConnection>,
    mut events: tokio::sync::broadcast::Receiver<CdpEvent>,
    cdp_session_id: &str,
) {
    use tokio::sync::broadcast::error::RecvError;

    let cmd_timeout = Duration::from_secs(2);
    let slots = std::sync::Arc::new(tokio::sync::Semaphore::new(32));

    loop {
        let event = match events.recv().await {
            Err(RecvError::Closed) => return,
            Err(RecvError::Lagged(_)) => continue,
            Ok(event) => event,
        };

        // Only paused-request events addressed to our own session matter.
        let for_this_session = event.session_id.as_deref() == Some(cdp_session_id);
        if !(for_this_session && event.method == "Fetch.requestPaused") {
            continue;
        }

        // Without a request id there is nothing we could answer.
        let Some(paused_id) = event
            .params
            .get("requestId")
            .and_then(|v| v.as_str())
            .filter(|id| !id.is_empty())
        else {
            continue;
        };

        // All slots busy: reject right away instead of queueing more work.
        let Ok(permit) = slots.clone().try_acquire_owned() else {
            let rejection = serde_json::json!({
                "requestId": paused_id,
                "errorReason": "BlockedByClient",
            });
            let _ = conn
                .send_recv(
                    "Fetch.failRequest",
                    rejection,
                    Some(cdp_session_id),
                    cmd_timeout,
                )
                .await;
            continue;
        };

        // A missing URL becomes "", which then fails validation and is blocked.
        let target_url = event
            .params
            .get("request")
            .and_then(|r| r.get("url"))
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string();
        let paused_id = paused_id.to_string();
        let task_conn = conn.clone();
        let task_session = cdp_session_id.to_string();

        tokio::spawn(async move {
            // Hold the slot until the verdict has been sent.
            let _permit = permit;
            let (method, params) = match validate_goto_url_resolved(&target_url).await {
                Ok(()) => (
                    "Fetch.continueRequest",
                    serde_json::json!({ "requestId": paused_id }),
                ),
                Err(_) => (
                    "Fetch.failRequest",
                    serde_json::json!({ "requestId": paused_id, "errorReason": "BlockedByClient" }),
                ),
            };
            let _ = task_conn
                .send_recv(method, params, Some(&task_session), cmd_timeout)
                .await;
        });
    }
}

/// Waits until either `Page.loadEventFired` arrives or `timeout` elapses, and
/// returns the HTTP status from the first `Network.responseReceived` event
/// with `type: "Document"` that matches our session.
Expand Down Expand Up @@ -248,6 +349,18 @@ mod tests {
assert!(validate_goto_url("ht%74ps://example.com").is_err());
}

#[test]
fn validate_goto_url_rejects_internal_networks() {
    // IPv4 loopback, RFC 1918 private space, the link-local metadata
    // address, and IPv6 loopback.
    const INTERNAL_TARGETS: [&str; 4] = [
        "http://127.0.0.1",
        "http://10.0.0.1",
        "http://169.254.169.254/latest/meta-data/",
        "http://[::1]/",
    ];

    for bad in INTERNAL_TARGETS {
        let verdict = validate_goto_url(bad);
        assert!(verdict.is_err(), "{bad} should be rejected");
    }
}

#[test]
fn validate_goto_url_rejects_malformed() {
assert!(validate_goto_url("not a url").is_err());
Expand Down
Loading
Loading