Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions crates/openshell-supervisor-network/src/proxy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ impl ProxyHandle {
policy_local_ctx: Option<Arc<PolicyLocalContext>>,
denial_tx: Option<mpsc::UnboundedSender<DenialEvent>>,
activity_tx: Option<ActivitySender>,
engine_ready: tokio::sync::watch::Receiver<bool>,
) -> Result<Self> {
// Use override bind_addr, fall back to policy http_addr, then default
// to loopback:3128. The default allows the proxy to function when no
Expand Down Expand Up @@ -229,6 +230,30 @@ impl ProxyHandle {
}

let join = tokio::spawn(async move {
// Wait for the OPA engine's symlink resolution reload to complete
// before accepting connections. This prevents requests from
// observing a generation transition mid-flight, which would cause
// the generation guard to reject them with a 403.
//
// The TCP listener is already bound, so the kernel completes incoming
// handshakes and holds those connections in the listen backlog during
// this wait. Once we start accepting, queued connections drain immediately.
let mut engine_ready = engine_ready;
match tokio::time::timeout(
std::time::Duration::from_secs(15),
engine_ready.wait_for(|v| *v),
)
.await
{
Ok(_) => {}
Err(_) => {
warn!(
"Engine readiness signal not received within 15s; \
proceeding with proxy accept loop"
);
}
}

loop {
match listener.accept().await {
Ok((stream, _addr)) => {
Expand Down
14 changes: 14 additions & 0 deletions crates/openshell-supervisor-network/src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,13 @@ pub async fn run_networking(
.or_else(|| sandbox_id.map(str::to_string)),
));

// Readiness signal for the proxy accept loop: the proxy binds the TCP
// listener immediately (so the kernel queues early connections in the
// listen backlog) but defers `accept()` until symlink resolution completes
// or the proxy's readiness wait times out. This avoids the race where an
// in-flight request observes a generation transition during the OPA engine
// reload.
let (engine_ready_tx, engine_ready_rx) = tokio::sync::watch::channel(false);

// Spawn a task to resolve policy binary symlinks once the workload's mount
// namespace becomes accessible via /proc/<pid>/root/. The task starts
// before run_process spawns the child, so first wait for the orchestrator
Expand Down Expand Up @@ -125,6 +132,7 @@ pub async fn run_networking(
"Entrypoint PID never published; binary symlink resolution skipped. \
Policy binary paths will be matched literally."
);
let _ = engine_ready_tx.send(true);
return;
}

Expand Down Expand Up @@ -155,6 +163,7 @@ pub async fn run_networking(
);
}
}
let _ = engine_ready_tx.send(true);
return;
}
debug!(
Expand All @@ -170,7 +179,11 @@ pub async fn run_networking(
If binaries are symlinks, use canonical paths in your policy \
(run 'readlink -f <path>' inside the sandbox)"
);
let _ = engine_ready_tx.send(true);
});
} else {
// No symlink resolution needed — unblock the proxy immediately.
let _ = engine_ready_tx.send(true);
}

// Identity cache for SHA256 TOFU when OPA is active. Only consumed by
Expand Down Expand Up @@ -279,6 +292,7 @@ pub async fn run_networking(
Some(policy_local_ctx.clone()),
denial_tx,
activity_tx,
engine_ready_rx,
)
.await?;
Some(proxy_handle)
Expand Down
Loading