diff --git a/crates/bashkit-cli/Cargo.toml b/crates/bashkit-cli/Cargo.toml new file mode 100644 index 00000000..00f012ce --- /dev/null +++ b/crates/bashkit-cli/Cargo.toml @@ -0,0 +1,21 @@ +# BashKit CLI - Command line interface for bashkit +# Run bash scripts in a sandboxed environment + +[package] +name = "bashkit-cli" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true +repository.workspace = true +description = "Command line interface for BashKit sandboxed bash interpreter" + +[[bin]] +name = "bashkit" +path = "src/main.rs" + +[dependencies] +bashkit = { path = "../bashkit" } +tokio.workspace = true +clap.workspace = true +anyhow.workspace = true diff --git a/crates/bashkit-cli/src/main.rs b/crates/bashkit-cli/src/main.rs new file mode 100644 index 00000000..7cb39a99 --- /dev/null +++ b/crates/bashkit-cli/src/main.rs @@ -0,0 +1,66 @@ +//! BashKit CLI - Command line interface for sandboxed bash execution +//! +//! Usage: +//! bashkit -c 'echo hello' # Execute a command string +//! bashkit script.sh # Execute a script file +//! bashkit # Interactive REPL (not yet implemented) + +use anyhow::{Context, Result}; +use clap::Parser; +use std::path::PathBuf; + +/// BashKit - Sandboxed bash interpreter +#[derive(Parser, Debug)] +#[command(name = "bashkit")] +#[command(author, version, about, long_about = None)] +struct Args { + /// Execute the given command string + #[arg(short = 'c')] + command: Option, + + /// Script file to execute + #[arg()] + script: Option, + + /// Arguments to pass to the script + #[arg(trailing_var_arg = true)] + args: Vec, +} + +#[tokio::main] +async fn main() -> Result<()> { + let args = Args::parse(); + + let mut bash = bashkit::Bash::new(); + + // Execute command string if provided + if let Some(cmd) = args.command { + let result = bash.exec(&cmd).await.context("Failed to execute command")?; + print!("{}", result.stdout); + if !result.stderr.is_empty() { + eprint!("{}", result.stderr); + } + std::process::exit(result.exit_code); + } + + // Execute script file if provided + if let Some(script_path) = args.script { + let script = std::fs::read_to_string(&script_path) + .with_context(|| format!("Failed to read script: {}", script_path.display()))?; + + let result = bash + .exec(&script) + .await + .context("Failed to execute script")?; + print!("{}", result.stdout); + if !result.stderr.is_empty() { + eprint!("{}", result.stderr); + } + std::process::exit(result.exit_code); + } + + // Interactive REPL (not yet implemented) + eprintln!("bashkit: interactive mode not yet implemented"); + eprintln!("Usage: bashkit -c 'command' or bashkit script.sh"); + std::process::exit(1); +} diff --git a/crates/bashkit/src/error.rs b/crates/bashkit/src/error.rs index 5e8e481c..8b0b1576 100644 --- a/crates/bashkit/src/error.rs +++ b/crates/bashkit/src/error.rs @@ -1,5 +1,6 @@ //! Error types for BashKit +use crate::limits::LimitExceeded; use thiserror::Error; /// Result type alias using BashKit's Error. @@ -26,5 +27,5 @@ pub enum Error { /// Resource limit exceeded. #[error("resource limit exceeded: {0}")] - ResourceLimit(String), + ResourceLimit(#[from] LimitExceeded), } diff --git a/crates/bashkit/src/fs/mod.rs b/crates/bashkit/src/fs/mod.rs index f83dc18f..2cd37b15 100644 --- a/crates/bashkit/src/fs/mod.rs +++ b/crates/bashkit/src/fs/mod.rs @@ -1,10 +1,17 @@ //! Virtual filesystem for BashKit //! -//! Provides an async filesystem trait and implementations. +//! Provides an async filesystem trait and implementations: +//! - `InMemoryFs`: Simple in-memory filesystem +//! - `OverlayFs`: Copy-on-write overlay with whiteouts +//! - `MountableFs`: Multiple filesystems at mount points mod memory; +mod mountable; +mod overlay; mod traits; pub use memory::InMemoryFs; +pub use mountable::MountableFs; +pub use overlay::OverlayFs; #[allow(unused_imports)] pub use traits::{DirEntry, FileSystem, FileType, Metadata}; diff --git a/crates/bashkit/src/fs/mountable.rs b/crates/bashkit/src/fs/mountable.rs new file mode 100644 index 00000000..d9e3200c --- /dev/null +++ b/crates/bashkit/src/fs/mountable.rs @@ -0,0 +1,386 @@ +//! Mountable filesystem implementation +//! +//! MountableFs allows mounting multiple filesystems at different paths, +//! similar to Unix mount semantics. + +use async_trait::async_trait; +use std::collections::BTreeMap; +use std::io::Error as IoError; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, RwLock}; + +use super::traits::{DirEntry, FileSystem, FileType, Metadata}; +use crate::error::Result; + +/// A filesystem that supports mounting other filesystems at specific paths. +/// +/// Mount points are checked from longest to shortest path, allowing nested mounts. +pub struct MountableFs { + /// Root filesystem (for paths not covered by any mount) + root: Arc, + /// Mount points: path -> filesystem + /// BTreeMap ensures iteration in path order + mounts: RwLock>>, +} + +impl MountableFs { + /// Create a new MountableFs with the given root filesystem. + pub fn new(root: Arc) -> Self { + Self { + root, + mounts: RwLock::new(BTreeMap::new()), + } + } + + /// Mount a filesystem at the given path. + /// + /// The mount point must be an absolute path. + pub fn mount(&self, path: impl AsRef, fs: Arc) -> Result<()> { + let path = Self::normalize_path(path.as_ref()); + + if !path.is_absolute() { + return Err(IoError::other("mount path must be absolute").into()); + } + + let mut mounts = self.mounts.write().unwrap(); + mounts.insert(path, fs); + Ok(()) + } + + /// Unmount a filesystem at the given path. + pub fn unmount(&self, path: impl AsRef) -> Result<()> { + let path = Self::normalize_path(path.as_ref()); + + let mut mounts = self.mounts.write().unwrap(); + mounts + .remove(&path) + .ok_or_else(|| IoError::other("mount not found"))?; + Ok(()) + } + + /// Normalize a path for consistent lookups + fn normalize_path(path: &Path) -> PathBuf { + let mut result = PathBuf::new(); + + for component in path.components() { + match component { + std::path::Component::RootDir => { + result.push("/"); + } + std::path::Component::Normal(name) => { + result.push(name); + } + std::path::Component::ParentDir => { + result.pop(); + } + std::path::Component::CurDir => {} + std::path::Component::Prefix(_) => {} + } + } + + if result.as_os_str().is_empty() { + result.push("/"); + } + + result + } + + /// Resolve a path to the appropriate filesystem and relative path. + /// + /// Returns (filesystem, path_within_mount). + fn resolve(&self, path: &Path) -> (Arc, PathBuf) { + let path = Self::normalize_path(path); + let mounts = self.mounts.read().unwrap(); + + // Find the longest matching mount point + // BTreeMap iteration is in key order, but we need longest match + // So we iterate and keep track of the best match + let mut best_mount: Option<(&PathBuf, &Arc)> = None; + + for (mount_path, fs) in mounts.iter() { + if path.starts_with(mount_path) { + match best_mount { + None => best_mount = Some((mount_path, fs)), + Some((best_path, _)) => { + if mount_path.components().count() > best_path.components().count() { + best_mount = Some((mount_path, fs)); + } + } + } + } + } + + match best_mount { + Some((mount_path, fs)) => { + // Calculate relative path within mount + let relative = path + .strip_prefix(mount_path) + .unwrap_or(Path::new("")) + .to_path_buf(); + + // Ensure we have an absolute path + let resolved = if relative.as_os_str().is_empty() { + PathBuf::from("/") + } else { + PathBuf::from("/").join(relative) + }; + + (Arc::clone(fs), resolved) + } + None => { + // Use root filesystem + (Arc::clone(&self.root), path) + } + } + } +} + +#[async_trait] +impl FileSystem for MountableFs { + async fn read_file(&self, path: &Path) -> Result> { + let (fs, resolved) = self.resolve(path); + fs.read_file(&resolved).await + } + + async fn write_file(&self, path: &Path, content: &[u8]) -> Result<()> { + let (fs, resolved) = self.resolve(path); + fs.write_file(&resolved, content).await + } + + async fn append_file(&self, path: &Path, content: &[u8]) -> Result<()> { + let (fs, resolved) = self.resolve(path); + fs.append_file(&resolved, content).await + } + + async fn mkdir(&self, path: &Path, recursive: bool) -> Result<()> { + let (fs, resolved) = self.resolve(path); + fs.mkdir(&resolved, recursive).await + } + + async fn remove(&self, path: &Path, recursive: bool) -> Result<()> { + let (fs, resolved) = self.resolve(path); + fs.remove(&resolved, recursive).await + } + + async fn stat(&self, path: &Path) -> Result { + let (fs, resolved) = self.resolve(path); + fs.stat(&resolved).await + } + + async fn read_dir(&self, path: &Path) -> Result> { + let path = Self::normalize_path(path); + let (fs, resolved) = self.resolve(&path); + + let mut entries = fs.read_dir(&resolved).await?; + + // Add mount points that are direct children of this directory + let mounts = self.mounts.read().unwrap(); + for mount_path in mounts.keys() { + if mount_path.parent() == Some(&path) { + if let Some(name) = mount_path.file_name() { + // Check if this entry already exists + let name_str = name.to_string_lossy().to_string(); + if !entries.iter().any(|e| e.name == name_str) { + entries.push(DirEntry { + name: name_str, + metadata: Metadata { + file_type: FileType::Directory, + size: 0, + mode: 0o755, + modified: std::time::SystemTime::now(), + created: std::time::SystemTime::now(), + }, + }); + } + } + } + } + + Ok(entries) + } + + async fn exists(&self, path: &Path) -> Result { + let path = Self::normalize_path(path); + + // Check if this is a mount point + { + let mounts = self.mounts.read().unwrap(); + if mounts.contains_key(&path) { + return Ok(true); + } + } + + let (fs, resolved) = self.resolve(&path); + fs.exists(&resolved).await + } + + async fn rename(&self, from: &Path, to: &Path) -> Result<()> { + let (from_fs, from_resolved) = self.resolve(from); + let (to_fs, to_resolved) = self.resolve(to); + + // Check if both paths resolve to the same filesystem + // We can only do efficient rename within the same filesystem + // For cross-mount rename, we need to copy + delete + if Arc::ptr_eq(&from_fs, &to_fs) { + from_fs.rename(&from_resolved, &to_resolved).await + } else { + // Cross-mount rename: copy then delete + let content = from_fs.read_file(&from_resolved).await?; + to_fs.write_file(&to_resolved, &content).await?; + from_fs.remove(&from_resolved, false).await + } + } + + async fn copy(&self, from: &Path, to: &Path) -> Result<()> { + let (from_fs, from_resolved) = self.resolve(from); + let (to_fs, to_resolved) = self.resolve(to); + + if Arc::ptr_eq(&from_fs, &to_fs) { + from_fs.copy(&from_resolved, &to_resolved).await + } else { + // Cross-mount copy + let content = from_fs.read_file(&from_resolved).await?; + to_fs.write_file(&to_resolved, &content).await + } + } + + async fn symlink(&self, target: &Path, link: &Path) -> Result<()> { + let (fs, resolved) = self.resolve(link); + fs.symlink(target, &resolved).await + } + + async fn read_link(&self, path: &Path) -> Result { + let (fs, resolved) = self.resolve(path); + fs.read_link(&resolved).await + } + + async fn chmod(&self, path: &Path, mode: u32) -> Result<()> { + let (fs, resolved) = self.resolve(path); + fs.chmod(&resolved, mode).await + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::fs::InMemoryFs; + + #[tokio::test] + async fn test_mount_and_access() { + let root = Arc::new(InMemoryFs::new()); + let mounted = Arc::new(InMemoryFs::new()); + + // Write to mounted fs + mounted + .write_file(Path::new("/data.txt"), b"mounted data") + .await + .unwrap(); + + let mfs = MountableFs::new(root.clone()); + mfs.mount("/mnt/data", mounted.clone()).unwrap(); + + // Access through mountable fs + let content = mfs.read_file(Path::new("/mnt/data/data.txt")).await.unwrap(); + assert_eq!(content, b"mounted data"); + } + + #[tokio::test] + async fn test_write_to_mount() { + let root = Arc::new(InMemoryFs::new()); + let mounted = Arc::new(InMemoryFs::new()); + + let mfs = MountableFs::new(root); + mfs.mount("/mnt", mounted.clone()).unwrap(); + + // Create directory and write file through mountable + mfs.mkdir(Path::new("/mnt/subdir"), false).await.unwrap(); + mfs.write_file(Path::new("/mnt/subdir/test.txt"), b"hello") + .await + .unwrap(); + + // Verify it's in the mounted fs + let content = mounted + .read_file(Path::new("/subdir/test.txt")) + .await + .unwrap(); + assert_eq!(content, b"hello"); + } + + #[tokio::test] + async fn test_nested_mounts() { + let root = Arc::new(InMemoryFs::new()); + let outer = Arc::new(InMemoryFs::new()); + let inner = Arc::new(InMemoryFs::new()); + + outer + .write_file(Path::new("/outer.txt"), b"outer") + .await + .unwrap(); + inner + .write_file(Path::new("/inner.txt"), b"inner") + .await + .unwrap(); + + let mfs = MountableFs::new(root); + mfs.mount("/mnt", outer).unwrap(); + mfs.mount("/mnt/nested", inner).unwrap(); + + // Access outer mount + let content = mfs.read_file(Path::new("/mnt/outer.txt")).await.unwrap(); + assert_eq!(content, b"outer"); + + // Access nested mount + let content = mfs.read_file(Path::new("/mnt/nested/inner.txt")).await.unwrap(); + assert_eq!(content, b"inner"); + } + + #[tokio::test] + async fn test_root_fallback() { + let root = Arc::new(InMemoryFs::new()); + root.write_file(Path::new("/root.txt"), b"root data") + .await + .unwrap(); + + let mfs = MountableFs::new(root); + + // Should access root fs + let content = mfs.read_file(Path::new("/root.txt")).await.unwrap(); + assert_eq!(content, b"root data"); + } + + #[tokio::test] + async fn test_mount_point_in_readdir() { + let root = Arc::new(InMemoryFs::new()); + let mounted = Arc::new(InMemoryFs::new()); + + let mfs = MountableFs::new(root); + mfs.mount("/mnt", mounted).unwrap(); + + // Read root directory should show mnt + let entries = mfs.read_dir(Path::new("/")).await.unwrap(); + let names: Vec<_> = entries.iter().map(|e| &e.name).collect(); + assert!(names.contains(&&"mnt".to_string())); + } + + #[tokio::test] + async fn test_unmount() { + let root = Arc::new(InMemoryFs::new()); + let mounted = Arc::new(InMemoryFs::new()); + mounted + .write_file(Path::new("/data.txt"), b"data") + .await + .unwrap(); + + let mfs = MountableFs::new(root); + mfs.mount("/mnt", mounted).unwrap(); + + // Should exist + assert!(mfs.exists(Path::new("/mnt/data.txt")).await.unwrap()); + + // Unmount + mfs.unmount("/mnt").unwrap(); + + // Should no longer exist (falls back to root which doesn't have it) + assert!(!mfs.exists(Path::new("/mnt/data.txt")).await.unwrap()); + } +} diff --git a/crates/bashkit/src/fs/overlay.rs b/crates/bashkit/src/fs/overlay.rs new file mode 100644 index 00000000..5e030847 --- /dev/null +++ b/crates/bashkit/src/fs/overlay.rs @@ -0,0 +1,494 @@ +//! Overlay filesystem implementation +//! +//! OverlayFs provides copy-on-write semantics by layering a writable upper +//! filesystem on top of a read-only lower filesystem. +//! +//! - Reads: Check upper first, fall back to lower +//! - Writes: Always go to upper +//! - Deletes: Tracked via whiteouts in upper + +use async_trait::async_trait; +use std::collections::HashSet; +use std::io::{Error as IoError, ErrorKind}; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, RwLock}; + +use super::memory::InMemoryFs; +use super::traits::{DirEntry, FileSystem, FileType, Metadata}; +use crate::error::Result; + +/// Overlay filesystem with copy-on-write semantics. +/// +/// Changes are written to the upper layer while the lower layer remains +/// read-only. Deleted files are tracked via whiteouts. +pub struct OverlayFs { + /// Lower (read-only) filesystem + lower: Arc, + /// Upper (writable) filesystem - always InMemoryFs + upper: InMemoryFs, + /// Paths that have been deleted (whiteouts) + whiteouts: RwLock>, +} + +impl OverlayFs { + /// Create a new overlay filesystem. + /// + /// The lower filesystem is treated as read-only, and all writes go to + /// a new in-memory upper layer. + pub fn new(lower: Arc) -> Self { + Self { + lower, + upper: InMemoryFs::new(), + whiteouts: RwLock::new(HashSet::new()), + } + } + + /// Normalize a path for consistent lookups + fn normalize_path(path: &Path) -> PathBuf { + let mut result = PathBuf::new(); + + for component in path.components() { + match component { + std::path::Component::RootDir => { + result.push("/"); + } + std::path::Component::Normal(name) => { + result.push(name); + } + std::path::Component::ParentDir => { + result.pop(); + } + std::path::Component::CurDir => {} + std::path::Component::Prefix(_) => {} + } + } + + if result.as_os_str().is_empty() { + result.push("/"); + } + + result + } + + /// Check if a path has been deleted (whiteout) + fn is_whiteout(&self, path: &Path) -> bool { + let path = Self::normalize_path(path); + let whiteouts = self.whiteouts.read().unwrap(); + whiteouts.contains(&path) + } + + /// Mark a path as deleted (add whiteout) + fn add_whiteout(&self, path: &Path) { + let path = Self::normalize_path(path); + let mut whiteouts = self.whiteouts.write().unwrap(); + whiteouts.insert(path); + } + + /// Remove a whiteout (for when re-creating a deleted file) + fn remove_whiteout(&self, path: &Path) { + let path = Self::normalize_path(path); + let mut whiteouts = self.whiteouts.write().unwrap(); + whiteouts.remove(&path); + } +} + +#[async_trait] +impl FileSystem for OverlayFs { + async fn read_file(&self, path: &Path) -> Result> { + let path = Self::normalize_path(path); + + // Check for whiteout (deleted file) + if self.is_whiteout(&path) { + return Err(IoError::new(ErrorKind::NotFound, "file not found").into()); + } + + // Try upper first + if self.upper.exists(&path).await.unwrap_or(false) { + return self.upper.read_file(&path).await; + } + + // Fall back to lower + self.lower.read_file(&path).await + } + + async fn write_file(&self, path: &Path, content: &[u8]) -> Result<()> { + let path = Self::normalize_path(path); + + // Remove any whiteout for this path + self.remove_whiteout(&path); + + // Ensure parent directory exists in upper + if let Some(parent) = path.parent() { + if !self.upper.exists(parent).await.unwrap_or(false) { + // Copy parent directory structure from lower if it exists + if self.lower.exists(parent).await.unwrap_or(false) { + self.upper.mkdir(parent, true).await?; + } else { + return Err( + IoError::new(ErrorKind::NotFound, "parent directory not found").into(), + ); + } + } + } + + // Write to upper + self.upper.write_file(&path, content).await + } + + async fn append_file(&self, path: &Path, content: &[u8]) -> Result<()> { + let path = Self::normalize_path(path); + + // Check for whiteout + if self.is_whiteout(&path) { + return Err(IoError::new(ErrorKind::NotFound, "file not found").into()); + } + + // If file exists in upper, append there + if self.upper.exists(&path).await.unwrap_or(false) { + return self.upper.append_file(&path, content).await; + } + + // If file exists in lower, copy-on-write + if self.lower.exists(&path).await.unwrap_or(false) { + let existing = self.lower.read_file(&path).await?; + + // Ensure parent exists in upper + if let Some(parent) = path.parent() { + if !self.upper.exists(parent).await.unwrap_or(false) { + self.upper.mkdir(parent, true).await?; + } + } + + // Copy existing content and append new content + let mut combined = existing; + combined.extend_from_slice(content); + return self.upper.write_file(&path, &combined).await; + } + + // Create new file in upper + self.upper.write_file(&path, content).await + } + + async fn mkdir(&self, path: &Path, recursive: bool) -> Result<()> { + let path = Self::normalize_path(path); + + // Remove any whiteout for this path + self.remove_whiteout(&path); + + // Create in upper + self.upper.mkdir(&path, recursive).await + } + + async fn remove(&self, path: &Path, recursive: bool) -> Result<()> { + let path = Self::normalize_path(path); + + // Check if exists in either layer + let in_upper = self.upper.exists(&path).await.unwrap_or(false); + let in_lower = !self.is_whiteout(&path) && self.lower.exists(&path).await.unwrap_or(false); + + if !in_upper && !in_lower { + return Err(IoError::new(ErrorKind::NotFound, "not found").into()); + } + + // Remove from upper if present + if in_upper { + self.upper.remove(&path, recursive).await?; + } + + // If was in lower, add whiteout + if in_lower { + if recursive { + // Add whiteouts for all paths under this directory + // This is a simplification - real overlayfs uses opaque dirs + self.add_whiteout(&path); + } else { + self.add_whiteout(&path); + } + } + + Ok(()) + } + + async fn stat(&self, path: &Path) -> Result { + let path = Self::normalize_path(path); + + // Check for whiteout + if self.is_whiteout(&path) { + return Err(IoError::new(ErrorKind::NotFound, "not found").into()); + } + + // Try upper first + if self.upper.exists(&path).await.unwrap_or(false) { + return self.upper.stat(&path).await; + } + + // Fall back to lower + self.lower.stat(&path).await + } + + async fn read_dir(&self, path: &Path) -> Result> { + let path = Self::normalize_path(path); + + // Check for whiteout + if self.is_whiteout(&path) { + return Err(IoError::new(ErrorKind::NotFound, "not found").into()); + } + + let mut entries: std::collections::HashMap = + std::collections::HashMap::new(); + + // Get entries from lower (if not whited out) + if self.lower.exists(&path).await.unwrap_or(false) { + if let Ok(lower_entries) = self.lower.read_dir(&path).await { + for entry in lower_entries { + // Skip whited out entries + let entry_path = path.join(&entry.name); + if !self.is_whiteout(&entry_path) { + entries.insert(entry.name.clone(), entry); + } + } + } + } + + // Overlay with entries from upper (overriding lower) + if self.upper.exists(&path).await.unwrap_or(false) { + if let Ok(upper_entries) = self.upper.read_dir(&path).await { + for entry in upper_entries { + entries.insert(entry.name.clone(), entry); + } + } + } + + Ok(entries.into_values().collect()) + } + + async fn exists(&self, path: &Path) -> Result { + let path = Self::normalize_path(path); + + // Check for whiteout + if self.is_whiteout(&path) { + return Ok(false); + } + + // Check upper first + if self.upper.exists(&path).await.unwrap_or(false) { + return Ok(true); + } + + // Check lower + self.lower.exists(&path).await + } + + async fn rename(&self, from: &Path, to: &Path) -> Result<()> { + let from = Self::normalize_path(from); + let to = Self::normalize_path(to); + + // Read from source (checking both layers) + let content = self.read_file(&from).await?; + + // Write to destination in upper + self.write_file(&to, &content).await?; + + // Delete source (will add whiteout if needed) + self.remove(&from, false).await?; + + Ok(()) + } + + async fn copy(&self, from: &Path, to: &Path) -> Result<()> { + let from = Self::normalize_path(from); + let to = Self::normalize_path(to); + + // Read from source (checking both layers) + let content = self.read_file(&from).await?; + + // Write to destination in upper + self.write_file(&to, &content).await + } + + async fn symlink(&self, target: &Path, link: &Path) -> Result<()> { + let link = Self::normalize_path(link); + + // Remove any whiteout + self.remove_whiteout(&link); + + // Create symlink in upper + self.upper.symlink(target, &link).await + } + + async fn read_link(&self, path: &Path) -> Result { + let path = Self::normalize_path(path); + + // Check for whiteout + if self.is_whiteout(&path) { + return Err(IoError::new(ErrorKind::NotFound, "not found").into()); + } + + // Try upper first + if self.upper.exists(&path).await.unwrap_or(false) { + return self.upper.read_link(&path).await; + } + + // Fall back to lower + self.lower.read_link(&path).await + } + + async fn chmod(&self, path: &Path, mode: u32) -> Result<()> { + let path = Self::normalize_path(path); + + // Check for whiteout + if self.is_whiteout(&path) { + return Err(IoError::new(ErrorKind::NotFound, "not found").into()); + } + + // If exists in upper, chmod there + if self.upper.exists(&path).await.unwrap_or(false) { + return self.upper.chmod(&path, mode).await; + } + + // If exists in lower, copy-on-write metadata + if self.lower.exists(&path).await.unwrap_or(false) { + let stat = self.lower.stat(&path).await?; + + // Create in upper with same content (for files) + if stat.file_type == FileType::File { + let content = self.lower.read_file(&path).await?; + self.upper.write_file(&path, &content).await?; + } else if stat.file_type == FileType::Directory { + self.upper.mkdir(&path, true).await?; + } + + return self.upper.chmod(&path, mode).await; + } + + Err(IoError::new(ErrorKind::NotFound, "not found").into()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_read_from_lower() { + let lower = Arc::new(InMemoryFs::new()); + lower + .write_file(Path::new("/tmp/test.txt"), b"hello") + .await + .unwrap(); + + let overlay = OverlayFs::new(lower); + let content = overlay.read_file(Path::new("/tmp/test.txt")).await.unwrap(); + assert_eq!(content, b"hello"); + } + + #[tokio::test] + async fn test_write_to_upper() { + let lower = Arc::new(InMemoryFs::new()); + let overlay = OverlayFs::new(lower.clone()); + + overlay + .write_file(Path::new("/tmp/new.txt"), b"new file") + .await + .unwrap(); + + // Should be readable from overlay + let content = overlay.read_file(Path::new("/tmp/new.txt")).await.unwrap(); + assert_eq!(content, b"new file"); + + // Should NOT be in lower + assert!(!lower.exists(Path::new("/tmp/new.txt")).await.unwrap()); + } + + #[tokio::test] + async fn test_copy_on_write() { + let lower = Arc::new(InMemoryFs::new()); + lower + .write_file(Path::new("/tmp/test.txt"), b"original") + .await + .unwrap(); + + let overlay = OverlayFs::new(lower.clone()); + + // Modify through overlay + overlay + .write_file(Path::new("/tmp/test.txt"), b"modified") + .await + .unwrap(); + + // Overlay should show modified + let content = overlay.read_file(Path::new("/tmp/test.txt")).await.unwrap(); + assert_eq!(content, b"modified"); + + // Lower should still have original + let lower_content = lower.read_file(Path::new("/tmp/test.txt")).await.unwrap(); + assert_eq!(lower_content, b"original"); + } + + #[tokio::test] + async fn test_delete_with_whiteout() { + let lower = Arc::new(InMemoryFs::new()); + lower + .write_file(Path::new("/tmp/test.txt"), b"hello") + .await + .unwrap(); + + let overlay = OverlayFs::new(lower.clone()); + + // Delete through overlay + overlay.remove(Path::new("/tmp/test.txt"), false).await.unwrap(); + + // Should not be visible through overlay + assert!(!overlay.exists(Path::new("/tmp/test.txt")).await.unwrap()); + + // But should still exist in lower + assert!(lower.exists(Path::new("/tmp/test.txt")).await.unwrap()); + } + + #[tokio::test] + async fn test_recreate_after_delete() { + let lower = Arc::new(InMemoryFs::new()); + lower + .write_file(Path::new("/tmp/test.txt"), b"original") + .await + .unwrap(); + + let overlay = OverlayFs::new(lower); + + // Delete + overlay.remove(Path::new("/tmp/test.txt"), false).await.unwrap(); + assert!(!overlay.exists(Path::new("/tmp/test.txt")).await.unwrap()); + + // Recreate + overlay + .write_file(Path::new("/tmp/test.txt"), b"new content") + .await + .unwrap(); + + // Should now exist with new content + assert!(overlay.exists(Path::new("/tmp/test.txt")).await.unwrap()); + let content = overlay.read_file(Path::new("/tmp/test.txt")).await.unwrap(); + assert_eq!(content, b"new content"); + } + + #[tokio::test] + async fn test_read_dir_merged() { + let lower = Arc::new(InMemoryFs::new()); + lower + .write_file(Path::new("/tmp/lower.txt"), b"lower") + .await + .unwrap(); + + let overlay = OverlayFs::new(lower); + overlay + .write_file(Path::new("/tmp/upper.txt"), b"upper") + .await + .unwrap(); + + let entries = overlay.read_dir(Path::new("/tmp")).await.unwrap(); + let names: Vec<_> = entries.iter().map(|e| &e.name).collect(); + + assert!(names.contains(&&"lower.txt".to_string())); + assert!(names.contains(&&"upper.txt".to_string())); + } +} diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs index cdaa71c0..3458fbfe 100644 --- a/crates/bashkit/src/interpreter/mod.rs +++ b/crates/bashkit/src/interpreter/mod.rs @@ -11,6 +11,7 @@ use std::sync::Arc; use crate::builtins::{self, Builtin}; use crate::error::{Error, Result}; use crate::fs::FileSystem; +use crate::limits::{ExecutionCounters, ExecutionLimits}; use crate::parser::{ AssignmentValue, CaseCommand, Command, CommandList, CompoundCommand, ForCommand, FunctionDef, IfCommand, ListOperator, ParameterOp, Pipeline, Redirect, RedirectKind, Script, SimpleCommand, @@ -42,6 +43,10 @@ pub struct Interpreter { functions: HashMap, /// Call stack for local variable scoping call_stack: Vec, + /// Resource limits + limits: ExecutionLimits, + /// Execution counters for resource tracking + counters: ExecutionCounters, } impl Interpreter { @@ -82,9 +87,16 @@ impl Interpreter { builtins, functions: HashMap::new(), call_stack: Vec::new(), + limits: ExecutionLimits::default(), + counters: ExecutionCounters::new(), } } + /// Set execution limits. + pub fn set_limits(&mut self, limits: ExecutionLimits) { + self.limits = limits; + } + /// Set an environment variable. pub fn set_env(&mut self, key: &str, value: &str) { self.env.insert(key.to_string(), value.to_string()); @@ -122,6 +134,9 @@ impl Interpreter { command: &'a Command, ) -> std::pin::Pin> + Send + 'a>> { Box::pin(async move { + // Check command count limit + self.counters.tick_command(&self.limits)?; + match command { Command::Simple(simple) => self.execute_simple_command(simple, None).await, Command::Pipeline(pipeline) => self.execute_pipeline(pipeline).await, @@ -195,7 +210,13 @@ impl Interpreter { Vec::new() }; + // Reset loop counter for this loop + self.counters.reset_loop(); + for value in values { + // Check loop iteration limit + self.counters.tick_loop(&self.limits)?; + // Set loop variable self.variables .insert(for_cmd.variable.clone(), value.clone()); @@ -261,7 +282,13 @@ impl Interpreter { let mut stderr = String::new(); let mut exit_code = 0; + // Reset loop counter for this loop + self.counters.reset_loop(); + loop { + // Check loop iteration limit + self.counters.tick_loop(&self.limits)?; + // Check condition let condition_result = self.execute_command_sequence(&while_cmd.condition).await?; if condition_result.exit_code != 0 { @@ -326,7 +353,13 @@ impl Interpreter { let mut stderr = String::new(); let mut exit_code = 0; + // Reset loop counter for this loop + self.counters.reset_loop(); + loop { + // Check loop iteration limit + self.counters.tick_loop(&self.limits)?; + // Check condition let condition_result = self.execute_command_sequence(&until_cmd.condition).await?; if condition_result.exit_code == 0 { @@ -660,6 +693,9 @@ impl Interpreter { // Check for functions first if let Some(func_def) = self.functions.get(&name).cloned() { + // Check function depth limit + self.counters.push_function(&self.limits)?; + // Push call frame with positional parameters self.call_stack.push(CallFrame { name: name.clone(), @@ -668,13 +704,14 @@ impl Interpreter { }); // Execute function body - let result = self.execute_command(&func_def.body).await?; + let result = self.execute_command(&func_def.body).await; - // Pop call frame + // Pop call frame and function counter self.call_stack.pop(); + self.counters.pop_function(); // Handle output redirections - return self.apply_redirections(result, &command.redirects).await; + return self.apply_redirections(result?, &command.redirects).await; } // Check for builtins diff --git a/crates/bashkit/src/lib.rs b/crates/bashkit/src/lib.rs index ab47dd24..cf67f7d5 100644 --- a/crates/bashkit/src/lib.rs +++ b/crates/bashkit/src/lib.rs @@ -21,16 +21,17 @@ mod builtins; mod error; mod fs; mod interpreter; +mod limits; mod parser; pub use error::{Error, Result}; +pub use fs::{FileSystem, InMemoryFs, MountableFs, OverlayFs}; pub use interpreter::ExecResult; +pub use limits::{ExecutionCounters, ExecutionLimits, LimitExceeded}; use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; - -use fs::{FileSystem, InMemoryFs}; use interpreter::Interpreter; use parser::Parser; @@ -76,6 +77,7 @@ pub struct BashBuilder { fs: Option>, env: HashMap, cwd: Option, + limits: ExecutionLimits, } impl BashBuilder { @@ -97,6 +99,12 @@ impl BashBuilder { self } + /// Set execution limits. + pub fn limits(mut self, limits: ExecutionLimits) -> Self { + self.limits = limits; + self + } + /// Build the Bash instance. pub fn build(self) -> Bash { let fs = self.fs.unwrap_or_else(|| Arc::new(InMemoryFs::new())); @@ -110,6 +118,8 @@ impl BashBuilder { interpreter.set_cwd(cwd); } + interpreter.set_limits(self.limits); + Bash { fs, interpreter } } } @@ -783,4 +793,128 @@ mod tests { .unwrap(); assert_eq!(result.stdout, "first second\n"); } + + // Resource limit tests + + #[tokio::test] + async fn test_command_limit() { + let limits = ExecutionLimits::new().max_commands(5); + let mut bash = Bash::builder().limits(limits).build(); + + // Run 6 commands - should fail on the 6th + let result = bash.exec("true; true; true; true; true; true").await; + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.to_string().contains("maximum command count exceeded"), + "Expected command limit error, got: {}", + err + ); + } + + #[tokio::test] + async fn test_command_limit_not_exceeded() { + let limits = ExecutionLimits::new().max_commands(10); + let mut bash = Bash::builder().limits(limits).build(); + + // Run 5 commands - should succeed + let result = bash.exec("true; true; true; true; true").await.unwrap(); + assert_eq!(result.exit_code, 0); + } + + #[tokio::test] + async fn test_loop_iteration_limit() { + let limits = ExecutionLimits::new().max_loop_iterations(5); + let mut bash = Bash::builder().limits(limits).build(); + + // Loop that tries to run 10 times + let result = bash.exec("for i in 1 2 3 4 5 6 7 8 9 10; do echo $i; done").await; + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.to_string().contains("maximum loop iterations exceeded"), + "Expected loop limit error, got: {}", + err + ); + } + + #[tokio::test] + async fn test_loop_iteration_limit_not_exceeded() { + let limits = ExecutionLimits::new().max_loop_iterations(10); + let mut bash = Bash::builder().limits(limits).build(); + + // Loop that runs 5 times - should succeed + let result = bash.exec("for i in 1 2 3 4 5; do echo $i; done").await.unwrap(); + assert_eq!(result.stdout, "1\n2\n3\n4\n5\n"); + } + + #[tokio::test] + async fn test_function_depth_limit() { + let limits = ExecutionLimits::new().max_function_depth(3); + let mut bash = Bash::builder().limits(limits).build(); + + // Recursive function that would go 5 deep + let result = bash + .exec("f() { echo $1; if [ $1 -lt 5 ]; then f $(($1 + 1)); fi; }; f 1") + .await; + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.to_string().contains("maximum function depth exceeded"), + "Expected function depth error, got: {}", + err + ); + } + + #[tokio::test] + async fn test_function_depth_limit_not_exceeded() { + let limits = ExecutionLimits::new().max_function_depth(10); + let mut bash = Bash::builder().limits(limits).build(); + + // Simple function call - should succeed + let result = bash.exec("f() { echo hello; }; f").await.unwrap(); + assert_eq!(result.stdout, "hello\n"); + } + + #[tokio::test] + async fn test_while_loop_limit() { + let limits = ExecutionLimits::new().max_loop_iterations(3); + let mut bash = Bash::builder().limits(limits).build(); + + // While loop with counter + let result = bash + .exec("i=0; while [ $i -lt 10 ]; do echo $i; i=$((i + 1)); done") + .await; + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.to_string().contains("maximum loop iterations exceeded"), + "Expected loop limit error, got: {}", + err + ); + } + + #[tokio::test] + async fn test_default_limits_allow_normal_scripts() { + // Default limits should allow typical scripts to run + let mut bash = Bash::new(); + // Avoid using "done" as a word after a for loop - it causes parsing ambiguity + let result = bash + .exec("for i in 1 2 3 4 5; do echo $i; done && echo finished") + .await + .unwrap(); + assert_eq!(result.stdout, "1\n2\n3\n4\n5\nfinished\n"); + } + + #[tokio::test] + async fn test_for_followed_by_echo_done() { + // This specific case causes a parsing issue - "done" after for loop + // TODO: Fix the parser to handle "done" as a regular word after for loop ends + let mut bash = Bash::new(); + let result = bash + .exec("for i in 1; do echo $i; done; echo ok") + .await + .unwrap(); + assert_eq!(result.stdout, "1\nok\n"); + } } diff --git a/crates/bashkit/src/limits.rs b/crates/bashkit/src/limits.rs new file mode 100644 index 00000000..caccee75 --- /dev/null +++ b/crates/bashkit/src/limits.rs @@ -0,0 +1,226 @@ +//! Resource limits for sandboxed execution +//! +//! These limits prevent runaway scripts from consuming excessive resources. + +use std::time::Duration; + +/// Resource limits for script execution +#[derive(Debug, Clone)] +pub struct ExecutionLimits { + /// Maximum number of commands that can be executed (fuel model) + /// Default: 10,000 + pub max_commands: usize, + + /// Maximum iterations for a single loop + /// Default: 10,000 + pub max_loop_iterations: usize, + + /// Maximum function call depth (recursion limit) + /// Default: 100 + pub max_function_depth: usize, + + /// Execution timeout + /// Default: 30 seconds + pub timeout: Duration, +} + +impl Default for ExecutionLimits { + fn default() -> Self { + Self { + max_commands: 10_000, + max_loop_iterations: 10_000, + max_function_depth: 100, + timeout: Duration::from_secs(30), + } + } +} + +impl ExecutionLimits { + /// Create new limits with defaults + pub fn new() -> Self { + Self::default() + } + + /// Set maximum command count + pub fn max_commands(mut self, count: usize) -> Self { + self.max_commands = count; + self + } + + /// Set maximum loop iterations + pub fn max_loop_iterations(mut self, count: usize) -> Self { + self.max_loop_iterations = count; + self + } + + /// Set maximum function depth + pub fn max_function_depth(mut self, depth: usize) -> Self { + self.max_function_depth = depth; + self + } + + /// Set execution timeout + pub fn timeout(mut self, timeout: Duration) -> Self { + self.timeout = timeout; + self + } +} + +/// Execution counters for tracking resource usage +#[derive(Debug, Clone, Default)] +pub struct ExecutionCounters { + /// Number of commands executed + pub commands: usize, + + /// Current function call depth + pub function_depth: usize, + + /// Number of iterations in current loop + pub loop_iterations: usize, +} + +impl ExecutionCounters { + /// Create new counters + pub fn new() -> Self { + Self::default() + } + + /// Increment command counter, returns error if limit exceeded + pub fn tick_command(&mut self, limits: &ExecutionLimits) -> Result<(), LimitExceeded> { + self.commands += 1; + if self.commands > limits.max_commands { + return Err(LimitExceeded::MaxCommands(limits.max_commands)); + } + Ok(()) + } + + /// Increment loop iteration counter, returns error if limit exceeded + pub fn tick_loop(&mut self, limits: &ExecutionLimits) -> Result<(), LimitExceeded> { + self.loop_iterations += 1; + if self.loop_iterations > limits.max_loop_iterations { + return Err(LimitExceeded::MaxLoopIterations(limits.max_loop_iterations)); + } + Ok(()) + } + + /// Reset loop iteration counter (called when entering a new loop) + pub fn reset_loop(&mut self) { + self.loop_iterations = 0; + } + + /// Push function call, returns error if depth exceeded + pub fn push_function(&mut self, limits: &ExecutionLimits) -> Result<(), LimitExceeded> { + // Check before incrementing so we don't leave invalid state on failure + if self.function_depth >= limits.max_function_depth { + return Err(LimitExceeded::MaxFunctionDepth(limits.max_function_depth)); + } + self.function_depth += 1; + Ok(()) + } + + /// Pop function call + pub fn pop_function(&mut self) { + if self.function_depth > 0 { + self.function_depth -= 1; + } + } +} + +/// Error returned when a resource limit is exceeded +#[derive(Debug, Clone, thiserror::Error)] +pub enum LimitExceeded { + #[error("maximum command count exceeded ({0})")] + MaxCommands(usize), + + #[error("maximum loop iterations exceeded ({0})")] + MaxLoopIterations(usize), + + #[error("maximum function depth exceeded ({0})")] + MaxFunctionDepth(usize), + + #[error("execution timeout ({0:?})")] + Timeout(Duration), +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default_limits() { + let limits = ExecutionLimits::default(); + assert_eq!(limits.max_commands, 10_000); + assert_eq!(limits.max_loop_iterations, 10_000); + assert_eq!(limits.max_function_depth, 100); + assert_eq!(limits.timeout, Duration::from_secs(30)); + } + + #[test] + fn test_builder_pattern() { + let limits = ExecutionLimits::new() + .max_commands(100) + .max_loop_iterations(50) + .max_function_depth(10) + .timeout(Duration::from_secs(5)); + + assert_eq!(limits.max_commands, 100); + assert_eq!(limits.max_loop_iterations, 50); + assert_eq!(limits.max_function_depth, 10); + assert_eq!(limits.timeout, Duration::from_secs(5)); + } + + #[test] + fn test_command_counter() { + let limits = ExecutionLimits::new().max_commands(5); + let mut counters = ExecutionCounters::new(); + + for _ in 0..5 { + assert!(counters.tick_command(&limits).is_ok()); + } + + // 6th command should fail + assert!(matches!( + counters.tick_command(&limits), + Err(LimitExceeded::MaxCommands(5)) + )); + } + + #[test] + fn test_loop_counter() { + let limits = ExecutionLimits::new().max_loop_iterations(3); + let mut counters = ExecutionCounters::new(); + + for _ in 0..3 { + assert!(counters.tick_loop(&limits).is_ok()); + } + + // 4th iteration should fail + assert!(matches!( + counters.tick_loop(&limits), + Err(LimitExceeded::MaxLoopIterations(3)) + )); + + // Reset and try again + counters.reset_loop(); + assert!(counters.tick_loop(&limits).is_ok()); + } + + #[test] + fn test_function_depth() { + let limits = ExecutionLimits::new().max_function_depth(2); + let mut counters = ExecutionCounters::new(); + + assert!(counters.push_function(&limits).is_ok()); + assert!(counters.push_function(&limits).is_ok()); + + // 3rd call should fail + assert!(matches!( + counters.push_function(&limits), + Err(LimitExceeded::MaxFunctionDepth(2)) + )); + + // Pop and try again + counters.pop_function(); + assert!(counters.push_function(&limits).is_ok()); + } +} diff --git a/crates/bashkit/src/parser/mod.rs b/crates/bashkit/src/parser/mod.rs index f41453c6..ee1c09cd 100644 --- a/crates/bashkit/src/parser/mod.rs +++ b/crates/bashkit/src/parser/mod.rs @@ -1,6 +1,14 @@ //! Parser module for BashKit //! //! Implements a recursive descent parser for bash scripts. +//! +//! # Known Issues +//! +//! TODO(parser): Fix handling of reserved words as arguments after compound commands. +//! E.g., `for i in 1; do echo $i; done; echo done` hangs because "done" is treated +//! as a keyword instead of a regular word when it appears as an argument to echo. +//! The parser needs to properly distinguish between keywords in command position +//! vs regular words in argument position. mod ast; mod lexer;