diff --git a/Cargo.toml b/Cargo.toml index 9b6c59ee..96392432 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ serde_json = "1" # JSON processing (jq) - verified embeddable jaq-core = "2" jaq-std = "2" +jaq-json = { version = "1", features = ["serde_json"] } # Text search (grep) - verified supports search_slice() for in-memory grep = "0.3" diff --git a/README.md b/README.md index bad2c664..5d654744 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,108 @@ -# rust-template +# BashKit - +Sandboxed bash interpreter for multi-tenant environments. Written in Rust. -## Overview +## Features - +- **Sandboxed execution** - No real filesystem access by default +- **Virtual filesystem** - InMemoryFs, OverlayFs, MountableFs +- **Resource limits** - Command count, loop iterations, function depth +- **Network allowlist** - Control HTTP access per-domain +- **MCP server mode** - Model Context Protocol integration +- **Async-first** - Built on tokio ## Quick Start +```rust +use bashkit::Bash; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let mut bash = Bash::new(); + let result = bash.exec("echo hello world").await?; + println!("{}", result.stdout); // "hello world\n" + Ok(()) +} +``` + +## Built-in Commands + +| Category | Commands | +|----------|----------| +| Core | `echo`, `printf`, `cat`, `read` | +| Navigation | `cd`, `pwd` | +| Flow control | `true`, `false`, `exit`, `test`, `[` | +| Variables | `export`, `set`, `unset`, `local`, `source` | +| Text processing | `grep`, `sed`, `awk`, `jq` | + +## Shell Features + +- Variables and parameter expansion (`$VAR`, `${VAR:-default}`, `${#VAR}`) +- Command substitution (`$(cmd)`) +- Arithmetic expansion (`$((1 + 2))`) +- Pipelines and redirections (`|`, `>`, `>>`, `<`, `<<<`) +- Control flow (`if`/`elif`/`else`, `for`, `while`, `case`) +- Functions (POSIX and bash-style) +- Arrays (`arr=(a b c)`, `${arr[@]}`, `${#arr[@]}`) +- Glob expansion (`*`, `?`) +- Here documents (`< +MIT diff --git 
/// Expression node of a parsed AWK program.
///
/// Operators are stored as their source text (`"+"`, `"=="`, `"~"`, ...) and
/// are interpreted when the expression is evaluated.
#[derive(Debug, Clone)]
#[allow(dead_code)] // Regex and Match used for pattern matching expansion
enum AwkExpr {
    /// Numeric literal.
    Number(f64),
    /// String literal (escape sequences already resolved by the parser).
    String(String),
    /// Field reference `$expr`; the inner expression yields the field index.
    Field(Box<AwkExpr>), // $n
    /// Variable reference by name.
    Variable(String), // var
    /// Binary operation: left operand, operator text, right operand.
    BinOp(Box<AwkExpr>, String, Box<AwkExpr>),
    /// Unary operation: operator text and operand.
    UnaryOp(String, Box<AwkExpr>),
    /// Assignment of the boxed expression to the named variable.
    Assign(String, Box<AwkExpr>),
    /// Concatenation of the string values of all parts, in order.
    Concat(Vec<AwkExpr>),
    /// Call of a built-in function: name and argument expressions.
    FuncCall(String, Vec<AwkExpr>),
    /// Regular-expression source text.
    Regex(String),
    /// Match of an expression against a regex source text.
    Match(Box<AwkExpr>, String), // expr ~ /pattern/
}
/// Dynamically typed AWK value.
#[derive(Debug, Clone)]
enum AwkValue {
    /// A numeric value.
    Number(f64),
    /// A string value.
    String(String),
    /// A variable or field that was never assigned; acts as `0` and `""`.
    Uninitialized,
}

impl AwkValue {
    /// Converts the value to a number.
    ///
    /// Strings contribute their longest leading numeric prefix after optional
    /// leading whitespace (`"3abc"` is 3, `" 42 "` is 42); strings without
    /// such a prefix, and uninitialized values, are 0.
    fn as_number(&self) -> f64 {
        match self {
            AwkValue::Number(n) => *n,
            AwkValue::String(s) => Self::leading_number(s),
            AwkValue::Uninitialized => 0.0,
        }
    }

    /// Parses `[sign] digits [. digits] [e [sign] digits]` at the start of
    /// `text`, ignoring whatever follows. Returns 0 when no digits are found.
    fn leading_number(text: &str) -> f64 {
        let s = text.trim_start();
        let bytes = s.as_bytes();
        let digits_from =
            |from: usize| bytes[from..].iter().take_while(|b| b.is_ascii_digit()).count();

        let mut end = 0;
        if bytes.first() == Some(&b'+') || bytes.first() == Some(&b'-') {
            end = 1;
        }

        let int_digits = digits_from(end);
        end += int_digits;

        let mut frac_digits = 0;
        if bytes.get(end) == Some(&b'.') {
            frac_digits = digits_from(end + 1);
            // A lone "." with no digit on either side is not a number.
            if int_digits + frac_digits > 0 {
                end += 1 + frac_digits;
            }
        }
        if int_digits + frac_digits == 0 {
            return 0.0;
        }

        // The exponent only counts when at least one digit follows it.
        if bytes.get(end) == Some(&b'e') || bytes.get(end) == Some(&b'E') {
            let mut exp = end + 1;
            if bytes.get(exp) == Some(&b'+') || bytes.get(exp) == Some(&b'-') {
                exp += 1;
            }
            let exp_digits = digits_from(exp);
            if exp_digits > 0 {
                end = exp + exp_digits;
            }
        }

        // Everything up to `end` is ASCII, so the slice is on a char boundary.
        s[..end].parse().unwrap_or(0.0)
    }

    /// Converts the value to its output string.
    ///
    /// Integral numbers print without a fractional part. The `i64` cast is
    /// only used where it is exact; larger magnitudes would saturate to
    /// `i64::MAX`, so they go through the float formatter instead.
    fn as_string(&self) -> String {
        // 2^63: every integral f64 strictly below this converts exactly.
        const I64_LIMIT: f64 = 9_223_372_036_854_775_808.0;

        match self {
            AwkValue::Number(n) => {
                if n.fract() == 0.0 && n.abs() < I64_LIMIT {
                    (*n as i64).to_string()
                } else {
                    n.to_string()
                }
            }
            AwkValue::String(s) => s.clone(),
            AwkValue::Uninitialized => String::new(),
        }
    }

    /// Truthiness: non-zero numbers and non-empty strings are true.
    fn as_bool(&self) -> bool {
        match self {
            AwkValue::Number(n) => *n != 0.0,
            AwkValue::String(s) => !s.is_empty(),
            AwkValue::Uninitialized => false,
        }
    }
}
AwkValue::String(line.to_string())); + } + + fn get_field(&self, n: usize) -> AwkValue { + if n == 0 { + // $0 is the whole line + self.variables + .get("$0") + .cloned() + .unwrap_or(AwkValue::Uninitialized) + } else if n <= self.fields.len() { + AwkValue::String(self.fields[n - 1].clone()) + } else { + AwkValue::Uninitialized + } + } + + fn get_variable(&self, name: &str) -> AwkValue { + match name { + "NR" => AwkValue::Number(self.nr as f64), + "NF" => AwkValue::Number(self.nf as f64), + "FNR" => AwkValue::Number(self.fnr as f64), + "FS" => AwkValue::String(self.fs.clone()), + "OFS" => AwkValue::String(self.ofs.clone()), + "ORS" => AwkValue::String(self.ors.clone()), + _ => self + .variables + .get(name) + .cloned() + .unwrap_or(AwkValue::Uninitialized), + } + } + + fn set_variable(&mut self, name: &str, value: AwkValue) { + match name { + "FS" => self.fs = value.as_string(), + "OFS" => self.ofs = value.as_string(), + "ORS" => self.ors = value.as_string(), + _ => { + self.variables.insert(name.to_string(), value); + } + } + } +} + +struct AwkParser<'a> { + input: &'a str, + pos: usize, +} + +impl<'a> AwkParser<'a> { + fn new(input: &'a str) -> Self { + Self { input, pos: 0 } + } + + fn parse(&mut self) -> Result { + let mut program = AwkProgram { + begin_actions: Vec::new(), + main_rules: Vec::new(), + end_actions: Vec::new(), + }; + + self.skip_whitespace(); + + while self.pos < self.input.len() { + self.skip_whitespace(); + if self.pos >= self.input.len() { + break; + } + + // Check for BEGIN/END + if self.matches_keyword("BEGIN") { + self.skip_whitespace(); + let actions = self.parse_action_block()?; + program.begin_actions.extend(actions); + } else if self.matches_keyword("END") { + self.skip_whitespace(); + let actions = self.parse_action_block()?; + program.end_actions.extend(actions); + } else { + // Pattern-action rule + let rule = self.parse_rule()?; + program.main_rules.push(rule); + } + + self.skip_whitespace(); + } + + // If no rules, add default 
print rule + if program.main_rules.is_empty() + && program.begin_actions.is_empty() + && program.end_actions.is_empty() + { + program.main_rules.push(AwkRule { + pattern: None, + actions: vec![AwkAction::Print(vec![AwkExpr::Field(Box::new( + AwkExpr::Number(0.0), + ))])], + }); + } + + Ok(program) + } + + fn matches_keyword(&mut self, keyword: &str) -> bool { + if self.input[self.pos..].starts_with(keyword) { + let after = self.pos + keyword.len(); + if after >= self.input.len() + || !self.input.chars().nth(after).unwrap().is_alphanumeric() + { + self.pos = after; + return true; + } + } + false + } + + fn skip_whitespace(&mut self) { + while self.pos < self.input.len() { + let c = self.input.chars().nth(self.pos).unwrap(); + if c.is_whitespace() || c == ';' { + self.pos += 1; + } else if c == '#' { + // Comment - skip to end of line + while self.pos < self.input.len() + && self.input.chars().nth(self.pos).unwrap() != '\n' + { + self.pos += 1; + } + } else { + break; + } + } + } + + fn parse_rule(&mut self) -> Result { + let pattern = self.parse_pattern()?; + self.skip_whitespace(); + + let actions = + if self.pos < self.input.len() && self.input.chars().nth(self.pos).unwrap() == '{' { + self.parse_action_block()? 
+ } else if pattern.is_some() { + // Default action is print + vec![AwkAction::Print(vec![AwkExpr::Field(Box::new( + AwkExpr::Number(0.0), + ))])] + } else { + Vec::new() + }; + + Ok(AwkRule { pattern, actions }) + } + + fn parse_pattern(&mut self) -> Result> { + self.skip_whitespace(); + + if self.pos >= self.input.len() { + return Ok(None); + } + + let c = self.input.chars().nth(self.pos).unwrap(); + + // Check for regex pattern + if c == '/' { + self.pos += 1; + let start = self.pos; + while self.pos < self.input.len() { + let c = self.input.chars().nth(self.pos).unwrap(); + if c == '/' { + let pattern = &self.input[start..self.pos]; + self.pos += 1; + let regex = Regex::new(pattern) + .map_err(|e| Error::Execution(format!("awk: invalid regex: {}", e)))?; + return Ok(Some(AwkPattern::Regex(regex))); + } else if c == '\\' { + self.pos += 2; + } else { + self.pos += 1; + } + } + return Err(Error::Execution("awk: unterminated regex".to_string())); + } + + // Check for opening brace (no pattern) + if c == '{' { + return Ok(None); + } + + // Expression pattern + let expr = self.parse_expression()?; + Ok(Some(AwkPattern::Expression(expr))) + } + + fn parse_action_block(&mut self) -> Result> { + self.skip_whitespace(); + + if self.pos >= self.input.len() || self.input.chars().nth(self.pos).unwrap() != '{' { + return Err(Error::Execution("awk: expected '{'".to_string())); + } + self.pos += 1; + + let mut actions = Vec::new(); + + loop { + self.skip_whitespace(); + if self.pos >= self.input.len() { + return Err(Error::Execution( + "awk: unterminated action block".to_string(), + )); + } + + let c = self.input.chars().nth(self.pos).unwrap(); + if c == '}' { + self.pos += 1; + break; + } + + let action = self.parse_action()?; + actions.push(action); + + self.skip_whitespace(); + // Allow semicolon separator + if self.pos < self.input.len() && self.input.chars().nth(self.pos).unwrap() == ';' { + self.pos += 1; + } + } + + Ok(actions) + } + + fn parse_action(&mut self) -> 
Result { + self.skip_whitespace(); + + // Check for keywords + if self.matches_keyword("print") { + return self.parse_print(); + } + if self.matches_keyword("printf") { + return self.parse_printf(); + } + if self.matches_keyword("next") { + return Ok(AwkAction::Next); + } + if self.matches_keyword("exit") { + self.skip_whitespace(); + if self.pos < self.input.len() { + let c = self.input.chars().nth(self.pos).unwrap(); + if c != '}' && c != ';' { + let expr = self.parse_expression()?; + return Ok(AwkAction::Exit(Some(expr))); + } + } + return Ok(AwkAction::Exit(None)); + } + if self.matches_keyword("if") { + return self.parse_if(); + } + + // Otherwise it's an expression (including assignment) + let expr = self.parse_expression()?; + + // Check if it's an assignment + if let AwkExpr::Assign(name, val) = expr { + Ok(AwkAction::Assign(name, *val)) + } else { + Ok(AwkAction::Expression(expr)) + } + } + + fn parse_print(&mut self) -> Result { + self.skip_whitespace(); + let mut args = Vec::new(); + + loop { + if self.pos >= self.input.len() { + break; + } + let c = self.input.chars().nth(self.pos).unwrap(); + if c == '}' || c == ';' { + break; + } + + let expr = self.parse_expression()?; + args.push(expr); + + self.skip_whitespace(); + if self.pos < self.input.len() && self.input.chars().nth(self.pos).unwrap() == ',' { + self.pos += 1; + self.skip_whitespace(); + } else { + break; + } + } + + if args.is_empty() { + args.push(AwkExpr::Field(Box::new(AwkExpr::Number(0.0)))); + } + + Ok(AwkAction::Print(args)) + } + + fn parse_printf(&mut self) -> Result { + self.skip_whitespace(); + + // Parse format string + if self.pos >= self.input.len() || self.input.chars().nth(self.pos).unwrap() != '"' { + return Err(Error::Execution( + "awk: printf requires format string".to_string(), + )); + } + + let format = self.parse_string()?; + let mut args = Vec::new(); + + self.skip_whitespace(); + while self.pos < self.input.len() && self.input.chars().nth(self.pos).unwrap() == ',' { + 
self.pos += 1; + self.skip_whitespace(); + let expr = self.parse_expression()?; + args.push(expr); + self.skip_whitespace(); + } + + Ok(AwkAction::Printf(format, args)) + } + + fn parse_if(&mut self) -> Result { + self.skip_whitespace(); + + if self.pos >= self.input.len() || self.input.chars().nth(self.pos).unwrap() != '(' { + return Err(Error::Execution("awk: expected '(' after if".to_string())); + } + self.pos += 1; + + let condition = self.parse_expression()?; + + self.skip_whitespace(); + if self.pos >= self.input.len() || self.input.chars().nth(self.pos).unwrap() != ')' { + return Err(Error::Execution( + "awk: expected ')' after condition".to_string(), + )); + } + self.pos += 1; + + self.skip_whitespace(); + let then_actions = if self.input.chars().nth(self.pos).unwrap() == '{' { + self.parse_action_block()? + } else { + vec![self.parse_action()?] + }; + + self.skip_whitespace(); + let else_actions = if self.matches_keyword("else") { + self.skip_whitespace(); + if self.pos < self.input.len() && self.input.chars().nth(self.pos).unwrap() == '{' { + self.parse_action_block()? + } else { + vec![self.parse_action()?] 
+ } + } else { + Vec::new() + }; + + Ok(AwkAction::If(condition, then_actions, else_actions)) + } + + fn parse_expression(&mut self) -> Result { + self.parse_assignment() + } + + fn parse_assignment(&mut self) -> Result { + let expr = self.parse_ternary()?; + + self.skip_whitespace(); + if self.pos >= self.input.len() { + return Ok(expr); + } + + // Check for compound assignment operators (+=, -=, *=, /=, %=) + let compound_ops = ["+=", "-=", "*=", "/=", "%="]; + for op in compound_ops { + if self.input[self.pos..].starts_with(op) { + self.pos += op.len(); + self.skip_whitespace(); + let value = self.parse_assignment()?; + + if let AwkExpr::Variable(name) = expr { + // Transform `x += y` into `x = x + y` + let bin_op = &op[..1]; // Get the operator without '=' + let current = AwkExpr::Variable(name.clone()); + let combined = + AwkExpr::BinOp(Box::new(current), bin_op.to_string(), Box::new(value)); + return Ok(AwkExpr::Assign(name, Box::new(combined))); + } + return Err(Error::Execution( + "awk: invalid assignment target".to_string(), + )); + } + } + + // Simple assignment + if self.input.chars().nth(self.pos).unwrap() == '=' { + let next = self.input.chars().nth(self.pos + 1); + if next != Some('=') && next != Some('~') { + self.pos += 1; + self.skip_whitespace(); + let value = self.parse_assignment()?; + + if let AwkExpr::Variable(name) = expr { + return Ok(AwkExpr::Assign(name, Box::new(value))); + } + return Err(Error::Execution( + "awk: invalid assignment target".to_string(), + )); + } + } + + Ok(expr) + } + + fn parse_ternary(&mut self) -> Result { + self.parse_or() + } + + fn parse_or(&mut self) -> Result { + let mut left = self.parse_and()?; + + loop { + self.skip_whitespace(); + if self.input[self.pos..].starts_with("||") { + self.pos += 2; + self.skip_whitespace(); + let right = self.parse_and()?; + left = AwkExpr::BinOp(Box::new(left), "||".to_string(), Box::new(right)); + } else { + break; + } + } + + Ok(left) + } + + fn parse_and(&mut self) -> Result { 
+ let mut left = self.parse_comparison()?; + + loop { + self.skip_whitespace(); + if self.input[self.pos..].starts_with("&&") { + self.pos += 2; + self.skip_whitespace(); + let right = self.parse_comparison()?; + left = AwkExpr::BinOp(Box::new(left), "&&".to_string(), Box::new(right)); + } else { + break; + } + } + + Ok(left) + } + + fn parse_comparison(&mut self) -> Result { + let left = self.parse_concat()?; + + self.skip_whitespace(); + let ops = ["==", "!=", "<=", ">=", "<", ">", "~", "!~"]; + + for op in ops { + if self.input[self.pos..].starts_with(op) { + self.pos += op.len(); + self.skip_whitespace(); + let right = self.parse_concat()?; + return Ok(AwkExpr::BinOp( + Box::new(left), + op.to_string(), + Box::new(right), + )); + } + } + + Ok(left) + } + + fn parse_concat(&mut self) -> Result { + let mut parts = vec![self.parse_additive()?]; + + loop { + self.skip_whitespace(); + if self.pos >= self.input.len() { + break; + } + + let c = self.input.chars().nth(self.pos).unwrap(); + // Check if this could be the start of another value for concatenation + if c == '"' || c == '$' || c.is_alphabetic() || c == '(' { + // But not if it's a keyword or operator + let remaining = &self.input[self.pos..]; + if !remaining.starts_with("||") + && !remaining.starts_with("&&") + && !remaining.starts_with("==") + && !remaining.starts_with("!=") + { + if let Ok(next) = self.parse_additive() { + parts.push(next); + continue; + } + } + } + break; + } + + if parts.len() == 1 { + Ok(parts.remove(0)) + } else { + Ok(AwkExpr::Concat(parts)) + } + } + + fn parse_additive(&mut self) -> Result { + let mut left = self.parse_multiplicative()?; + + loop { + self.skip_whitespace(); + if self.pos >= self.input.len() { + break; + } + + let c = self.input.chars().nth(self.pos).unwrap(); + if c == '+' || c == '-' { + // Don't consume if it's a compound assignment operator (+=, -=) + let next = self.input.chars().nth(self.pos + 1); + if next == Some('=') { + break; + } + self.pos += 1; + 
self.skip_whitespace(); + let right = self.parse_multiplicative()?; + left = AwkExpr::BinOp(Box::new(left), c.to_string(), Box::new(right)); + } else { + break; + } + } + + Ok(left) + } + + fn parse_multiplicative(&mut self) -> Result { + let mut left = self.parse_unary()?; + + loop { + self.skip_whitespace(); + if self.pos >= self.input.len() { + break; + } + + let c = self.input.chars().nth(self.pos).unwrap(); + if c == '*' || c == '/' || c == '%' { + // Don't consume if it's a compound assignment operator (*=, /=, %=) + let next = self.input.chars().nth(self.pos + 1); + if next == Some('=') { + break; + } + self.pos += 1; + self.skip_whitespace(); + let right = self.parse_unary()?; + left = AwkExpr::BinOp(Box::new(left), c.to_string(), Box::new(right)); + } else { + break; + } + } + + Ok(left) + } + + fn parse_unary(&mut self) -> Result { + self.skip_whitespace(); + + if self.pos >= self.input.len() { + return Err(Error::Execution( + "awk: unexpected end of expression".to_string(), + )); + } + + let c = self.input.chars().nth(self.pos).unwrap(); + + if c == '-' { + self.pos += 1; + let expr = self.parse_unary()?; + return Ok(AwkExpr::UnaryOp("-".to_string(), Box::new(expr))); + } + + if c == '!' { + self.pos += 1; + let expr = self.parse_unary()?; + return Ok(AwkExpr::UnaryOp("!".to_string(), Box::new(expr))); + } + + if c == '+' { + self.pos += 1; + return self.parse_unary(); + } + + self.parse_primary() + } + + fn parse_primary(&mut self) -> Result { + self.skip_whitespace(); + + if self.pos >= self.input.len() { + return Err(Error::Execution( + "awk: unexpected end of expression".to_string(), + )); + } + + let c = self.input.chars().nth(self.pos).unwrap(); + + // Field reference $ + if c == '$' { + self.pos += 1; + let index = self.parse_primary()?; + return Ok(AwkExpr::Field(Box::new(index))); + } + + // Number + if c.is_ascii_digit() || c == '.' 
{ + return self.parse_number(); + } + + // String + if c == '"' { + let s = self.parse_string()?; + return Ok(AwkExpr::String(s)); + } + + // Parenthesized expression + if c == '(' { + self.pos += 1; + let expr = self.parse_expression()?; + self.skip_whitespace(); + if self.pos >= self.input.len() || self.input.chars().nth(self.pos).unwrap() != ')' { + return Err(Error::Execution("awk: expected ')'".to_string())); + } + self.pos += 1; + return Ok(expr); + } + + // Variable or function call + if c.is_alphabetic() || c == '_' { + let start = self.pos; + while self.pos < self.input.len() { + let c = self.input.chars().nth(self.pos).unwrap(); + if c.is_alphanumeric() || c == '_' { + self.pos += 1; + } else { + break; + } + } + let name = self.input[start..self.pos].to_string(); + + self.skip_whitespace(); + if self.pos < self.input.len() && self.input.chars().nth(self.pos).unwrap() == '(' { + // Function call + self.pos += 1; + let mut args = Vec::new(); + loop { + self.skip_whitespace(); + if self.pos < self.input.len() + && self.input.chars().nth(self.pos).unwrap() == ')' + { + self.pos += 1; + break; + } + let arg = self.parse_expression()?; + args.push(arg); + self.skip_whitespace(); + if self.pos < self.input.len() + && self.input.chars().nth(self.pos).unwrap() == ',' + { + self.pos += 1; + } + } + return Ok(AwkExpr::FuncCall(name, args)); + } + + return Ok(AwkExpr::Variable(name)); + } + + Err(Error::Execution(format!( + "awk: unexpected character: {}", + c + ))) + } + + fn parse_number(&mut self) -> Result { + let start = self.pos; + while self.pos < self.input.len() { + let c = self.input.chars().nth(self.pos).unwrap(); + if c.is_ascii_digit() || c == '.' 
|| c == 'e' || c == 'E' || c == '-' || c == '+' { + self.pos += 1; + } else { + break; + } + } + + let num_str = &self.input[start..self.pos]; + let num: f64 = num_str + .parse() + .map_err(|_| Error::Execution(format!("awk: invalid number: {}", num_str)))?; + + Ok(AwkExpr::Number(num)) + } + + fn parse_string(&mut self) -> Result { + if self.pos >= self.input.len() || self.input.chars().nth(self.pos).unwrap() != '"' { + return Err(Error::Execution("awk: expected string".to_string())); + } + self.pos += 1; + + let mut result = String::new(); + while self.pos < self.input.len() { + let c = self.input.chars().nth(self.pos).unwrap(); + if c == '"' { + self.pos += 1; + return Ok(result); + } else if c == '\\' { + self.pos += 1; + if self.pos < self.input.len() { + let escaped = self.input.chars().nth(self.pos).unwrap(); + match escaped { + 'n' => result.push('\n'), + 't' => result.push('\t'), + 'r' => result.push('\r'), + '\\' => result.push('\\'), + '"' => result.push('"'), + _ => { + result.push('\\'); + result.push(escaped); + } + } + self.pos += 1; + } + } else { + result.push(c); + self.pos += 1; + } + } + + Err(Error::Execution("awk: unterminated string".to_string())) + } +} + +struct AwkInterpreter { + state: AwkState, + output: String, +} + +impl AwkInterpreter { + fn new() -> Self { + Self { + state: AwkState::default(), + output: String::new(), + } + } + + fn eval_expr(&mut self, expr: &AwkExpr) -> AwkValue { + match expr { + AwkExpr::Number(n) => AwkValue::Number(*n), + AwkExpr::String(s) => AwkValue::String(s.clone()), + AwkExpr::Field(index) => { + let n = self.eval_expr(index).as_number() as usize; + self.state.get_field(n) + } + AwkExpr::Variable(name) => self.state.get_variable(name), + AwkExpr::Assign(name, val) => { + let value = self.eval_expr(val); + self.state.set_variable(name, value.clone()); + value + } + AwkExpr::BinOp(left, op, right) => { + let l = self.eval_expr(left); + let r = self.eval_expr(right); + + match op.as_str() { + "+" => 
AwkValue::Number(l.as_number() + r.as_number()), + "-" => AwkValue::Number(l.as_number() - r.as_number()), + "*" => AwkValue::Number(l.as_number() * r.as_number()), + "/" => AwkValue::Number(l.as_number() / r.as_number()), + "%" => AwkValue::Number(l.as_number() % r.as_number()), + "==" => AwkValue::Number(if l.as_string() == r.as_string() { + 1.0 + } else { + 0.0 + }), + "!=" => AwkValue::Number(if l.as_string() != r.as_string() { + 1.0 + } else { + 0.0 + }), + "<" => AwkValue::Number(if l.as_number() < r.as_number() { + 1.0 + } else { + 0.0 + }), + ">" => AwkValue::Number(if l.as_number() > r.as_number() { + 1.0 + } else { + 0.0 + }), + "<=" => AwkValue::Number(if l.as_number() <= r.as_number() { + 1.0 + } else { + 0.0 + }), + ">=" => AwkValue::Number(if l.as_number() >= r.as_number() { + 1.0 + } else { + 0.0 + }), + "&&" => AwkValue::Number(if l.as_bool() && r.as_bool() { 1.0 } else { 0.0 }), + "||" => AwkValue::Number(if l.as_bool() || r.as_bool() { 1.0 } else { 0.0 }), + "~" => { + if let Ok(re) = Regex::new(&r.as_string()) { + AwkValue::Number(if re.is_match(&l.as_string()) { + 1.0 + } else { + 0.0 + }) + } else { + AwkValue::Number(0.0) + } + } + "!~" => { + if let Ok(re) = Regex::new(&r.as_string()) { + AwkValue::Number(if !re.is_match(&l.as_string()) { + 1.0 + } else { + 0.0 + }) + } else { + AwkValue::Number(1.0) + } + } + _ => AwkValue::Uninitialized, + } + } + AwkExpr::UnaryOp(op, expr) => { + let v = self.eval_expr(expr); + match op.as_str() { + "-" => AwkValue::Number(-v.as_number()), + "!" 
=> AwkValue::Number(if v.as_bool() { 0.0 } else { 1.0 }), + _ => v, + } + } + AwkExpr::Concat(parts) => { + let s: String = parts + .iter() + .map(|p| self.eval_expr(p).as_string()) + .collect(); + AwkValue::String(s) + } + AwkExpr::FuncCall(name, args) => self.call_function(name, args), + AwkExpr::Regex(pattern) => AwkValue::String(pattern.clone()), + AwkExpr::Match(expr, pattern) => { + let s = self.eval_expr(expr).as_string(); + if let Ok(re) = Regex::new(pattern) { + AwkValue::Number(if re.is_match(&s) { 1.0 } else { 0.0 }) + } else { + AwkValue::Number(0.0) + } + } + } + } + + fn call_function(&mut self, name: &str, args: &[AwkExpr]) -> AwkValue { + match name { + "length" => { + if args.is_empty() { + AwkValue::Number(self.state.get_field(0).as_string().len() as f64) + } else { + AwkValue::Number(self.eval_expr(&args[0]).as_string().len() as f64) + } + } + "substr" => { + if args.len() < 2 { + return AwkValue::Uninitialized; + } + let s = self.eval_expr(&args[0]).as_string(); + let start = (self.eval_expr(&args[1]).as_number() as usize).saturating_sub(1); + let len = if args.len() > 2 { + self.eval_expr(&args[2]).as_number() as usize + } else { + s.len() + }; + let end = (start + len).min(s.len()); + AwkValue::String(s.chars().skip(start).take(end - start).collect()) + } + "index" => { + if args.len() < 2 { + return AwkValue::Number(0.0); + } + let s = self.eval_expr(&args[0]).as_string(); + let t = self.eval_expr(&args[1]).as_string(); + match s.find(&t) { + Some(i) => AwkValue::Number((i + 1) as f64), + None => AwkValue::Number(0.0), + } + } + "split" => { + if args.len() < 2 { + return AwkValue::Number(0.0); + } + let s = self.eval_expr(&args[0]).as_string(); + let sep = if args.len() > 2 { + self.eval_expr(&args[2]).as_string() + } else { + self.state.fs.clone() + }; + + let parts: Vec<&str> = if sep == " " { + s.split_whitespace().collect() + } else { + s.split(&sep).collect() + }; + + // Store in array variable + if let AwkExpr::Variable(arr_name) = 
&args[1] { + for (i, part) in parts.iter().enumerate() { + let key = format!("{}[{}]", arr_name, i + 1); + self.state + .set_variable(&key, AwkValue::String(part.to_string())); + } + } + + AwkValue::Number(parts.len() as f64) + } + "sprintf" => { + if args.is_empty() { + return AwkValue::String(String::new()); + } + let format = self.eval_expr(&args[0]).as_string(); + let values: Vec = args[1..].iter().map(|a| self.eval_expr(a)).collect(); + AwkValue::String(self.format_string(&format, &values)) + } + "toupper" => { + if args.is_empty() { + return AwkValue::Uninitialized; + } + AwkValue::String(self.eval_expr(&args[0]).as_string().to_uppercase()) + } + "tolower" => { + if args.is_empty() { + return AwkValue::Uninitialized; + } + AwkValue::String(self.eval_expr(&args[0]).as_string().to_lowercase()) + } + "gsub" | "sub" => { + // gsub(regexp, replacement, target) + if args.len() < 2 { + return AwkValue::Number(0.0); + } + let pattern = self.eval_expr(&args[0]).as_string(); + let replacement = self.eval_expr(&args[1]).as_string(); + + let target_expr = if args.len() > 2 { + args[2].clone() + } else { + AwkExpr::Field(Box::new(AwkExpr::Number(0.0))) + }; + + let target = self.eval_expr(&target_expr).as_string(); + + if let Ok(re) = Regex::new(&pattern) { + let (result, count) = if name == "gsub" { + let count = re.find_iter(&target).count(); + ( + re.replace_all(&target, replacement.as_str()).to_string(), + count, + ) + } else { + let count = if re.is_match(&target) { 1 } else { 0 }; + (re.replace(&target, replacement.as_str()).to_string(), count) + }; + + // Update the target variable + if let AwkExpr::Variable(name) = &target_expr { + self.state.set_variable(name, AwkValue::String(result)); + } + + AwkValue::Number(count as f64) + } else { + AwkValue::Number(0.0) + } + } + "int" => { + if args.is_empty() { + return AwkValue::Number(0.0); + } + AwkValue::Number(self.eval_expr(&args[0]).as_number().trunc()) + } + "sqrt" => { + if args.is_empty() { + return 
AwkValue::Number(0.0); + } + AwkValue::Number(self.eval_expr(&args[0]).as_number().sqrt()) + } + "sin" => { + if args.is_empty() { + return AwkValue::Number(0.0); + } + AwkValue::Number(self.eval_expr(&args[0]).as_number().sin()) + } + "cos" => { + if args.is_empty() { + return AwkValue::Number(0.0); + } + AwkValue::Number(self.eval_expr(&args[0]).as_number().cos()) + } + "log" => { + if args.is_empty() { + return AwkValue::Number(0.0); + } + AwkValue::Number(self.eval_expr(&args[0]).as_number().ln()) + } + "exp" => { + if args.is_empty() { + return AwkValue::Number(0.0); + } + AwkValue::Number(self.eval_expr(&args[0]).as_number().exp()) + } + _ => AwkValue::Uninitialized, + } + } + + fn format_string(&self, format: &str, values: &[AwkValue]) -> String { + let mut result = String::new(); + let mut chars = format.chars().peekable(); + let mut value_idx = 0; + + while let Some(c) = chars.next() { + if c == '%' { + if chars.peek() == Some(&'%') { + chars.next(); + result.push('%'); + continue; + } + + // Parse format specifier + let mut spec = String::from("%"); + while let Some(&c) = chars.peek() { + if c.is_ascii_alphabetic() { + spec.push(c); + chars.next(); + break; + } else if c.is_ascii_digit() || c == '-' || c == '.' 
|| c == '+' { + spec.push(c); + chars.next(); + } else { + break; + } + } + + if value_idx < values.len() { + let val = &values[value_idx]; + value_idx += 1; + + if spec.ends_with('d') || spec.ends_with('i') { + result.push_str(&format!("{}", val.as_number() as i64)); + } else if spec.ends_with('f') || spec.ends_with('g') || spec.ends_with('e') { + result.push_str(&format!("{}", val.as_number())); + } else if spec.ends_with('s') { + result.push_str(&val.as_string()); + } else if spec.ends_with('c') { + let s = val.as_string(); + if let Some(c) = s.chars().next() { + result.push(c); + } + } else { + result.push_str(&val.as_string()); + } + } + } else { + result.push(c); + } + } + + result + } + + fn exec_action(&mut self, action: &AwkAction) -> bool { + match action { + AwkAction::Print(exprs) => { + let parts: Vec = exprs + .iter() + .map(|e| self.eval_expr(e).as_string()) + .collect(); + self.output.push_str(&parts.join(&self.state.ofs)); + self.output.push_str(&self.state.ors); + true + } + AwkAction::Printf(format, args) => { + let values: Vec = args.iter().map(|a| self.eval_expr(a)).collect(); + self.output.push_str(&self.format_string(format, &values)); + true + } + AwkAction::Assign(name, expr) => { + let value = self.eval_expr(expr); + self.state.set_variable(name, value); + true + } + AwkAction::If(cond, then_actions, else_actions) => { + if self.eval_expr(cond).as_bool() { + for action in then_actions { + if !self.exec_action(action) { + return false; + } + } + } else { + for action in else_actions { + if !self.exec_action(action) { + return false; + } + } + } + true + } + AwkAction::While(cond, actions) => { + while self.eval_expr(cond).as_bool() { + for action in actions { + if !self.exec_action(action) { + return false; + } + } + } + true + } + AwkAction::For(init, cond, update, actions) => { + self.exec_action(init); + while self.eval_expr(cond).as_bool() { + for action in actions { + if !self.exec_action(action) { + return false; + } + } + 
self.exec_action(update); + } + true + } + AwkAction::Next => false, + AwkAction::Exit(_) => false, + AwkAction::Expression(expr) => { + self.eval_expr(expr); + true + } + } + } + + fn matches_pattern(&mut self, pattern: &AwkPattern) -> bool { + match pattern { + AwkPattern::Regex(re) => { + let line = self.state.get_field(0).as_string(); + re.is_match(&line) + } + AwkPattern::Expression(expr) => self.eval_expr(expr).as_bool(), + } + } +} + +#[async_trait] +impl Builtin for Awk { + async fn execute(&self, ctx: Context<'_>) -> Result { + let mut program_str = String::new(); + let mut files: Vec = Vec::new(); + let mut field_sep = " ".to_string(); + let mut i = 0; + + while i < ctx.args.len() { + let arg = &ctx.args[i]; + if arg == "-F" { + i += 1; + if i < ctx.args.len() { + field_sep = ctx.args[i].clone(); + } + } else if let Some(sep) = arg.strip_prefix("-F") { + field_sep = sep.to_string(); + } else if arg == "-f" { + // Read program from file + i += 1; + if i < ctx.args.len() { + let path = if ctx.args[i].starts_with('/') { + std::path::PathBuf::from(&ctx.args[i]) + } else { + ctx.cwd.join(&ctx.args[i]) + }; + match ctx.fs.read_file(&path).await { + Ok(content) => { + program_str = String::from_utf8_lossy(&content).into_owned(); + } + Err(e) => { + return Ok(ExecResult::err(format!("awk: {}: {}", ctx.args[i], e), 1)); + } + } + } + } else if arg.starts_with('-') { + // Unknown option - ignore + } else if program_str.is_empty() { + program_str = arg.clone(); + } else { + files.push(arg.clone()); + } + i += 1; + } + + if program_str.is_empty() { + return Err(Error::Execution("awk: no program given".to_string())); + } + + let mut parser = AwkParser::new(&program_str); + let program = parser.parse()?; + + let mut interp = AwkInterpreter::new(); + interp.state.fs = field_sep; + + // Run BEGIN actions + for action in &program.begin_actions { + interp.exec_action(action); + } + + // Process input + let inputs: Vec = if files.is_empty() { + 
vec![ctx.stdin.unwrap_or("").to_string()] + } else { + let mut inputs = Vec::new(); + for file in &files { + let path = if file.starts_with('/') { + std::path::PathBuf::from(file) + } else { + ctx.cwd.join(file) + }; + + match ctx.fs.read_file(&path).await { + Ok(content) => { + inputs.push(String::from_utf8_lossy(&content).into_owned()); + } + Err(e) => { + return Ok(ExecResult::err(format!("awk: {}: {}", file, e), 1)); + } + } + } + inputs + }; + + 'files: for input in inputs { + interp.state.fnr = 0; + for line in input.lines() { + interp.state.set_line(line); + + 'rules: for rule in &program.main_rules { + // Check pattern + let matches = match &rule.pattern { + Some(pattern) => interp.matches_pattern(pattern), + None => true, + }; + + if matches { + for action in &rule.actions { + match action { + AwkAction::Next => continue 'rules, + AwkAction::Exit(_) => break 'files, + _ => { + // exec_action returns false for Next, which we've already handled + interp.exec_action(action); + } + } + } + } + } + } + } + + // Run END actions + for action in &program.end_actions { + interp.exec_action(action); + } + + Ok(ExecResult::ok(interp.output)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::fs::InMemoryFs; + use std::collections::HashMap; + use std::path::PathBuf; + use std::sync::Arc; + + async fn run_awk(args: &[&str], stdin: Option<&str>) -> Result { + let awk = Awk; + let fs = Arc::new(InMemoryFs::new()); + let mut vars = HashMap::new(); + let mut cwd = PathBuf::from("/"); + let args: Vec = args.iter().map(|s| s.to_string()).collect(); + + let ctx = Context { + args: &args, + env: &HashMap::new(), + variables: &mut vars, + cwd: &mut cwd, + fs, + stdin, + }; + + awk.execute(ctx).await + } + + #[tokio::test] + async fn test_awk_print_all() { + let result = run_awk(&["{print}"], Some("hello\nworld")).await.unwrap(); + assert_eq!(result.stdout, "hello\nworld\n"); + } + + #[tokio::test] + async fn test_awk_print_field() { + let result = 
run_awk(&["{print $1}"], Some("hello world\nfoo bar")) + .await + .unwrap(); + assert_eq!(result.stdout, "hello\nfoo\n"); + } + + #[tokio::test] + async fn test_awk_print_multiple_fields() { + let result = run_awk(&["{print $2, $1}"], Some("hello world")) + .await + .unwrap(); + assert_eq!(result.stdout, "world hello\n"); + } + + #[tokio::test] + async fn test_awk_field_separator() { + let result = run_awk(&["-F:", "{print $1}"], Some("root:x:0:0")) + .await + .unwrap(); + assert_eq!(result.stdout, "root\n"); + } + + #[tokio::test] + async fn test_awk_nr() { + let result = run_awk(&["{print NR, $0}"], Some("a\nb\nc")).await.unwrap(); + assert_eq!(result.stdout, "1 a\n2 b\n3 c\n"); + } + + #[tokio::test] + async fn test_awk_nf() { + let result = run_awk(&["{print NF}"], Some("a b c\nd e")).await.unwrap(); + assert_eq!(result.stdout, "3\n2\n"); + } + + #[tokio::test] + async fn test_awk_begin_end() { + let result = run_awk( + &["BEGIN{print \"start\"} {print} END{print \"end\"}"], + Some("middle"), + ) + .await + .unwrap(); + assert_eq!(result.stdout, "start\nmiddle\nend\n"); + } + + #[tokio::test] + async fn test_awk_pattern() { + let result = run_awk(&["/hello/{print}"], Some("hello\nworld\nhello again")) + .await + .unwrap(); + assert_eq!(result.stdout, "hello\nhello again\n"); + } + + #[tokio::test] + async fn test_awk_condition() { + let result = run_awk(&["NR==2{print}"], Some("line1\nline2\nline3")) + .await + .unwrap(); + assert_eq!(result.stdout, "line2\n"); + } + + #[tokio::test] + async fn test_awk_arithmetic() { + let result = run_awk(&["{print $1 + $2}"], Some("1 2\n3 4")) + .await + .unwrap(); + assert_eq!(result.stdout, "3\n7\n"); + } + + #[tokio::test] + async fn test_awk_variables() { + let result = run_awk(&["{sum += $1} END{print sum}"], Some("1\n2\n3\n4")) + .await + .unwrap(); + assert_eq!(result.stdout, "10\n"); + } + + #[tokio::test] + async fn test_awk_length() { + let result = run_awk(&["{print length($0)}"], Some("hello\nhi")) + .await + 
.unwrap(); + assert_eq!(result.stdout, "5\n2\n"); + } + + #[tokio::test] + async fn test_awk_substr() { + let result = run_awk(&["{print substr($0, 2, 3)}"], Some("hello")) + .await + .unwrap(); + assert_eq!(result.stdout, "ell\n"); + } + + #[tokio::test] + async fn test_awk_toupper() { + let result = run_awk(&["{print toupper($0)}"], Some("hello")) + .await + .unwrap(); + assert_eq!(result.stdout, "HELLO\n"); + } +} diff --git a/crates/bashkit/src/builtins/grep.rs b/crates/bashkit/src/builtins/grep.rs new file mode 100644 index 00000000..d8899d6c --- /dev/null +++ b/crates/bashkit/src/builtins/grep.rs @@ -0,0 +1,305 @@ +//! grep - Pattern matching builtin +//! +//! Implements grep functionality using the regex crate. +//! +//! Usage: +//! grep pattern file +//! echo "text" | grep pattern +//! grep -i pattern file # case insensitive +//! grep -v pattern file # invert match +//! grep -n pattern file # show line numbers +//! grep -c pattern file # count matches +//! grep -l pattern file1 file2 # list matching files +//! grep -E pattern file # extended regex (default) +//! 
grep -F pattern file # fixed string match + +use async_trait::async_trait; +use regex::{Regex, RegexBuilder}; + +use super::{Builtin, Context}; +use crate::error::{Error, Result}; +use crate::interpreter::ExecResult; + +/// grep command - pattern matching +pub struct Grep; + +struct GrepOptions { + pattern: String, + files: Vec, + ignore_case: bool, + invert_match: bool, + line_numbers: bool, + count_only: bool, + files_with_matches: bool, + fixed_strings: bool, +} + +impl GrepOptions { + fn parse(args: &[String]) -> Result { + let mut opts = GrepOptions { + pattern: String::new(), + files: Vec::new(), + ignore_case: false, + invert_match: false, + line_numbers: false, + count_only: false, + files_with_matches: false, + fixed_strings: false, + }; + + let mut positional = Vec::new(); + let mut i = 0; + + while i < args.len() { + let arg = &args[i]; + if arg.starts_with('-') && arg.len() > 1 && !arg.starts_with("--") { + // Handle combined flags like -iv + for c in arg[1..].chars() { + match c { + 'i' => opts.ignore_case = true, + 'v' => opts.invert_match = true, + 'n' => opts.line_numbers = true, + 'c' => opts.count_only = true, + 'l' => opts.files_with_matches = true, + 'F' => opts.fixed_strings = true, + 'E' => {} // Extended regex is default + 'e' => { + // -e pattern + i += 1; + if i < args.len() { + opts.pattern = args[i].clone(); + } + } + _ => {} // Ignore unknown flags + } + } + } else if arg == "--" { + // End of options + positional.extend(args[i + 1..].iter().cloned()); + break; + } else { + positional.push(arg.clone()); + } + i += 1; + } + + // First positional is pattern (if not set by -e) + if opts.pattern.is_empty() { + if positional.is_empty() { + return Err(Error::Execution("grep: missing pattern".to_string())); + } + opts.pattern = positional.remove(0); + } + + // Rest are files + opts.files = positional; + + Ok(opts) + } + + fn build_regex(&self) -> Result { + let pattern = if self.fixed_strings { + regex::escape(&self.pattern) + } else { + 
self.pattern.clone() + }; + + RegexBuilder::new(&pattern) + .case_insensitive(self.ignore_case) + .build() + .map_err(|e| Error::Execution(format!("grep: invalid pattern: {}", e))) + } +} + +#[async_trait] +impl Builtin for Grep { + async fn execute(&self, ctx: Context<'_>) -> Result { + let opts = GrepOptions::parse(ctx.args)?; + let regex = opts.build_regex()?; + + let mut output = String::new(); + let mut any_match = false; + let mut exit_code = 1; // 1 = no match + + // Determine input sources + let inputs: Vec<(&str, String)> = if opts.files.is_empty() { + // Read from stdin + vec![("", ctx.stdin.unwrap_or("").to_string())] + } else { + // Read from files + let mut inputs = Vec::new(); + for file in &opts.files { + let path = if file.starts_with('/') { + std::path::PathBuf::from(file) + } else { + ctx.cwd.join(file) + }; + + match ctx.fs.read_file(&path).await { + Ok(content) => { + let text = String::from_utf8_lossy(&content).into_owned(); + inputs.push((file.as_str(), text)); + } + Err(e) => { + // Report error but continue with other files + output.push_str(&format!("grep: {}: {}\n", file, e)); + } + } + } + inputs + }; + + let show_filename = opts.files.len() > 1; + + for (filename, content) in inputs { + let mut match_count = 0; + let mut file_matched = false; + + for (line_num, line) in content.lines().enumerate() { + let matches = regex.is_match(line); + let should_output = if opts.invert_match { !matches } else { matches }; + + if should_output { + file_matched = true; + any_match = true; + match_count += 1; + + if opts.files_with_matches { + // Just need to know if file matches, output later + break; + } + + if !opts.count_only { + // Build output line + if show_filename { + output.push_str(filename); + output.push(':'); + } + if opts.line_numbers { + output.push_str(&format!("{}:", line_num + 1)); + } + output.push_str(line); + output.push('\n'); + } + } + } + + if opts.files_with_matches && file_matched { + output.push_str(filename); + 
output.push('\n'); + } else if opts.count_only { + if show_filename { + output.push_str(&format!("{}:{}\n", filename, match_count)); + } else { + output.push_str(&format!("{}\n", match_count)); + } + } + } + + if any_match { + exit_code = 0; + } + + Ok(ExecResult::with_code(output, exit_code)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::fs::InMemoryFs; + use std::collections::HashMap; + use std::path::PathBuf; + use std::sync::Arc; + + async fn run_grep(args: &[&str], stdin: Option<&str>) -> Result { + let grep = Grep; + let fs = Arc::new(InMemoryFs::new()); + let mut vars = HashMap::new(); + let mut cwd = PathBuf::from("/"); + let args: Vec = args.iter().map(|s| s.to_string()).collect(); + + let ctx = Context { + args: &args, + env: &HashMap::new(), + variables: &mut vars, + cwd: &mut cwd, + fs, + stdin, + }; + + grep.execute(ctx).await + } + + #[tokio::test] + async fn test_grep_basic() { + let result = run_grep(&["hello"], Some("hello world\ngoodbye world")) + .await + .unwrap(); + assert_eq!(result.exit_code, 0); + assert_eq!(result.stdout, "hello world\n"); + } + + #[tokio::test] + async fn test_grep_no_match() { + let result = run_grep(&["xyz"], Some("hello world\ngoodbye world")) + .await + .unwrap(); + assert_eq!(result.exit_code, 1); + assert_eq!(result.stdout, ""); + } + + #[tokio::test] + async fn test_grep_case_insensitive() { + let result = run_grep(&["-i", "HELLO"], Some("Hello World\ngoodbye")) + .await + .unwrap(); + assert_eq!(result.exit_code, 0); + assert_eq!(result.stdout, "Hello World\n"); + } + + #[tokio::test] + async fn test_grep_invert() { + let result = run_grep(&["-v", "hello"], Some("hello\nworld\nhello again")) + .await + .unwrap(); + assert_eq!(result.exit_code, 0); + assert_eq!(result.stdout, "world\n"); + } + + #[tokio::test] + async fn test_grep_line_numbers() { + let result = run_grep(&["-n", "world"], Some("hello\nworld\nfoo")) + .await + .unwrap(); + assert_eq!(result.exit_code, 0); + 
assert_eq!(result.stdout, "2:world\n"); + } + + #[tokio::test] + async fn test_grep_count() { + let result = run_grep(&["-c", "o"], Some("hello\nworld\nfoo")) + .await + .unwrap(); + assert_eq!(result.exit_code, 0); + assert_eq!(result.stdout, "3\n"); + } + + #[tokio::test] + async fn test_grep_regex() { + let result = run_grep(&["^h.*o$"], Some("hello\nworld\nhero")) + .await + .unwrap(); + assert_eq!(result.exit_code, 0); + assert_eq!(result.stdout, "hello\nhero\n"); + } + + #[tokio::test] + async fn test_grep_fixed_string() { + let result = run_grep(&["-F", "a.b"], Some("a.b\naxb\na.b.c")) + .await + .unwrap(); + assert_eq!(result.exit_code, 0); + assert_eq!(result.stdout, "a.b\na.b.c\n"); + } +} diff --git a/crates/bashkit/src/builtins/jq.rs b/crates/bashkit/src/builtins/jq.rs new file mode 100644 index 00000000..e3498b59 --- /dev/null +++ b/crates/bashkit/src/builtins/jq.rs @@ -0,0 +1,179 @@ +//! jq - JSON processor builtin +//! +//! Implements jq functionality using the jaq library. +//! +//! Usage: +//! echo '{"name":"foo"}' | jq '.name' +//! 
jq '.[] | .id' < data.json + +use async_trait::async_trait; +use jaq_core::{load, Compiler, Ctx, RcIter}; +use jaq_json::Val; + +use super::{Builtin, Context}; +use crate::error::{Error, Result}; +use crate::interpreter::ExecResult; + +/// jq command - JSON processor +pub struct Jq; + +#[async_trait] +impl Builtin for Jq { + async fn execute(&self, ctx: Context<'_>) -> Result { + // Get the filter expression + let filter = ctx.args.first().map(|s| s.as_str()).unwrap_or("."); + + // Get input from stdin + let input = ctx.stdin.unwrap_or(""); + + // If no input, return empty + if input.trim().is_empty() { + return Ok(ExecResult::ok(String::new())); + } + + // Set up the loader with standard library definitions + let loader = load::Loader::new(jaq_std::defs().chain(jaq_json::defs())); + let arena = load::Arena::default(); + + // Parse the filter + let program = load::File { + code: filter, + path: (), + }; + + let modules = loader.load(&arena, program).map_err(|errs| { + Error::Execution(format!( + "jq: parse error: {}", + errs.into_iter() + .map(|e| format!("{:?}", e)) + .collect::>() + .join(", ") + )) + })?; + + // Compile the filter + let filter = Compiler::default() + .with_funs(jaq_std::funs().chain(jaq_json::funs())) + .compile(modules) + .map_err(|errs| { + Error::Execution(format!( + "jq: compile error: {}", + errs.into_iter() + .map(|e| format!("{:?}", e)) + .collect::>() + .join(", ") + )) + })?; + + // Process each line of input as JSON + let mut output = String::new(); + for line in input.lines() { + let line = line.trim(); + if line.is_empty() { + continue; + } + + // Parse JSON input + let json_input: serde_json::Value = serde_json::from_str(line) + .map_err(|e| Error::Execution(format!("jq: invalid JSON: {}", e)))?; + + // Convert to jaq value + let jaq_input = Val::from(json_input); + + // Create empty inputs iterator + let inputs = RcIter::new(core::iter::empty()); + + // Run the filter + let ctx = Ctx::new([], &inputs); + for result in 
filter.run((ctx, jaq_input)) { + match result { + Ok(val) => { + // Convert back to serde_json::Value and format + let json: serde_json::Value = val.into(); + match serde_json::to_string(&json) { + Ok(s) => { + output.push_str(&s); + output.push('\n'); + } + Err(e) => { + return Err(Error::Execution(format!("jq: output error: {}", e))); + } + } + } + Err(e) => { + return Err(Error::Execution(format!("jq: runtime error: {:?}", e))); + } + } + } + } + + Ok(ExecResult::ok(output)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::fs::InMemoryFs; + use std::collections::HashMap; + use std::path::PathBuf; + use std::sync::Arc; + + async fn run_jq(filter: &str, input: &str) -> Result { + let jq = Jq; + let fs = Arc::new(InMemoryFs::new()); + let mut vars = HashMap::new(); + let mut cwd = PathBuf::from("/"); + let args = vec![filter.to_string()]; + + let ctx = Context { + args: &args, + env: &HashMap::new(), + variables: &mut vars, + cwd: &mut cwd, + fs, + stdin: Some(input), + }; + + let result = jq.execute(ctx).await?; + Ok(result.stdout) + } + + #[tokio::test] + async fn test_jq_identity() { + let result = run_jq(".", r#"{"name":"test"}"#).await.unwrap(); + assert_eq!(result.trim(), r#"{"name":"test"}"#); + } + + #[tokio::test] + async fn test_jq_field_access() { + let result = run_jq(".name", r#"{"name":"foo","id":42}"#).await.unwrap(); + assert_eq!(result.trim(), r#""foo""#); + } + + #[tokio::test] + async fn test_jq_array_index() { + let result = run_jq(".[1]", r#"["a","b","c"]"#).await.unwrap(); + assert_eq!(result.trim(), r#""b""#); + } + + #[tokio::test] + async fn test_jq_nested() { + let result = run_jq(".user.name", r#"{"user":{"name":"alice"}}"#) + .await + .unwrap(); + assert_eq!(result.trim(), r#""alice""#); + } + + #[tokio::test] + async fn test_jq_keys() { + let result = run_jq("keys", r#"{"b":1,"a":2}"#).await.unwrap(); + assert_eq!(result.trim(), r#"["a","b"]"#); + } + + #[tokio::test] + async fn test_jq_length() { + let result = 
run_jq("length", r#"[1,2,3,4,5]"#).await.unwrap(); + assert_eq!(result.trim(), "5"); + } +} diff --git a/crates/bashkit/src/builtins/mod.rs b/crates/bashkit/src/builtins/mod.rs index dfbb747e..39dfeba1 100644 --- a/crates/bashkit/src/builtins/mod.rs +++ b/crates/bashkit/src/builtins/mod.rs @@ -1,23 +1,31 @@ //! Built-in shell commands +mod awk; mod cat; mod echo; mod export; mod flow; +mod grep; +mod jq; mod navigation; mod printf; mod read; +mod sed; mod source; mod test; mod vars; +pub use awk::Awk; pub use cat::Cat; pub use echo::Echo; pub use export::Export; pub use flow::{Break, Continue, Exit, False, Return, True}; +pub use grep::Grep; +pub use jq::Jq; pub use navigation::{Cd, Pwd}; pub use printf::Printf; pub use read::Read; +pub use sed::Sed; pub use source::Source; pub use test::{Bracket, Test}; pub use vars::{Local, Set, Shift, Unset}; diff --git a/crates/bashkit/src/builtins/sed.rs b/crates/bashkit/src/builtins/sed.rs new file mode 100644 index 00000000..b7916e25 --- /dev/null +++ b/crates/bashkit/src/builtins/sed.rs @@ -0,0 +1,475 @@ +//! sed - Stream editor builtin +//! +//! Implements basic sed functionality. +//! +//! Usage: +//! sed 's/pattern/replacement/' file +//! sed 's/pattern/replacement/g' file # global replacement +//! sed -i 's/pattern/replacement/' file # in-place edit +//! echo "text" | sed 's/pattern/replacement/' +//! sed -n '2p' file # print line 2 +//! sed '2d' file # delete line 2 +//! 
sed -e 's/a/b/' -e 's/c/d/' file # multiple commands + +use async_trait::async_trait; +use regex::Regex; + +use super::{Builtin, Context}; +use crate::error::{Error, Result}; +use crate::interpreter::ExecResult; + +/// sed command - stream editor +pub struct Sed; + +#[derive(Debug)] +enum SedCommand { + Substitute { + pattern: Regex, + replacement: String, + global: bool, + print_only: bool, + }, + Delete, + Print, + Quit, +} + +#[derive(Debug, Clone)] +enum Address { + All, + Line(usize), + Range(usize, usize), + Regex(Regex), + Last, +} + +impl Address { + fn matches(&self, line_num: usize, total_lines: usize, line: &str) -> bool { + match self { + Address::All => true, + Address::Line(n) => line_num == *n, + Address::Range(start, end) => line_num >= *start && line_num <= *end, + Address::Regex(re) => re.is_match(line), + Address::Last => line_num == total_lines, + } + } +} + +struct SedOptions { + commands: Vec<(Option
, SedCommand)>, + files: Vec, + in_place: bool, + quiet: bool, +} + +impl SedOptions { + fn parse(args: &[String]) -> Result { + let mut opts = SedOptions { + commands: Vec::new(), + files: Vec::new(), + in_place: false, + quiet: false, + }; + + let mut i = 0; + while i < args.len() { + let arg = &args[i]; + if arg == "-n" { + opts.quiet = true; + } else if arg == "-i" { + opts.in_place = true; + } else if arg == "-e" { + i += 1; + if i < args.len() { + let (addr, cmd) = parse_sed_command(&args[i])?; + opts.commands.push((addr, cmd)); + } + } else if arg.starts_with('-') { + // Unknown option - ignore + } else if opts.commands.is_empty() { + // First non-option is the command + let (addr, cmd) = parse_sed_command(arg)?; + opts.commands.push((addr, cmd)); + } else { + // Rest are files + opts.files.push(arg.clone()); + } + i += 1; + } + + if opts.commands.is_empty() { + return Err(Error::Execution("sed: no command given".to_string())); + } + + Ok(opts) + } +} + +fn parse_address(s: &str) -> Result<(Option
, &str)> { + if s.is_empty() { + return Ok((None, s)); + } + + let first_char = s.chars().next().unwrap(); + + // Line number + if first_char.is_ascii_digit() { + let end = s.find(|c: char| !c.is_ascii_digit()).unwrap_or(s.len()); + let num: usize = s[..end] + .parse() + .map_err(|_| Error::Execution("sed: invalid address".to_string()))?; + let rest = &s[end..]; + + // Check for range + if let Some(rest) = rest.strip_prefix(',') { + if let Some(after_dollar) = rest.strip_prefix('$') { + return Ok((Some(Address::Range(num, usize::MAX)), after_dollar)); + } + let end2 = rest + .find(|c: char| !c.is_ascii_digit()) + .unwrap_or(rest.len()); + if end2 > 0 { + let num2: usize = rest[..end2] + .parse() + .map_err(|_| Error::Execution("sed: invalid address".to_string()))?; + return Ok((Some(Address::Range(num, num2)), &rest[end2..])); + } + return Ok((Some(Address::Line(num)), rest)); + } + + return Ok((Some(Address::Line(num)), rest)); + } + + // Last line + if let Some(after_dollar) = s.strip_prefix('$') { + return Ok((Some(Address::Last), after_dollar)); + } + + // Regex address /pattern/ + if first_char == '/' { + let end = s[1..] + .find('/') + .ok_or_else(|| Error::Execution("sed: unterminated address regex".to_string()))?; + let pattern = &s[1..end + 1]; + let regex = Regex::new(pattern) + .map_err(|e| Error::Execution(format!("sed: invalid regex: {}", e)))?; + return Ok((Some(Address::Regex(regex)), &s[end + 2..])); + } + + Ok((None, s)) +} + +fn parse_sed_command(s: &str) -> Result<(Option
, SedCommand)> { + let (address, rest) = parse_address(s)?; + + if rest.is_empty() { + return Err(Error::Execution("sed: missing command".to_string())); + } + + let first_char = rest.chars().next().unwrap(); + + match first_char { + 's' => { + // Substitution: s/pattern/replacement/flags + if rest.len() < 4 { + return Err(Error::Execution("sed: invalid substitution".to_string())); + } + let delim = rest.chars().nth(1).unwrap(); + + // Find the parts between delimiters + let rest = &rest[2..]; + let mut parts = Vec::new(); + let mut current = String::new(); + let mut escaped = false; + + for c in rest.chars() { + if escaped { + current.push(c); + escaped = false; + } else if c == '\\' { + escaped = true; + current.push(c); + } else if c == delim { + parts.push(current); + current = String::new(); + } else { + current.push(c); + } + } + parts.push(current); + + if parts.len() < 2 { + return Err(Error::Execution("sed: invalid substitution".to_string())); + } + + let pattern = &parts[0]; + let replacement = &parts[1]; + let flags = parts.get(2).map(|s| s.as_str()).unwrap_or(""); + + // Convert POSIX sed regex to Rust regex syntax + // \( \) -> ( ) for capture groups + // \+ -> + for one-or-more + // \? -> ? for zero-or-one + let pattern = pattern + .replace("\\(", "(") + .replace("\\)", ")") + .replace("\\+", "+") + .replace("\\?", "?"); + + let regex = Regex::new(&pattern) + .map_err(|e| Error::Execution(format!("sed: invalid pattern: {}", e)))?; + + // Convert sed replacement syntax to regex replacement syntax + // sed uses \1, \2, etc. and & for full match + // regex crate uses $1, $2, etc. 
and $0 for full match + let replacement = replacement + .replace("\\&", "\x00") // Temporarily escape literal & + .replace('&', "$0") + .replace("\x00", "&"); + + let replacement = Regex::new(r"\\(\d+)") + .unwrap() + .replace_all(&replacement, "$$$1") + .to_string(); + + Ok(( + address, + SedCommand::Substitute { + pattern: regex, + replacement, + global: flags.contains('g'), + print_only: flags.contains('p'), + }, + )) + } + 'd' => Ok((address.or(Some(Address::All)), SedCommand::Delete)), + 'p' => Ok((address.or(Some(Address::All)), SedCommand::Print)), + 'q' => Ok((address, SedCommand::Quit)), + _ => Err(Error::Execution(format!( + "sed: unknown command: {}", + first_char + ))), + } +} + +#[async_trait] +impl Builtin for Sed { + async fn execute(&self, ctx: Context<'_>) -> Result { + let opts = SedOptions::parse(ctx.args)?; + + // Determine input + let inputs: Vec<(Option, String)> = if opts.files.is_empty() { + vec![(None, ctx.stdin.unwrap_or("").to_string())] + } else { + let mut inputs = Vec::new(); + for file in &opts.files { + let path = if file.starts_with('/') { + std::path::PathBuf::from(file) + } else { + ctx.cwd.join(file) + }; + + match ctx.fs.read_file(&path).await { + Ok(content) => { + let text = String::from_utf8_lossy(&content).into_owned(); + inputs.push((Some(file.clone()), text)); + } + Err(e) => { + return Ok(ExecResult::err(format!("sed: {}: {}", file, e), 1)); + } + } + } + inputs + }; + + let mut output = String::new(); + let mut modified_files: Vec<(String, String)> = Vec::new(); + + for (filename, content) in inputs { + let lines: Vec<&str> = content.lines().collect(); + let total_lines = lines.len(); + let mut file_output = String::new(); + let mut quit = false; + + for (idx, line) in lines.iter().enumerate() { + if quit { + break; + } + + let line_num = idx + 1; + let mut current_line = line.to_string(); + let mut should_print = !opts.quiet; + let mut deleted = false; + let mut extra_print = false; + + for (addr, cmd) in &opts.commands 
{ + let addr_matches = addr + .as_ref() + .map(|a| a.matches(line_num, total_lines, ¤t_line)) + .unwrap_or(true); + + if !addr_matches { + continue; + } + + match cmd { + SedCommand::Substitute { + pattern, + replacement, + global, + print_only, + } => { + let new_line = if *global { + pattern.replace_all(¤t_line, replacement.as_str()) + } else { + pattern.replace(¤t_line, replacement.as_str()) + }; + + if new_line != current_line { + current_line = new_line.into_owned(); + if *print_only { + extra_print = true; + } + } + } + SedCommand::Delete => { + deleted = true; + should_print = false; + } + SedCommand::Print => { + extra_print = true; + } + SedCommand::Quit => { + quit = true; + } + } + } + + if !deleted && should_print { + file_output.push_str(¤t_line); + file_output.push('\n'); + } + + if extra_print { + file_output.push_str(¤t_line); + file_output.push('\n'); + } + } + + if opts.in_place { + if let Some(fname) = filename { + modified_files.push((fname, file_output)); + } + } else { + output.push_str(&file_output); + } + } + + // Write back in-place modifications + for (filename, content) in modified_files { + let path = if filename.starts_with('/') { + std::path::PathBuf::from(&filename) + } else { + ctx.cwd.join(&filename) + }; + + if let Err(e) = ctx.fs.write_file(&path, content.as_bytes()).await { + return Ok(ExecResult::err(format!("sed: {}: {}", filename, e), 1)); + } + } + + Ok(ExecResult::ok(output)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::fs::InMemoryFs; + use std::collections::HashMap; + use std::path::PathBuf; + use std::sync::Arc; + + async fn run_sed(args: &[&str], stdin: Option<&str>) -> Result { + let sed = Sed; + let fs = Arc::new(InMemoryFs::new()); + let mut vars = HashMap::new(); + let mut cwd = PathBuf::from("/"); + let args: Vec = args.iter().map(|s| s.to_string()).collect(); + + let ctx = Context { + args: &args, + env: &HashMap::new(), + variables: &mut vars, + cwd: &mut cwd, + fs, + stdin, + }; + + 
sed.execute(ctx).await + } + + #[tokio::test] + async fn test_sed_substitute() { + let result = run_sed(&["s/hello/goodbye/"], Some("hello world\nhello again")) + .await + .unwrap(); + assert_eq!(result.stdout, "goodbye world\ngoodbye again\n"); + } + + #[tokio::test] + async fn test_sed_substitute_global() { + let result = run_sed(&["s/o/0/g"], Some("hello world")).await.unwrap(); + assert_eq!(result.stdout, "hell0 w0rld\n"); + } + + #[tokio::test] + async fn test_sed_substitute_first_only() { + let result = run_sed(&["s/o/0/"], Some("hello world")).await.unwrap(); + assert_eq!(result.stdout, "hell0 world\n"); + } + + #[tokio::test] + async fn test_sed_delete_line() { + let result = run_sed(&["2d"], Some("line1\nline2\nline3")).await.unwrap(); + assert_eq!(result.stdout, "line1\nline3\n"); + } + + #[tokio::test] + async fn test_sed_print_line() { + let result = run_sed(&["-n", "2p"], Some("line1\nline2\nline3")) + .await + .unwrap(); + assert_eq!(result.stdout, "line2\n"); + } + + #[tokio::test] + async fn test_sed_regex_groups() { + let result = run_sed(&["s/\\(hello\\) \\(world\\)/\\2 \\1/"], Some("hello world")) + .await + .unwrap(); + assert_eq!(result.stdout, "world hello\n"); + } + + #[tokio::test] + async fn test_sed_ampersand() { + let result = run_sed(&["s/world/[&]/"], Some("hello world")) + .await + .unwrap(); + assert_eq!(result.stdout, "hello [world]\n"); + } + + #[tokio::test] + async fn test_sed_address_range() { + let result = run_sed(&["2,3d"], Some("line1\nline2\nline3\nline4")) + .await + .unwrap(); + assert_eq!(result.stdout, "line1\nline4\n"); + } + + #[tokio::test] + async fn test_sed_last_line() { + let result = run_sed(&["$d"], Some("line1\nline2\nline3")).await.unwrap(); + assert_eq!(result.stdout, "line1\nline2\n"); + } +} diff --git a/crates/bashkit/src/fs/mountable.rs b/crates/bashkit/src/fs/mountable.rs index d9e3200c..14ffc424 100644 --- a/crates/bashkit/src/fs/mountable.rs +++ b/crates/bashkit/src/fs/mountable.rs @@ -280,7 +280,10 @@ 
mod tests { mfs.mount("/mnt/data", mounted.clone()).unwrap(); // Access through mountable fs - let content = mfs.read_file(Path::new("/mnt/data/data.txt")).await.unwrap(); + let content = mfs + .read_file(Path::new("/mnt/data/data.txt")) + .await + .unwrap(); assert_eq!(content, b"mounted data"); } @@ -330,7 +333,10 @@ mod tests { assert_eq!(content, b"outer"); // Access nested mount - let content = mfs.read_file(Path::new("/mnt/nested/inner.txt")).await.unwrap(); + let content = mfs + .read_file(Path::new("/mnt/nested/inner.txt")) + .await + .unwrap(); assert_eq!(content, b"inner"); } diff --git a/crates/bashkit/src/fs/overlay.rs b/crates/bashkit/src/fs/overlay.rs index 5e030847..44203758 100644 --- a/crates/bashkit/src/fs/overlay.rs +++ b/crates/bashkit/src/fs/overlay.rs @@ -436,7 +436,10 @@ mod tests { let overlay = OverlayFs::new(lower.clone()); // Delete through overlay - overlay.remove(Path::new("/tmp/test.txt"), false).await.unwrap(); + overlay + .remove(Path::new("/tmp/test.txt"), false) + .await + .unwrap(); // Should not be visible through overlay assert!(!overlay.exists(Path::new("/tmp/test.txt")).await.unwrap()); @@ -456,7 +459,10 @@ mod tests { let overlay = OverlayFs::new(lower); // Delete - overlay.remove(Path::new("/tmp/test.txt"), false).await.unwrap(); + overlay + .remove(Path::new("/tmp/test.txt"), false) + .await + .unwrap(); assert!(!overlay.exists(Path::new("/tmp/test.txt")).await.unwrap()); // Recreate diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs index 3458fbfe..77bbd4d1 100644 --- a/crates/bashkit/src/interpreter/mod.rs +++ b/crates/bashkit/src/interpreter/mod.rs @@ -76,6 +76,10 @@ impl Interpreter { builtins.insert("local", Box::new(builtins::Local)); builtins.insert("source", Box::new(builtins::Source::new(fs.clone()))); builtins.insert(".", Box::new(builtins::Source::new(fs.clone()))); + builtins.insert("jq", Box::new(builtins::Jq)); + builtins.insert("grep", Box::new(builtins::Grep)); + 
builtins.insert("sed", Box::new(builtins::Sed)); + builtins.insert("awk", Box::new(builtins::Awk)); Self { fs, diff --git a/crates/bashkit/src/interpreter/state.rs b/crates/bashkit/src/interpreter/state.rs index c20b14d5..2a76ea82 100644 --- a/crates/bashkit/src/interpreter/state.rs +++ b/crates/bashkit/src/interpreter/state.rs @@ -47,6 +47,16 @@ impl ExecResult { } } + /// Create a result with stdout and custom exit code. + pub fn with_code(stdout: impl Into, exit_code: i32) -> Self { + Self { + stdout: stdout.into(), + stderr: String::new(), + exit_code, + control_flow: ControlFlow::None, + } + } + /// Create a result with a control flow signal pub fn with_control_flow(control_flow: ControlFlow) -> Self { Self { diff --git a/crates/bashkit/src/lib.rs b/crates/bashkit/src/lib.rs index 92c4510e..3a8a2e5c 100644 --- a/crates/bashkit/src/lib.rs +++ b/crates/bashkit/src/lib.rs @@ -34,11 +34,11 @@ pub use network::NetworkAllowlist; #[cfg(feature = "network")] pub use network::HttpClient; +use interpreter::Interpreter; +use parser::Parser; use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; -use interpreter::Interpreter; -use parser::Parser; /// Main entry point for BashKit. 
/// @@ -833,7 +833,9 @@ mod tests { let mut bash = Bash::builder().limits(limits).build(); // Loop that tries to run 10 times - let result = bash.exec("for i in 1 2 3 4 5 6 7 8 9 10; do echo $i; done").await; + let result = bash + .exec("for i in 1 2 3 4 5 6 7 8 9 10; do echo $i; done") + .await; assert!(result.is_err()); let err = result.unwrap_err(); assert!( @@ -849,7 +851,10 @@ mod tests { let mut bash = Bash::builder().limits(limits).build(); // Loop that runs 5 times - should succeed - let result = bash.exec("for i in 1 2 3 4 5; do echo $i; done").await.unwrap(); + let result = bash + .exec("for i in 1 2 3 4 5; do echo $i; done") + .await + .unwrap(); assert_eq!(result.stdout, "1\n2\n3\n4\n5\n"); } diff --git a/crates/bashkit/src/network/allowlist.rs b/crates/bashkit/src/network/allowlist.rs index 9e520439..2e7e51b7 100644 --- a/crates/bashkit/src/network/allowlist.rs +++ b/crates/bashkit/src/network/allowlist.rs @@ -187,10 +187,7 @@ mod tests { #[test] fn test_allow_all() { let allowlist = NetworkAllowlist::allow_all(); - assert_eq!( - allowlist.check("https://example.com"), - UrlMatch::Allowed - ); + assert_eq!(allowlist.check("https://example.com"), UrlMatch::Allowed); assert_eq!( allowlist.check("http://localhost:8080/anything"), UrlMatch::Allowed diff --git a/crates/bashkit/src/network/client.rs b/crates/bashkit/src/network/client.rs index b4fbd856..c2baeb80 100644 --- a/crates/bashkit/src/network/client.rs +++ b/crates/bashkit/src/network/client.rs @@ -167,10 +167,7 @@ mod tests { let result = client.get("https://example.com").await; assert!(result.is_err()); - assert!(result - .unwrap_err() - .to_string() - .contains("access denied")); + assert!(result.unwrap_err().to_string().contains("access denied")); } #[tokio::test] @@ -180,10 +177,7 @@ mod tests { let result = client.get("https://blocked.com").await; assert!(result.is_err()); - assert!(result - .unwrap_err() - .to_string() - .contains("access denied")); + 
assert!(result.unwrap_err().to_string().contains("access denied")); } // Note: Integration tests that actually make network requests diff --git a/crates/bashkit/src/parser/lexer.rs b/crates/bashkit/src/parser/lexer.rs index 84e1f4b1..ba5a491c 100644 --- a/crates/bashkit/src/parser/lexer.rs +++ b/crates/bashkit/src/parser/lexer.rs @@ -265,7 +265,8 @@ impl<'a> Lexer<'a> { self.advance(); } - Some(Token::Word(content)) + // Single-quoted strings are literal - no variable expansion + Some(Token::LiteralWord(content)) } fn read_double_quoted_string(&mut self) -> Option<Token> { @@ -391,9 +392,10 @@ mod tests { let mut lexer = Lexer::new("echo 'hello world'"); assert_eq!(lexer.next_token(), Some(Token::Word("echo".to_string()))); + // Single-quoted strings return LiteralWord (no variable expansion) assert_eq!( lexer.next_token(), - Some(Token::Word("hello world".to_string())) + Some(Token::LiteralWord("hello world".to_string())) ); assert_eq!(lexer.next_token(), None); } diff --git a/crates/bashkit/src/parser/mod.rs b/crates/bashkit/src/parser/mod.rs index ee1c09cd..bec51140 100644 --- a/crates/bashkit/src/parser/mod.rs +++ b/crates/bashkit/src/parser/mod.rs @@ -259,7 +259,7 @@ impl<'a> Parser<'a> { // Expect variable name let variable = match &self.current_token { - Some(tokens::Token::Word(w)) => w.clone(), + Some(tokens::Token::Word(w)) | Some(tokens::Token::LiteralWord(w)) => w.clone(), _ => { return Err(Error::Parse( "expected variable name in for loop".to_string(), @@ -281,6 +281,12 @@ impl<'a> Parser<'a> { words.push(self.parse_word(w.clone())); self.advance(); } + Some(tokens::Token::LiteralWord(w)) => { + words.push(Word { + parts: vec![WordPart::Literal(w.clone())], + }); + self.advance(); + } Some(tokens::Token::Newline) | Some(tokens::Token::Semicolon) => { self.advance(); break; @@ -701,7 +707,10 @@ impl<'a> Parser<'a> { loop { match &self.current_token { - Some(tokens::Token::Word(w)) => { + Some(tokens::Token::Word(w)) | Some(tokens::Token::LiteralWord(w)) => { +
let is_literal = + matches!(&self.current_token, Some(tokens::Token::LiteralWord(_))); + // Stop if this word cannot start a command (like 'then', 'fi', etc.) if words.is_empty() && Self::is_non_command_word(w) { break; @@ -711,8 +720,8 @@ impl<'a> Parser<'a> { break; } - // Check for assignment (only before the command name) - if words.is_empty() { + // Check for assignment (only before the command name, not for literal words) + if words.is_empty() && !is_literal { let w_clone = w.clone(); if let Some((name, index, value)) = Self::is_assignment(&w_clone) { let name = name.to_string(); @@ -741,9 +750,20 @@ impl<'a> Parser<'a> { self.advance(); break; } - Some(tokens::Token::Word(elem)) => { + Some(tokens::Token::Word(elem)) + | Some(tokens::Token::LiteralWord(elem)) => { let elem_clone = elem.clone(); - elements.push(self.parse_word(elem_clone)); + let word = if matches!( + &self.current_token, + Some(tokens::Token::LiteralWord(_)) + ) { + Word { + parts: vec![WordPart::Literal(elem_clone)], + } + } else { + self.parse_word(elem_clone) + }; + elements.push(word); self.advance(); } None => break, @@ -780,7 +800,14 @@ impl<'a> Parser<'a> { } } - words.push(self.parse_word(w.clone())); + let word = if is_literal { + Word { + parts: vec![WordPart::Literal(w.clone())], + } + } else { + self.parse_word(w.clone()) + }; + words.push(word); self.advance(); } Some(tokens::Token::RedirectOut) => { @@ -884,10 +911,50 @@ impl<'a> Parser<'a> { self.advance(); Ok(word) } + Some(tokens::Token::LiteralWord(w)) => { + // Single-quoted: no variable expansion + let word = Word { + parts: vec![WordPart::Literal(w.clone())], + }; + self.advance(); + Ok(word) + } _ => Err(Error::Parse("expected word".to_string())), } } + // Helper methods for word handling - kept for potential future use + #[allow(dead_code)] + /// Convert current word token to Word (handles both Word and LiteralWord) + fn current_word_to_word(&self) -> Option<Word> { + match &self.current_token { +
Some(tokens::Token::Word(w)) => Some(self.parse_word(w.clone())), + Some(tokens::Token::LiteralWord(w)) => Some(Word { + parts: vec![WordPart::Literal(w.clone())], + }), + _ => None, + } + } + + #[allow(dead_code)] + /// Check if current token is a word (either Word or LiteralWord) + fn is_current_word(&self) -> bool { + matches!( + &self.current_token, + Some(tokens::Token::Word(_)) | Some(tokens::Token::LiteralWord(_)) + ) + } + + #[allow(dead_code)] + /// Get the string content if current token is a word + fn current_word_str(&self) -> Option<String> { + match &self.current_token { + Some(tokens::Token::Word(w)) => Some(w.clone()), + Some(tokens::Token::LiteralWord(w)) => Some(w.clone()), + _ => None, + } + } + /// Parse a word string into a Word with proper parts (variables, literals) fn parse_word(&self, s: String) -> Word { let mut parts = Vec::new(); diff --git a/crates/bashkit/src/parser/tokens.rs b/crates/bashkit/src/parser/tokens.rs index 7add3506..d6f19dbc 100644 --- a/crates/bashkit/src/parser/tokens.rs +++ b/crates/bashkit/src/parser/tokens.rs @@ -7,9 +7,12 @@ /// Token types produced by the lexer. #[derive(Debug, Clone, PartialEq)] pub enum Token { - /// A word (command name, argument, etc.) + /// A word (command name, argument, etc.) - may contain variable expansions Word(String), + /// A literal word (single-quoted) - no variable expansion + LiteralWord(String), + /// Newline character Newline,