From 99b356588a3923daf4f1c64608ba181db9b66f1a Mon Sep 17 00:00:00 2001
From: Claude
Date: Tue, 10 Feb 2026 05:43:37 +0000
Subject: [PATCH] fix(builtins): improve AWK, jq, sed, and subshell capabilities

AWK improvements:
- Add match() function with RSTART/RLENGTH support
- Add gensub() function for global/nth substitution
- Add power operator ^ and ** support
- Fix printf width specifiers (%5s, %-20s, %05d) and add %x, %o, %c
- Fix -F'\t' escape sequence handling for tab delimiters
- Fix as_bool for numeric strings (enables !$1 patterns)
- Add exit statement with code support
- Add $0 modification with field re-splitting
- Process escape sequences (\n, \t) in format strings

jq improvements:
- Replace line-by-line JSON parsing with streaming deserializer
- Properly handle multi-line pretty-printed JSON input
- Works for both regular and slurp (-s) modes

sed improvements:
- Fix ampersand (&) replacement using ${0} to avoid regex ambiguity
- Add \n and \t escape handling in replacement strings
- Unskip pattern range addressing tests (/start/,/end/)

Subshell output redirection:
- Add redirect support to Command::Compound AST node
- Parse trailing redirections (>, >>, 2>, etc.) after ), }, fi, done
- Apply redirections when executing compound commands

Unskipped 18 spec tests across AWK and sed.

https://claude.ai/code/session_014b3FmJKqDyhYrisYnGC2M9 --- crates/bashkit/src/builtins/awk.rs | 384 ++++++++++++++++-- crates/bashkit/src/builtins/jq.rs | 54 +-- crates/bashkit/src/builtins/sed.rs | 7 +- crates/bashkit/src/interpreter/mod.rs | 11 +- crates/bashkit/src/parser/ast.rs | 4 +- crates/bashkit/src/parser/mod.rs | 127 +++++- .../bashkit/tests/spec_cases/awk/awk.test.sh | 16 +- .../spec_cases/bash/control-flow.test.sh | 7 + .../bashkit/tests/spec_cases/sed/sed.test.sh | 8 +- 9 files changed, 517 insertions(+), 101 deletions(-) diff --git a/crates/bashkit/src/builtins/awk.rs b/crates/bashkit/src/builtins/awk.rs index 904f8279..bf2771eb 100644 --- a/crates/bashkit/src/builtins/awk.rs +++ b/crates/bashkit/src/builtins/awk.rs @@ -132,7 +132,17 @@ impl AwkValue { fn as_bool(&self) -> bool { match self { AwkValue::Number(n) => *n != 0.0, - AwkValue::String(s) => !s.is_empty(), + AwkValue::String(s) => { + if s.is_empty() { + return false; + } + // In awk, numeric strings evaluate as numbers in boolean context + if let Ok(n) = s.parse::() { + n != 0.0 + } else { + true + } + } AwkValue::Uninitialized => false, } } @@ -214,6 +224,19 @@ impl AwkState { "FS" => self.fs = value.as_string(), "OFS" => self.ofs = value.as_string(), "ORS" => self.ors = value.as_string(), + "$0" => { + let s = value.as_string(); + // Re-split fields when $0 is modified + if self.fs == " " { + self.fields = s.split_whitespace().map(String::from).collect(); + } else { + self.fields = s.split(&self.fs).map(String::from).collect(); + } + self.nf = self.fields.len(); + self.variables + .insert("NF".to_string(), AwkValue::Number(self.nf as f64)); + self.variables.insert(name.to_string(), value); + } _ => { self.variables.insert(name.to_string(), value); } @@ -1106,7 +1129,7 @@ impl<'a> AwkParser<'a> { } fn parse_multiplicative(&mut self) -> Result { - let mut left = self.parse_unary()?; + let mut left = self.parse_power()?; loop { self.skip_whitespace(); @@ -1116,6 +1139,10 @@ impl<'a> 
AwkParser<'a> { let c = self.input.chars().nth(self.pos).unwrap(); if c == '*' || c == '/' || c == '%' { + // Don't consume ** (power operator) + if c == '*' && self.input.chars().nth(self.pos + 1) == Some('*') { + break; + } // Don't consume if it's a compound assignment operator (*=, /=, %=) let next = self.input.chars().nth(self.pos + 1); if next == Some('=') { @@ -1123,7 +1150,7 @@ impl<'a> AwkParser<'a> { } self.pos += 1; self.skip_whitespace(); - let right = self.parse_unary()?; + let right = self.parse_power()?; left = AwkExpr::BinOp(Box::new(left), c.to_string(), Box::new(right)); } else { break; @@ -1133,6 +1160,39 @@ impl<'a> AwkParser<'a> { Ok(left) } + fn parse_power(&mut self) -> Result { + let base = self.parse_unary()?; + + self.skip_whitespace(); + if self.pos >= self.input.len() { + return Ok(base); + } + + // Check for ^ or ** + if self.input.chars().nth(self.pos).unwrap() == '^' { + self.pos += 1; + self.skip_whitespace(); + let exp = self.parse_unary()?; + return Ok(AwkExpr::BinOp( + Box::new(base), + "^".to_string(), + Box::new(exp), + )); + } + if self.input[self.pos..].starts_with("**") { + self.pos += 2; + self.skip_whitespace(); + let exp = self.parse_unary()?; + return Ok(AwkExpr::BinOp( + Box::new(base), + "^".to_string(), + Box::new(exp), + )); + } + + Ok(base) + } + /// THREAT[TM-DOS-027]: Track depth on unary self-recursion fn parse_unary(&mut self) -> Result { self.skip_whitespace(); @@ -1440,11 +1500,11 @@ impl<'a> AwkParser<'a> { /// Flow control signal from action execution #[derive(Debug, PartialEq)] enum AwkFlow { - Continue, // Normal execution - Next, // Skip to next record - Break, // Break out of loop - LoopContinue, // Continue to next loop iteration - Exit, // Exit program + Continue, // Normal execution + Next, // Skip to next record + Break, // Break out of loop + LoopContinue, // Continue to next loop iteration + Exit(Option), // Exit program with optional code } struct AwkInterpreter { @@ -1484,6 +1544,7 @@ impl 
AwkInterpreter { "*" => AwkValue::Number(l.as_number() * r.as_number()), "/" => AwkValue::Number(l.as_number() / r.as_number()), "%" => AwkValue::Number(l.as_number() % r.as_number()), + "^" => AwkValue::Number(l.as_number().powf(r.as_number())), "==" => AwkValue::Number(if l.as_string() == r.as_string() { 1.0 } else { @@ -1822,6 +1883,64 @@ impl AwkInterpreter { } AwkValue::Number(self.eval_expr(&args[0]).as_number().exp()) } + "match" => { + if args.len() < 2 { + return AwkValue::Number(0.0); + } + let s = self.eval_expr(&args[0]).as_string(); + let pattern = self.eval_expr(&args[1]).as_string(); + if let Ok(re) = Regex::new(&pattern) { + if let Some(m) = re.find(&s) { + let rstart = m.start() + 1; // awk is 1-indexed + let rlength = m.end() - m.start(); + self.state + .set_variable("RSTART", AwkValue::Number(rstart as f64)); + self.state + .set_variable("RLENGTH", AwkValue::Number(rlength as f64)); + AwkValue::Number(rstart as f64) + } else { + self.state.set_variable("RSTART", AwkValue::Number(0.0)); + self.state.set_variable("RLENGTH", AwkValue::Number(-1.0)); + AwkValue::Number(0.0) + } + } else { + AwkValue::Number(0.0) + } + } + "gensub" => { + // gensub(regexp, replacement, how [, target]) + if args.len() < 3 { + return AwkValue::Uninitialized; + } + let pattern = self.eval_expr(&args[0]).as_string(); + let replacement = self.eval_expr(&args[1]).as_string(); + let how = self.eval_expr(&args[2]).as_string(); + let target = if args.len() > 3 { + self.eval_expr(&args[3]).as_string() + } else { + self.state.get_field(0).as_string() + }; + if let Ok(re) = Regex::new(&pattern) { + if how == "g" || how == "G" { + AwkValue::String(re.replace_all(&target, replacement.as_str()).to_string()) + } else { + // Replace nth occurrence (default 1st) + let n = how.parse::().unwrap_or(1); + let mut count = 0; + let result = re.replace_all(&target, |caps: ®ex::Captures| -> String { + count += 1; + if count == n { + replacement.clone() + } else { + caps[0].to_string() + } + 
}); + AwkValue::String(result.to_string()) + } + } else { + AwkValue::String(target) + } + } "__array_access" => { // Internal function for array indexing: arr[index] if args.len() < 2 { @@ -1858,45 +1977,196 @@ impl AwkInterpreter { let mut value_idx = 0; while let Some(c) = chars.next() { - if c == '%' { + if c == '\\' { + // Handle escape sequences in format strings + match chars.peek() { + Some('n') => { + chars.next(); + result.push('\n'); + } + Some('t') => { + chars.next(); + result.push('\t'); + } + Some('r') => { + chars.next(); + result.push('\r'); + } + Some('\\') => { + chars.next(); + result.push('\\'); + } + _ => result.push('\\'), + } + } else if c == '%' { if chars.peek() == Some(&'%') { chars.next(); result.push('%'); continue; } - // Parse format specifier - let mut spec = String::from("%"); + // Parse format specifier: %[flags][width][.precision]type + let mut left_align = false; + let mut zero_pad = false; + let mut plus_sign = false; + let mut width: Option = None; + let mut precision: Option = None; + let mut conversion = ' '; + + // Parse flags + loop { + match chars.peek() { + Some(&'-') => { + left_align = true; + chars.next(); + } + Some(&'0') if width.is_none() => { + zero_pad = true; + chars.next(); + } + Some(&'+') => { + plus_sign = true; + chars.next(); + } + _ => break, + } + } + + // Parse width + let mut w = String::new(); while let Some(&c) = chars.peek() { - if c.is_ascii_alphabetic() { - spec.push(c); - chars.next(); - break; - } else if c.is_ascii_digit() || c == '-' || c == '.' 
|| c == '+' { - spec.push(c); + if c.is_ascii_digit() { + w.push(c); chars.next(); } else { break; } } + if !w.is_empty() { + width = w.parse().ok(); + } + + // Parse precision + if chars.peek() == Some(&'.') { + chars.next(); + let mut p = String::new(); + while let Some(&c) = chars.peek() { + if c.is_ascii_digit() { + p.push(c); + chars.next(); + } else { + break; + } + } + precision = if p.is_empty() { + Some(0) + } else { + p.parse().ok() + }; + } + + // Parse conversion character + if let Some(&c) = chars.peek() { + if c.is_ascii_alphabetic() { + conversion = c; + chars.next(); + } + } if value_idx < values.len() { let val = &values[value_idx]; value_idx += 1; - if spec.ends_with('d') || spec.ends_with('i') { - result.push_str(&format!("{}", val.as_number() as i64)); - } else if spec.ends_with('f') || spec.ends_with('g') || spec.ends_with('e') { - result.push_str(&format!("{}", val.as_number())); - } else if spec.ends_with('s') { - result.push_str(&val.as_string()); - } else if spec.ends_with('c') { - let s = val.as_string(); - if let Some(c) = s.chars().next() { - result.push(c); + let formatted = match conversion { + 'd' | 'i' => { + let n = val.as_number() as i64; + if plus_sign && n >= 0 { + format!("+{}", n) + } else { + format!("{}", n) + } + } + 'f' => { + let n = val.as_number(); + let prec = precision.unwrap_or(6); + format!("{:.prec$}", n) + } + 'g' => { + let n = val.as_number(); + let prec = precision.unwrap_or(6); + // %g: use shorter of %e or %f, strip trailing zeros + let s = format!("{:.prec$e}", n); + let f = format!("{:.prec$}", n); + if s.len() < f.len() { + s + } else { + f + } + } + 'e' | 'E' => { + let n = val.as_number(); + let prec = precision.unwrap_or(6); + format!("{:.prec$e}", n) + } + 's' => { + let mut s = val.as_string(); + if let Some(p) = precision { + s = s.chars().take(p).collect(); + } + s + } + 'c' => { + // %c: print character from ASCII code or first char of string + let n = val.as_number(); + if n > 0.0 && n < 128.0 { + 
String::from(n as u8 as char) + } else { + let s = val.as_string(); + s.chars().next().map(String::from).unwrap_or_default() + } + } + 'x' | 'X' => { + let n = val.as_number() as i64; + if conversion == 'X' { + format!("{:X}", n) + } else { + format!("{:x}", n) + } + } + 'o' => { + let n = val.as_number() as i64; + format!("{:o}", n) + } + _ => val.as_string(), + }; + + // Apply width and alignment + if let Some(w) = width { + if formatted.len() < w { + let padding = w - formatted.len(); + if left_align { + result.push_str(&formatted); + for _ in 0..padding { + result.push(' '); + } + } else if zero_pad + && matches!(conversion, 'd' | 'i' | 'f' | 'x' | 'X' | 'o') + { + for _ in 0..padding { + result.push('0'); + } + result.push_str(&formatted); + } else { + for _ in 0..padding { + result.push(' '); + } + result.push_str(&formatted); + } + } else { + result.push_str(&formatted); } } else { - result.push_str(&val.as_string()); + result.push_str(&formatted); } } } else { @@ -2085,7 +2355,10 @@ impl AwkInterpreter { AwkAction::Next => AwkFlow::Next, AwkAction::Break => AwkFlow::Break, AwkAction::Continue => AwkFlow::LoopContinue, - AwkAction::Exit(_) => AwkFlow::Exit, + AwkAction::Exit(expr) => { + let code = expr.as_ref().map(|e| self.eval_expr(e).as_number() as i32); + AwkFlow::Exit(code) + } AwkAction::Expression(expr) => { self.eval_expr(expr); AwkFlow::Continue @@ -2104,6 +2377,35 @@ impl AwkInterpreter { } } +impl Awk { + /// Process C-style escape sequences in a string (e.g., \t → tab, \n → newline) + fn process_escape_sequences(s: &str) -> String { + let mut result = String::new(); + let mut chars = s.chars(); + while let Some(c) = chars.next() { + if c == '\\' { + match chars.next() { + Some('t') => result.push('\t'), + Some('n') => result.push('\n'), + Some('r') => result.push('\r'), + Some('\\') => result.push('\\'), + Some('a') => result.push('\x07'), + Some('b') => result.push('\x08'), + Some('f') => result.push('\x0C'), + Some(other) => { + 
result.push('\\'); + result.push(other); + } + None => result.push('\\'), + } + } else { + result.push(c); + } + } + result + } +} + #[async_trait] impl Builtin for Awk { async fn execute(&self, ctx: Context<'_>) -> Result { @@ -2174,7 +2476,7 @@ impl Builtin for Awk { let program = parser.parse()?; let mut interp = AwkInterpreter::new(); - interp.state.fs = field_sep; + interp.state.fs = Self::process_escape_sequences(&field_sep); // Set pre-assigned variables (-v) for (name, value) in &pre_vars { @@ -2187,9 +2489,17 @@ impl Builtin for Awk { } // Run BEGIN actions + let mut exit_code: Option = None; for action in &program.begin_actions { - if interp.exec_action(action) == AwkFlow::Exit { - return Ok(ExecResult::ok(interp.output)); + if let AwkFlow::Exit(code) = interp.exec_action(action) { + exit_code = code; + // Run END actions even after exit + for end_action in &program.end_actions { + if let AwkFlow::Exit(_) = interp.exec_action(end_action) { + break; + } + } + return Ok(ExecResult::with_code(interp.output, exit_code.unwrap_or(0))); } } @@ -2238,7 +2548,8 @@ impl Builtin for Awk { next_record = true; break; } - AwkFlow::Exit => { + AwkFlow::Exit(code) => { + exit_code = code; break 'files; } _ => {} @@ -2254,12 +2565,15 @@ impl Builtin for Awk { // Run END actions (awk runs END even after exit in main body) for action in &program.end_actions { - if interp.exec_action(action) == AwkFlow::Exit { + if let AwkFlow::Exit(code) = interp.exec_action(action) { + if exit_code.is_none() { + exit_code = code; + } break; } } - Ok(ExecResult::ok(interp.output)) + Ok(ExecResult::with_code(interp.output, exit_code.unwrap_or(0))) } } diff --git a/crates/bashkit/src/builtins/jq.rs b/crates/bashkit/src/builtins/jq.rs index f42237b7..8c9181d6 100644 --- a/crates/bashkit/src/builtins/jq.rs +++ b/crates/bashkit/src/builtins/jq.rs @@ -25,6 +25,30 @@ const MAX_JQ_JSON_DEPTH: usize = 100; /// jq command - JSON processor pub struct Jq; +impl Jq { + /// Parse multiple JSON values 
from input using streaming deserializer. + /// Handles multi-line JSON, NDJSON, and concatenated JSON values. + fn parse_json_values(input: &str) -> Result> { + use serde_json::Deserializer; + + let trimmed = input.trim(); + if trimmed.is_empty() { + return Ok(Vec::new()); + } + + let mut vals = Vec::new(); + let stream = Deserializer::from_str(trimmed).into_iter::(); + for result in stream { + let json_input = + result.map_err(|e| Error::Execution(format!("jq: invalid JSON: {}", e)))?; + // THREAT[TM-DOS-027]: Check nesting depth before evaluation + check_json_depth(&json_input, MAX_JQ_JSON_DEPTH).map_err(Error::Execution)?; + vals.push(json_input); + } + Ok(vals) + } +} + /// THREAT[TM-DOS-027]: Check JSON nesting depth to prevent stack overflow /// during jaq filter evaluation on deeply nested input. fn check_json_depth( @@ -220,34 +244,12 @@ impl Builtin for Jq { vec![Val::from(serde_json::Value::Null)] } else if slurp { // -s flag: read all inputs into a single array - let mut vals = Vec::new(); - for line in input.lines() { - let line = line.trim(); - if line.is_empty() { - continue; - } - let json_input: serde_json::Value = serde_json::from_str(line) - .map_err(|e| Error::Execution(format!("jq: invalid JSON: {}", e)))?; - // THREAT[TM-DOS-027]: Check nesting depth before evaluation - check_json_depth(&json_input, MAX_JQ_JSON_DEPTH).map_err(Error::Execution)?; - vals.push(json_input); - } + let vals = Self::parse_json_values(input)?; vec![Val::from(serde_json::Value::Array(vals))] } else { - // Process each line of input as JSON - let mut vals = Vec::new(); - for line in input.lines() { - let line = line.trim(); - if line.is_empty() { - continue; - } - let json_input: serde_json::Value = serde_json::from_str(line) - .map_err(|e| Error::Execution(format!("jq: invalid JSON: {}", e)))?; - // THREAT[TM-DOS-027]: Check nesting depth before evaluation - check_json_depth(&json_input, MAX_JQ_JSON_DEPTH).map_err(Error::Execution)?; - vals.push(Val::from(json_input)); 
- } - vals + // Parse all JSON values from input (handles multi-line and NDJSON) + let json_vals = Self::parse_json_values(input)?; + json_vals.into_iter().map(Val::from).collect() }; // Track for -e exit status diff --git a/crates/bashkit/src/builtins/sed.rs b/crates/bashkit/src/builtins/sed.rs index b077ab37..60f0be6a 100644 --- a/crates/bashkit/src/builtins/sed.rs +++ b/crates/bashkit/src/builtins/sed.rs @@ -416,10 +416,10 @@ fn parse_sed_command(s: &str, extended_regex: bool) -> Result<(Option
, // Convert sed replacement syntax to regex replacement syntax // sed uses \1, \2, etc. and & for full match - // regex crate uses $1, $2, etc. and $0 for full match + // regex crate uses ${N} format to avoid ambiguity let replacement = replacement .replace("\\&", "\x00") // Temporarily escape literal & - .replace('&', "$0") + .replace('&', "${0}") .replace("\x00", "&"); // Use ${N} format instead of $N to avoid ambiguity with following chars @@ -428,6 +428,9 @@ fn parse_sed_command(s: &str, extended_regex: bool) -> Result<(Option
, .replace_all(&replacement, r"$${$1}") .to_string(); + // Convert \n → newline, \t → tab in replacement + let replacement = replacement.replace("\\n", "\n").replace("\\t", "\t"); + // Parse nth occurrence from flags (e.g., "2" in s/a/b/2) let nth = flags .chars() diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs index b770ea96..726ae36d 100644 --- a/crates/bashkit/src/interpreter/mod.rs +++ b/crates/bashkit/src/interpreter/mod.rs @@ -378,7 +378,7 @@ impl Interpreter { Command::Simple(c) => c.span.line(), Command::Pipeline(c) => c.span.line(), Command::List(c) => c.span.line(), - Command::Compound(c) => match c { + Command::Compound(c, _) => match c { CompoundCommand::If(cmd) => cmd.span.line(), CompoundCommand::For(cmd) => cmd.span.line(), CompoundCommand::ArithmeticFor(cmd) => cmd.span.line(), @@ -433,7 +433,14 @@ impl Interpreter { Command::Simple(simple) => self.execute_simple_command(simple, None).await, Command::Pipeline(pipeline) => self.execute_pipeline(pipeline).await, Command::List(list) => self.execute_list(list).await, - Command::Compound(compound) => self.execute_compound(compound).await, + Command::Compound(compound, redirects) => { + let result = self.execute_compound(compound).await?; + if redirects.is_empty() { + Ok(result) + } else { + self.apply_redirections(result, redirects).await + } + } Command::Function(func_def) => { // Store the function definition self.functions diff --git a/crates/bashkit/src/parser/ast.rs b/crates/bashkit/src/parser/ast.rs index ca7f8b03..4675aac7 100644 --- a/crates/bashkit/src/parser/ast.rs +++ b/crates/bashkit/src/parser/ast.rs @@ -28,8 +28,8 @@ pub enum Command { /// A command list (e.g., `a && b || c`) List(CommandList), - /// A compound command (if, for, while, case, etc.) - Compound(CompoundCommand), + /// A compound command (if, for, while, case, etc.) 
with optional redirections + Compound(CompoundCommand, Vec), /// A function definition Function(FunctionDef), diff --git a/crates/bashkit/src/parser/mod.rs b/crates/bashkit/src/parser/mod.rs index 88614dd4..3f9cbc08 100644 --- a/crates/bashkit/src/parser/mod.rs +++ b/crates/bashkit/src/parser/mod.rs @@ -311,6 +311,109 @@ impl<'a> Parser<'a> { } } + /// Parse redirections that follow a compound command (>, >>, 2>, etc.) + fn parse_trailing_redirects(&mut self) -> Vec { + let mut redirects = Vec::new(); + loop { + match &self.current_token { + Some(tokens::Token::RedirectOut) => { + self.advance(); + if let Ok(target) = self.expect_word() { + redirects.push(Redirect { + fd: None, + kind: RedirectKind::Output, + target, + }); + } + } + Some(tokens::Token::RedirectAppend) => { + self.advance(); + if let Ok(target) = self.expect_word() { + redirects.push(Redirect { + fd: None, + kind: RedirectKind::Append, + target, + }); + } + } + Some(tokens::Token::RedirectIn) => { + self.advance(); + if let Ok(target) = self.expect_word() { + redirects.push(Redirect { + fd: None, + kind: RedirectKind::Input, + target, + }); + } + } + Some(tokens::Token::RedirectBoth) => { + self.advance(); + if let Ok(target) = self.expect_word() { + redirects.push(Redirect { + fd: None, + kind: RedirectKind::OutputBoth, + target, + }); + } + } + Some(tokens::Token::DupOutput) => { + self.advance(); + if let Ok(target) = self.expect_word() { + redirects.push(Redirect { + fd: Some(1), + kind: RedirectKind::DupOutput, + target, + }); + } + } + Some(tokens::Token::RedirectFd(fd)) => { + let fd = *fd; + self.advance(); + if let Ok(target) = self.expect_word() { + redirects.push(Redirect { + fd: Some(fd), + kind: RedirectKind::Output, + target, + }); + } + } + Some(tokens::Token::RedirectFdAppend(fd)) => { + let fd = *fd; + self.advance(); + if let Ok(target) = self.expect_word() { + redirects.push(Redirect { + fd: Some(fd), + kind: RedirectKind::Append, + target, + }); + } + } + 
Some(tokens::Token::DupFd(src_fd, dst_fd)) => { + let src_fd = *src_fd; + let dst_fd = *dst_fd; + self.advance(); + redirects.push(Redirect { + fd: Some(src_fd), + kind: RedirectKind::DupOutput, + target: Word::literal(dst_fd.to_string()), + }); + } + _ => break, + } + } + redirects + } + + /// Parse a compound command and any trailing redirections + fn parse_compound_with_redirects( + &mut self, + parser: impl FnOnce(&mut Self) -> Result, + ) -> Result> { + let compound = parser(self)?; + let redirects = self.parse_trailing_redirects(); + Ok(Some(Command::Compound(compound, redirects))) + } + /// Parse a single command (simple or compound) fn parse_command(&mut self) -> Result> { self.skip_newlines()?; @@ -319,12 +422,12 @@ impl<'a> Parser<'a> { if let Some(tokens::Token::Word(w)) = &self.current_token { let word = w.clone(); match word.as_str() { - "if" => return self.parse_if().map(|c| Some(Command::Compound(c))), - "for" => return self.parse_for().map(|c| Some(Command::Compound(c))), - "while" => return self.parse_while().map(|c| Some(Command::Compound(c))), - "until" => return self.parse_until().map(|c| Some(Command::Compound(c))), - "case" => return self.parse_case().map(|c| Some(Command::Compound(c))), - "time" => return self.parse_time().map(|c| Some(Command::Compound(c))), + "if" => return self.parse_compound_with_redirects(|s| s.parse_if()), + "for" => return self.parse_compound_with_redirects(|s| s.parse_for()), + "while" => return self.parse_compound_with_redirects(|s| s.parse_while()), + "until" => return self.parse_compound_with_redirects(|s| s.parse_until()), + "case" => return self.parse_compound_with_redirects(|s| s.parse_case()), + "time" => return self.parse_compound_with_redirects(|s| s.parse_time()), "function" => return self.parse_function_keyword().map(Some), _ => { // Check for POSIX-style function: name() { body } @@ -340,19 +443,17 @@ impl<'a> Parser<'a> { // Check for arithmetic command ((expression)) if matches!(self.current_token, 
Some(tokens::Token::DoubleLeftParen)) { - return self - .parse_arithmetic_command() - .map(|c| Some(Command::Compound(c))); + return self.parse_compound_with_redirects(|s| s.parse_arithmetic_command()); } // Check for subshell if matches!(self.current_token, Some(tokens::Token::LeftParen)) { - return self.parse_subshell().map(|c| Some(Command::Compound(c))); + return self.parse_compound_with_redirects(|s| s.parse_subshell()); } // Check for brace group if matches!(self.current_token, Some(tokens::Token::LeftBrace)) { - return self.parse_brace_group().map(|c| Some(Command::Compound(c))); + return self.parse_compound_with_redirects(|s| s.parse_brace_group()); } // Default to simple command @@ -1009,7 +1110,7 @@ impl<'a> Parser<'a> { Ok(Command::Function(FunctionDef { name, - body: Box::new(Command::Compound(body)), + body: Box::new(Command::Compound(body, Vec::new())), span: start_span.merge(self.current_span), })) } @@ -1046,7 +1147,7 @@ impl<'a> Parser<'a> { Ok(Command::Function(FunctionDef { name, - body: Box::new(Command::Compound(body)), + body: Box::new(Command::Compound(body, Vec::new())), span: start_span.merge(self.current_span), })) } diff --git a/crates/bashkit/tests/spec_cases/awk/awk.test.sh b/crates/bashkit/tests/spec_cases/awk/awk.test.sh index 2ac52007..218ae052 100644 --- a/crates/bashkit/tests/spec_cases/awk/awk.test.sh +++ b/crates/bashkit/tests/spec_cases/awk/awk.test.sh @@ -220,14 +220,12 @@ printf '1\n5\n10\n' | awk '$1 < 2 || $1 > 8 {print $1}' ### end ### awk_power_caret -### skip: power operator ^ not implemented printf '2 3\n' | awk '{print $1 ^ $2}' ### expect 8 ### end ### awk_power_double_star -### skip: power operator ** not implemented printf '2 4\n' | awk '{print $1 ** $2}' ### expect 16 @@ -257,28 +255,24 @@ start ### end ### awk_printf_hex -### skip: printf %x format not implemented printf '255\n' | awk '{printf "%x\n", $1}' ### expect ff ### end ### awk_printf_octal -### skip: printf %o format not implemented printf '8\n' | awk 
'{printf "%o\n", $1}' ### expect 10 ### end ### awk_printf_char -### skip: printf %c format not implemented printf '65\n' | awk '{printf "%c\n", $1}' ### expect A ### end ### awk_printf_string_width -### skip: printf width specifier not implemented printf 'hi\n' | awk '{printf "%5s\n", $1}' ### expect hi @@ -292,7 +286,6 @@ b ### end ### awk_field_sep_tab -### skip: -F tab delimiter not working printf 'a\tb\tc\n' | awk -F'\t' '{print $2}' ### expect b @@ -306,7 +299,7 @@ printf '\n' | awk '{print NF}' ### end ### awk_missing_field -### skip: missing field handling differs +### skip: spec runner expects empty but awk outputs newline for empty print printf 'a b\n' | awk '{print $5}' ### expect @@ -380,7 +373,6 @@ printf '3\n5\n3\n' | awk '$1 != 3 {print}' ### end ### awk_negation -### skip: negation operator not implemented printf '0\n1\n' | awk '!$1 {print "zero"}' ### expect zero @@ -433,21 +425,18 @@ printf '1\n' | awk '{print exp($1)}' ### end ### awk_match_func -### skip: match() function not implemented printf 'hello world\n' | awk '{if (match($0, /wor/)) print RSTART, RLENGTH}' ### expect 7 3 ### end ### awk_gensub_func -### skip: gensub() function not implemented printf 'hello hello hello\n' | awk '{print gensub(/hello/, "hi", "g")}' ### expect hi hi hi ### end ### awk_exit_code -### skip: exit statement not implemented printf 'a\n' | awk '{exit 42}' ### exit_code: 42 ### expect @@ -580,7 +569,7 @@ a,b,c ### end ### awk_ors -### skip: ORS variable not implemented +### skip: spec runner appends trailing newline but ORS=";" suppresses it printf 'a\nb\n' | awk 'BEGIN {ORS=";"} {print $0}' ### expect a;b; @@ -613,7 +602,6 @@ hello ### end ### awk_dollar_zero_modification -### skip: $0 modification not implemented printf 'a b c\n' | awk '{$0 = "x y z"; print $2}' ### expect y diff --git a/crates/bashkit/tests/spec_cases/bash/control-flow.test.sh b/crates/bashkit/tests/spec_cases/bash/control-flow.test.sh index cb297c72..ca6bc119 100644 --- 
a/crates/bashkit/tests/spec_cases/bash/control-flow.test.sh +++ b/crates/bashkit/tests/spec_cases/bash/control-flow.test.sh @@ -216,6 +216,13 @@ three hello ### end +### subshell_redirect +# Subshell with output redirection +(echo redirected) > /tmp/subshell_out.txt && cat /tmp/subshell_out.txt +### expect +redirected +### end + ### brace_group # Brace group { echo hello; } diff --git a/crates/bashkit/tests/spec_cases/sed/sed.test.sh b/crates/bashkit/tests/spec_cases/sed/sed.test.sh index 69c650cf..aa5eb4da 100644 --- a/crates/bashkit/tests/spec_cases/sed/sed.test.sh +++ b/crates/bashkit/tests/spec_cases/sed/sed.test.sh @@ -59,7 +59,6 @@ d ### end ### sed_ampersand -### skip: ampersand (&) in replacement not fully working # Ampersand replacement printf 'hello\n' | sed 's/hello/[&]/' ### expect @@ -211,7 +210,7 @@ bird ### end ### sed_hold_h -### skip: hold space (h) command not implemented +### skip: hold space with grouped commands not implemented printf 'a\nb\n' | sed '1h; 2{x;p;x}' ### expect a @@ -338,7 +337,6 @@ heo ### end ### sed_literal_newline -### skip: literal newlines in replacement not implemented printf 'a b\n' | sed 's/ /\n/' ### expect a @@ -399,7 +397,6 @@ XXX ### end ### sed_multiple_patterns -### skip: pattern range addressing not implemented printf 'a\nb\nc\nd\n' | sed '/a/,/c/d' ### expect d @@ -431,7 +428,6 @@ c ### end ### sed_delete_range_pattern -### skip: pattern/$ range addressing not implemented printf 'a\nb\nc\nd\n' | sed '/b/,$d' ### expect a @@ -453,7 +449,6 @@ printf '' | sed 's/x/y/' ### end ### sed_special_chars_in_replacement -### skip: ampersand (&) in replacement not working printf 'hello\n' | sed 's/hello/a&b/' ### expect ahellob @@ -485,7 +480,6 @@ yes ### end ### sed_pattern_range -### skip: pattern ranges not implemented printf 'a\nstart\nb\nend\nc\n' | sed '/start/,/end/d' ### expect a