From 5c6231d9a5fd0df07caa33c7e0808a186cb3b308 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Fri, 3 Jan 2025 11:25:48 -0800 Subject: [PATCH 01/11] ilex: Factor out a proc2decl crate for generating proc macros --- Cargo.toml | 2 +- ilex/attr/Cargo.toml | 3 + ilex/attr/lib.rs | 152 +++++++-------- ilex/src/spec.rs | 1 + proc2decl/Cargo.toml | 14 ++ proc2decl/src/lib.rs | 434 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 518 insertions(+), 88 deletions(-) create mode 100644 proc2decl/Cargo.toml create mode 100644 proc2decl/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 79b6d48..472497d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["byteyarn", "buf-trait", "ilex", "ilex/attr", "twie"] +members = ["byteyarn", "buf-trait", "ilex", "ilex/attr", "twie", "proc2decl"] resolver = "2" [workspace.package] diff --git a/ilex/attr/Cargo.toml b/ilex/attr/Cargo.toml index f178e94..959f80d 100644 --- a/ilex/attr/Cargo.toml +++ b/ilex/attr/Cargo.toml @@ -13,3 +13,6 @@ license.workspace = true [lib] path = "lib.rs" proc-macro = true + +[dependencies] +proc2decl = { path = "../../proc2decl" } \ No newline at end of file diff --git a/ilex/attr/lib.rs b/ilex/attr/lib.rs index 97cd792..dc9d55d 100644 --- a/ilex/attr/lib.rs +++ b/ilex/attr/lib.rs @@ -1,93 +1,6 @@ //! Implementation detail of `ilex`. -use proc_macro::Delimiter; -use proc_macro::Group; -use proc_macro::Ident; -use proc_macro::Punct; -use proc_macro::Spacing; -use proc_macro::Span; use proc_macro::TokenStream; -use proc_macro::TokenTree; - -/// Generates a lexer spec struct. -/// -/// This macro generates the type of struct described in the -/// [crate documentation][crate]. The syntax is as follows. -/// -/// ```ignore -/// use ilex::rule::Keyword; -/// use ilex::Lexeme; -/// -/// /// My cool spec. -/// #[ilex::spec] -/// struct MySpec { -/// #[named("...")] -/// #[rule(/* ... 
*/)] -/// dollar: Lexeme = "$", -/// } -/// ``` -/// -/// The type of each field must be a [`Lexeme`] with a [`Rule`] type as its -/// parameter. There are two special attributes that can follow. -/// -/// - `#[named]` makes the rule into a *named* rule. This name can be used by -/// diagnostics, and corresponds to calling `Spec::named_rule()`. -/// -/// - `#[rule]` is the value to use to construct the rule, which must be -/// `Into`, where `R` is the type inside `Lexeme` (so, above, the rule -/// value must be `Into`). By default, this value is the name of the -/// rule, to make the common case of declaring a keyword as simple as writing -/// `nullptr: Lexeme`, assuming Rust itself doesn't already use that -/// keyword. -/// -/// Note that *order matters* for the fields: when breaking a tie between two -/// potential tokens of the same length, the first one in the struct will win. -/// In practice, this means you should put keywords before identifiers. -/// -/// Additionally, the following functions will be defined for the `MySpec` type. -/// -/// ``` -/// # struct Spec; -/// # struct MySpec; -/// # fn norun(_: i32) { -/// impl MySpec { -/// /// Gets the global instance of this spec. -/// pub fn get() -> &'static Self { -/// // ... -/// # todo!() -/// } -/// -/// /// Gets the actual compiled spec. -/// pub fn spec(&self) -> &Spec { -/// // ... -/// # todo!() -/// } -/// } -/// # } -/// ``` -/// -// God cross-trait links suck. -/// [`Lexeme`]: https://docs.rs/ilex/latest/ilex/struct.Lexeme.html -/// [`Rule`]: https://docs.rs/ilex/latest/ilex/rule/trait.Rule.html -/// [crate]: https://docs.rs/ilex -#[proc_macro_attribute] -pub fn spec(_attr: TokenStream, item: TokenStream) -> TokenStream { - // This is implemented as a decl macro, because that's easier to - // understand and debug than proc macros. I hate proc macros so much. 
- let span = Span::call_site(); - let macro_call: [TokenTree; 8] = [ - Punct::new(':', Spacing::Joint).into(), - Punct::new(':', Spacing::Alone).into(), - Ident::new("ilex", span).into(), - Punct::new(':', Spacing::Joint).into(), - Punct::new(':', Spacing::Alone).into(), - Ident::new("__spec__", span).into(), - Punct::new('!', Spacing::Alone).into(), - Group::new(Delimiter::Brace, item).into(), - ]; - - macro_call.into_iter().collect() -} // This helper exists only to make the #[spec] field attributes inert. #[doc(hidden)] @@ -95,3 +8,68 @@ pub fn spec(_attr: TokenStream, item: TokenStream) -> TokenStream { pub fn derive(_: TokenStream) -> TokenStream { TokenStream::new() } + +proc2decl::bridge! { + /// Generates a lexer spec struct. + /// + /// This macro generates the type of struct described in the + /// [crate documentation][crate]. The syntax is as follows. + /// + /// ```ignore + /// use ilex::rule::Keyword; + /// use ilex::Lexeme; + /// + /// /// My cool spec. + /// #[ilex::spec] + /// struct MySpec { + /// #[named("...")] + /// #[rule(/* ... */)] + /// dollar: Lexeme = "$", + /// } + /// ``` + /// + /// The type of each field must be a [`Lexeme`] with a [`Rule`] type as its + /// parameter. There are two special attributes that can follow. + /// + /// - `#[named]` makes the rule into a *named* rule. This name can be used by + /// diagnostics, and corresponds to calling `Spec::named_rule()`. + /// + /// - `#[rule]` is the value to use to construct the rule, which must be + /// `Into`, where `R` is the type inside `Lexeme` (so, above, the rule + /// value must be `Into`). By default, this value is the name of the + /// rule, to make the common case of declaring a keyword as simple as writing + /// `nullptr: Lexeme`, assuming Rust itself doesn't already use that + /// keyword. + /// + /// Note that *order matters* for the fields: when breaking a tie between two + /// potential tokens of the same length, the first one in the struct will win. 
+ /// In practice, this means you should put keywords before identifiers. + /// + /// Additionally, the following functions will be defined for the `MySpec` type. + /// + /// ``` + /// # struct Spec; + /// # struct MySpec; + /// # fn norun(_: i32) { + /// impl MySpec { + /// /// Gets the global instance of this spec. + /// pub fn get() -> &'static Self { + /// // ... + /// # todo!() + /// } + /// + /// /// Gets the actual compiled spec. + /// pub fn spec(&self) -> &Spec { + /// // ... + /// # todo!() + /// } + /// } + /// # } + /// ``` + /// + // God cross-trait links suck. + /// [`Lexeme`]: https://docs.rs/ilex/latest/ilex/struct.Lexeme.html + /// [`Rule`]: https://docs.rs/ilex/latest/ilex/rule/trait.Rule.html + /// [crate]: https://docs.rs/ilex + macro #[spec] => ilex::__spec__; +} diff --git a/ilex/src/spec.rs b/ilex/src/spec.rs index 9139843..ea49840 100644 --- a/ilex/src/spec.rs +++ b/ilex/src/spec.rs @@ -297,6 +297,7 @@ impl Lexeme { #[macro_export] macro_rules! __spec__ { ( + #[spec] $(#[$meta:meta])* $vis:vis struct $name:ident {$( $(#[$($fmeta:tt)*])* diff --git a/proc2decl/Cargo.toml b/proc2decl/Cargo.toml new file mode 100644 index 0000000..3a14d56 --- /dev/null +++ b/proc2decl/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "proc2decl" +version = "0.1.0" +edition.workspace = true +authors.workspace = true +homepage.workspace = true +repository.workspace = true +keywords.workspace = true +license.workspace = true + +[dependencies] +nu-glob = "0.101.0" +unicode-xid = "0.2.6" +walkdir = "2.5.0" diff --git a/proc2decl/src/lib.rs b/proc2decl/src/lib.rs new file mode 100644 index 0000000..4940df5 --- /dev/null +++ b/proc2decl/src/lib.rs @@ -0,0 +1,434 @@ +//! `proc2decl` exists for one reason only: because proc macros are a toxic +//! ecosystem. +//! +//! Sometimes, you want to use an attribute to define a macro. Unfortunately, +//! Rust does not support declarative macros (also called macros-by-example) +//! 
for attributes, for reasons that essentially boil down to cookie-licking. +//! +//! This crate exists for one purpose only, and that is to facilitate writing +//! declarative macros that an attribute converts into. +//! +//! # How To Use +//! +//! 1. Define the macro-by-example you wish to use as the main implementation of +//! your attribute or derive. +//! +//! 2. Create a proc-macro crate. This is where the documentation for your +//! attribute will need to live. Your actual crate should depend on this +//! crate. +//! +//! 3. Use [`bridge!()`] to define your bridge proc macros. These +//! macro calls should be documented, since their doc comments are the ones +//! that will appear in rustdoc for your macros. +//! +//! 4. `pub use` the macros in your actual crate. +//! +//! Proc macros suck! + +pub extern crate proc_macro; + +use std::collections::HashMap; +use std::env; +use std::ffi::OsStr; +use std::fs; +use std::path::Component; +use std::path::PathBuf; +use std::sync::atomic::AtomicU64; +use std::sync::atomic::Ordering; + +use nu_glob::Pattern; +use proc_macro::Delimiter; +use proc_macro::Group; +use proc_macro::Ident; +use proc_macro::Literal; +use proc_macro::Punct; +use proc_macro::Spacing; +use proc_macro::Span; +use proc_macro::TokenStream; +use proc_macro::TokenTree; +use walkdir::WalkDir; + +/// Defines a new attribute or derive proc macro that forwards to the given +/// function-like macro. +/// +/// # Attribute Macros +/// +/// The tokens passed to `$macro!()` will be `#[$name(...)]` containing the +/// arguments of the attribute, followed by the item passed to the macro by +/// rustc. Like all other attribute macros, it will replace the annotated +/// item with the result of evaluating the macro, in this case a call to the +/// actual macro-by-example that implements it. +/// +/// ```ignore +/// macro_rules! __impl { +/// (#[my_macro] const $name:ident: $ty:ty = $expr:expr;) => {/* ... */} +/// } +/// +/// proc2decl::bridge! 
{ +/// // My cool macro. +/// macro #[my_macro] => my_crate::__impl; +/// } +/// ``` +/// +/// # Derive Macros +/// +/// The tokens passed to `$macro!()` will be the item passed to the macro by +/// rustc. Like all other derive macros, it will insert the result of evaluating +/// the macro immediately after the annotated item, in this case a call to the +/// actual macro-by-example that implements it. +/// +/// The `$attrs` are the names of inert helper attributes to define for +/// the derive. +/// +/// ```ignore +/// macro_rules! __impl { +/// (struct $name:ident {}) => {/* ... */} +/// } +/// +/// proc2decl::bridge! { +/// // My cool macro. +/// macro #[derive(MyMacro)], #[helper] => my_crate::__impl; +/// } +/// ``` +#[macro_export] +macro_rules! bridge { + ( + $(#[$attr:meta])* + macro #[$name:ident] => $crate_:ident::$macro:ident; + ) => { + $(#[$attr])* + #[proc_macro_attribute] + pub fn $name( + attr: $crate::proc_macro::TokenStream, + item: $crate::proc_macro::TokenStream, + ) -> $crate::proc_macro::TokenStream { + use $crate::proc_macro::*; + let span = Span::call_site(); + + $crate::attr_bridge( + stringify!($name), + stringify!($crate_), + stringify!($macro), + span, + attr, + item, + ) + } + }; + + ( + $(#[$attr:meta])* + macro #[derive($name:ident)] $(, #[$attrs:ident])* => $crate_:ident::$macro:ident + ) => { + $(#[$attr])* + #[proc_macro_derive($name, attributes($($attrs,)*))] + pub fn $name( + item: $crate::proc_macro::TokenStream, + ) -> $crate::proc_macro::TokenStream { + use $crate::proc_macro::*; + let span = Span::call_site(); + + $crate::derive_bridge( + stringify!($name), + stringify!($crate_), + stringify!($macro), + span, + item, + ) + } + }; +} + +/// Defines a new attribute proc macro that finds files matching a glob and +/// forwards the directory structure to the given function-like macro, in such +/// a way that a corresponding module structure can be defined using the +/// directory structure. 
+/// +/// The resulting attribute should be called as #[my_attr("glob", ...)], where +/// `glob` is a glob relative to the root of the crate the attribute appears in. +/// The glob will not match files across symlinks. +/// +/// The expanded-to macro will be called with the annotated item, followed by +/// token trees in the following form: +/// +/// ```ignore +/// foo { +/// bar { +/// baz("foo/bar/baz.txt", b"contents") +/// empty("foo/bar/empty.txt", b"contents") +/// } +/// bar2 { +/// boing("foo/bar2/boing.txt", b"contents") +/// } +/// } +/// ``` +/// +/// Any directories whose names contain identifiers that are not valid Rust +/// identifiers will be ignored. +#[macro_export] +macro_rules! fs_bridge { + ( + $(#[$attr:meta])* + macro #[$name:ident] => $crate_:ident::$macro:ident; + ) => { + $(#[$attr])* + #[proc_macro_attribute] + pub fn $name( + attr: $crate::proc_macro::TokenStream, + item: $crate::proc_macro::TokenStream, + ) -> $crate::proc_macro::TokenStream { + use $crate::proc_macro::*; + let span = Span::call_site(); + + $crate::dir_bridge( + stringify!($name), + stringify!($crate_), + stringify!($macro), + span, + attr, + item, + ) + } + }; +} + +static COUNTER: AtomicU64 = AtomicU64::new(0); + +#[doc(hidden)] +pub fn derive_bridge( + _name: &str, + crate_: &str, + macro_: &str, + span: Span, + item: TokenStream, +) -> TokenStream { + let extern_ = + format!("__extern{}_{}__", COUNTER.fetch_add(1, Ordering::Relaxed), crate_); + + stream([ + // extern crate $crate as __extern_$crate__; + Ident::new("extern", span).into(), + Ident::new("crate", span).into(), + Ident::new(crate_, span).into(), + Ident::new("as", span).into(), + Ident::new(&extern_, span).into(), + Punct::new(';', Spacing::Alone).into(), + // __extern_$crate__::$macro! 
{ attr item } + Ident::new(&extern_, span).into(), + Punct::new(':', Spacing::Joint).into(), + Punct::new(':', Spacing::Alone).into(), + Ident::new(macro_, span).into(), + Punct::new('!', Spacing::Alone).into(), + Group::new(Delimiter::Brace, item).into(), + ]) +} + +#[doc(hidden)] +pub fn attr_bridge( + name: &str, + crate_: &str, + macro_: &str, + span: Span, + args: TokenStream, + mut item: TokenStream, +) -> TokenStream { + if !args.is_empty() { + item = stream2( + [ + // #[name(args)] + Punct::new('#', Spacing::Alone).into(), + Group::new( + Delimiter::Bracket, + stream([ + Ident::new(name, span).into(), + Group::new(Delimiter::Parenthesis, args).into(), + ]), + ) + .into(), + ], + item, + ); + } else { + item = stream2( + [ + // #[name] + Punct::new('#', Spacing::Alone).into(), + Group::new(Delimiter::Bracket, stream([Ident::new(name, span).into()])) + .into(), + ], + item, + ); + } + + derive_bridge(name, crate_, macro_, span, item) +} + +#[doc(hidden)] +pub fn dir_bridge( + name: &str, + crate_: &str, + macro_: &str, + span: Span, + args: TokenStream, + item: TokenStream, +) -> TokenStream { + let Some(TokenTree::Literal(lit)) = args.clone().into_iter().next() else { + panic!("#[{crate_}::{name}] requires a glob as its first argument"); + }; + + // TODO(mcyoung): support all Rust string literals. 
let lit = lit.to_string(); + if !lit.starts_with('"') || !lit.ends_with('"') || lit.contains('\\') { + panic!("#[{crate_}::{name}] only supports single-quoted string literals without escapes"); + } + let glob = match Pattern::new(&lit[1..lit.len() - 1]) { + Ok(p) => p, + Err(e) => { + panic!("#[{crate_}::{name}] requires a glob as its first argument: {e}") + } + }; + + struct File { + path: String, + components: Vec, + contents: Vec, + } + + let mut names = Vec::new(); + let mut table = HashMap::new(); + let mut push_name = |name: &OsStr| -> Option { + let utf8 = name.to_str()?; + if !is_valid_ident(utf8) { + return None; + } + + Some(*table.entry(utf8.to_string()).or_insert_with_key(|k| { + let n = names.len(); + names.push(k.clone()); + n + })) + }; + + let mut files = Vec::new(); + let root = PathBuf::from(env::var_os("CARGO_MANIFEST_DIR").unwrap()); + 'walk: for entry in WalkDir::new(&root) { + let entry = match entry { + Ok(p) => p, + Err(e) => panic!("directory walk failed: {e}"), + }; + + let path = entry.path(); + if path.is_dir() { + continue 'walk; + } + + let rel = path.strip_prefix(&root).unwrap(); + if !glob.matches_path(rel) { + continue 'walk; + } + + let mut components = Vec::new(); + if let Some(parent) = rel.parent() { + for component in parent.components() { + let Component::Normal(component) = component else { + continue 'walk; + }; + let Some(name) = push_name(component) else { + continue 'walk; + }; + components.push(name); + } + } + + let Some(name) = push_name(path.file_stem().unwrap()) else { + continue 'walk; + }; + components.push(name); + + let Some(utf8) = path.as_os_str().to_str() else { + continue 'walk; + }; + + let contents = match fs::read(path) { + Ok(bytes) => bytes, + Err(e) => panic!("could not open file: {e}"), + }; + + files.push(File { + path: utf8.to_string(), + components, + contents, + }); + } + files.sort_by(|a, b| Ord::cmp(&a.components, &b.components)); + + let mut mod_stack: Vec> = 
vec![item.into_iter().collect()]; + let mut dir_stack = &[][..]; + for file in &files { + let dir = &file.components[..file.components.len() - 1]; + let [_, remove, add] = common_prefix(dir_stack, dir); + for &i in remove { + let items = mod_stack.pop().unwrap(); + mod_stack.last_mut().unwrap().extend_from_slice(&[ + Ident::new(&names[i], span).into(), + Group::new(Delimiter::Brace, items.into_iter().collect()).into(), + ]); + } + for _ in add { + mod_stack.push(Vec::new()); + } + dir_stack = dir; + + let name = &names[*file.components.last().unwrap()]; + mod_stack.last_mut().unwrap().extend_from_slice(&[ + Ident::new(name, span).into(), + Group::new( + Delimiter::Parenthesis, + stream([ + Literal::string(&file.path).into(), + Punct::new(',', Spacing::Alone).into(), + Literal::byte_string(&file.contents).into(), + ]), + ) + .into(), + ]); + } + + attr_bridge( + name, + crate_, + macro_, + span, + args, + mod_stack.swap_remove(0).into_iter().collect(), + ) +} + +fn common_prefix<'a, T: PartialEq>(a: &'a [T], b: &'a [T]) -> [&'a [T]; 3] { + for (i, (x, y)) in a.iter().zip(b).enumerate() { + if x != y { + return [&a[..i], &a[i..], &b[i..]]; + } + } + [a, &[], &[]] +} + +fn is_valid_ident(name: &str) -> bool { + use unicode_xid::UnicodeXID as _; + // See https://doc.rust-lang.org/reference/identifiers.html + name.chars().enumerate().all(|(i, c)| { + if i == 0 { + c == '_' || c.is_xid_start() + } else { + c.is_xid_continue() + } + }) +} + +fn stream(tt: [TokenTree; N]) -> TokenStream { + tt.into_iter().collect() +} + +fn stream2(tt: [TokenTree; N], ts: TokenStream) -> TokenStream { + tt.into_iter().chain(ts).collect() +} From c05fe6d9339d93e7905a0172017410d2b0a07f7d Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Tue, 14 Jan 2025 14:18:46 -0800 Subject: [PATCH 02/11] allman: Add a crate for code formatting and line reflowing --- Cargo.toml | 9 +- allman/Cargo.toml | 16 +++ allman/src/layout.rs | 113 ++++++++++++++++ allman/src/lib.rs | 301 
+++++++++++++++++++++++++++++++++++++++++++ allman/src/render.rs | 139 ++++++++++++++++++++ 5 files changed, 577 insertions(+), 1 deletion(-) create mode 100644 allman/Cargo.toml create mode 100644 allman/src/layout.rs create mode 100644 allman/src/lib.rs create mode 100644 allman/src/render.rs diff --git a/Cargo.toml b/Cargo.toml index 472497d..25ba5cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,12 @@ [workspace] -members = ["byteyarn", "buf-trait", "ilex", "ilex/attr", "twie", "proc2decl"] +members = [ + "allman", + "byteyarn", + "buf-trait", + "ilex", "ilex/attr", + "proc2decl", + "twie", +] resolver = "2" [workspace.package] diff --git a/allman/Cargo.toml b/allman/Cargo.toml new file mode 100644 index 0000000..8060578 --- /dev/null +++ b/allman/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "allman" +version = "0.1.0" +description = "source code formatting and line reflowing toolkit" + +edition.workspace = true +authors.workspace = true +homepage.workspace = true +repository.workspace = true +keywords.workspace = true +license.workspace = true + +[dependencies] +byteyarn = { path = "../byteyarn" } + +unicode-width = "0.2.0" \ No newline at end of file diff --git a/allman/src/layout.rs b/allman/src/layout.rs new file mode 100644 index 0000000..4cf6ac9 --- /dev/null +++ b/allman/src/layout.rs @@ -0,0 +1,113 @@ +//! Layout algorithm implementation. +//! +//! The only thing the layout algorithm *actually* has to decide is whether each +//! group breaks or not. The algorithm is as follows. +//! +//! 1. Measure the width of each element recursively. Elements which span +//! multiple lines are treated as being of infinite width. +//! +//! 2. Mark groups as broken recursively: for each group, if at its current +//! position, it would overflow the maximum column length, break it, and +//! recurse into it. 
+ +use unicode_width::UnicodeWidthStr; + +use crate::Cursor; +use crate::Doc; +use crate::If; +use crate::Measure; +use crate::Options; +use crate::Tag; +use crate::TagInfo; + +impl Doc<'_> { + pub(crate) fn do_layout(&self, opts: &Options) { + for (t, c) in self.cursor() { + measure(t, c); + } + + LayoutState { opts, indent: 0, column: 0 }.do_layout(self.cursor()); + } +} + +struct LayoutState<'a> { + opts: &'a Options, + + /// The column to start the next line at. + indent: usize, + + /// The next column that we would be writing at. + column: usize, +} + +impl LayoutState<'_> { + /// Advances state for rendering a tag within a broken group. + fn do_layout(&mut self, cursor: Cursor) { + for (tag, cursor) in cursor { + let cond = tag.cond != Some(If::Flat); + + let mut m = tag.measure.get(); + m.column = self.column; + match &tag.tag { + Tag::Text(text) => match text.rfind("\n") { + Some(nl) => self.column = self.indent + text[nl..].width(), + None => self.column += m.width.unwrap(), + }, + + Tag::Space => self.column += 1, + Tag::Break(0) => {} + Tag::Break(_) => self.column = self.indent, + + Tag::Group(max) => { + let mut width = + m.width.filter(|w| self.column + w <= self.opts.max_columns); + + if width.is_some_and(|w| w > *max) { + width = None; + } + + if let Some(w) = width { + // Don't need to do layout here: everything already fits. + self.column += w; + } else { + m.width = None; + + self.do_layout(cursor); + } + } + + Tag::Indent(columns) => { + if cond { + let prev = self.indent; + self.indent = self.indent.saturating_add_signed(*columns); + self.do_layout(cursor); + self.indent = prev; + } + } + } + tag.measure.set(m); + } + } +} + +/// Calculates the width of each element if it was laid out in one line. 
fn measure(tag: &TagInfo, cursor: Cursor) { + let tag_width = match &tag.tag { + _ if tag.cond == Some(If::Broken) => Some(0), + + Tag::Text(text) => (!text.contains("\n")).then(|| text.width()), + Tag::Space => Some(1), + Tag::Break(_) => None, + + _ => Some(0), + }; + + let width = cursor + .map(|(t, c)| { + measure(t, c); + t.measure.get().width + }) + .fold(tag_width, |a, b| a?.checked_add(b?)); + + tag.measure.set(Measure { width, column: 0 }); +} diff --git a/allman/src/lib.rs b/allman/src/lib.rs new file mode 100644 index 0000000..f3bb0bf --- /dev/null +++ b/allman/src/lib.rs @@ -0,0 +1,301 @@ +//! `allman` đŸ—’ī¸đŸ–‹ī¸ - A code formatting and line reflowing toolkit. +//! +//! [`allman::Doc`][Doc] is a DOM-like structure that specifies how indentation, +//! line breaking, and reflowing should be handled. It is a tree of [`Tag`]s +//! that dictate layout information for the source code to format. +//! +//! For example, the Allman brace style (for which this crate is named) can +//! be implemented as follows: +//! +//! ``` +//! # use allman::*; +//! // flat: fn foo() { ... } +//! // +//! // broken: +//! // fn foo() +//! // { +//! // // ... +//! // } +//! Doc::new() +//! .tag("fn") +//! .tag(Tag::Space) +//! .tag("foo") +//! .tag("(").tag(")") +//! .tag_with(Tag::Group(40), |doc| { +//! doc +//! .tag_if(Tag::Space, If::Flat) +//! .tag_if(Tag::Break(1), If::Broken) +//! .tag("{") +//! .tag_if(Tag::Space, If::Flat) +//! .tag_if(Tag::Break(1), If::Broken) +//! .tag_with(Tag::Indent(2), |doc| { +//! // Brace contents here... +//! }) +//! .tag_if(Tag::Space, If::Flat) +//! .tag_if(Tag::Break(1), If::Broken) +//! .tag("}"); +//! }); +//! ``` +//! +//! When calling [`Doc::render()`], the layout algorithm will determine whether +//! [`Tag::Group`]s should be "broken", i.e., laid out with newlines inside. 
+ +use core::slice; +use std::cell::Cell; +use std::fmt; +use std::io; + +use byteyarn::YarnBox; + +mod layout; +mod render; + +/// A source code document, which can be rendered as formatted text. +/// +/// A [`Doc`] is analogous to an HTML DOM, which is text along with markup for +/// laying out that text. The difference being that rather than being converted +/// into raster graphics by a browser engine, a [`Doc`] is rendered as a text +/// file. +#[derive(Clone, Default)] +pub struct Doc<'text> { + /// This is a flattened tree: each node specifies how many elements after it + /// make up its children. The `Cursor` type implements walking this tree. + tags: Vec>, +} + +/// A condition that can be applied to a tag. +/// +/// If a condition is set on a tag, and the condition is false, the tag is +/// treated as a no-op: its contents are not printed. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum If { + /// True when the containing group is printed on one line. + Flat, + /// True when the containing group does not fit on one line. + Broken, +} + +/// Options for [`Doc::render()`]. +pub struct Options { + /// The maximum number of columns in a line. + pub max_columns: usize, +} + +impl<'text> Doc<'text> { + /// Returns a new, empty document. + pub fn new() -> Self { + Self::default() + } + + /// Renders this document to the given writer. + pub fn render( + &self, + out: &mut dyn io::Write, + options: &Options, + ) -> io::Result<()> { + self.do_layout(options); + render::Printer::new(out).render(self.cursor(), options, true) + } + + /// Inserts a new self-closing tag into this doc. + pub fn tag(&mut self, tag: impl Into>) -> &mut Self { + self.tag_if_with(tag, None, |_| {}) + } + + /// Inserts a new tag into this doc. The given closure can be used to insert + /// tags into it. + /// + /// # Panics + /// + /// Panics if children are inserted and [`Tag::can_have_children()`] is false. 
+ pub fn tag_with( + &mut self, + tag: impl Into>, + body: impl FnOnce(&mut Self), + ) -> &mut Self { + self.tag_if_with(tag, None, body) + } + + /// Inserts a new tag into this doc, with an optional condition. + pub fn tag_if( + &mut self, + tag: impl Into>, + cond: impl Into>, + ) -> &mut Self { + self.tag_if_with(tag, cond, |_| {}) + } + + /// Inserts a new tag into this doc, with an optional condition. The given + /// closure can be used to insert tags into it. + /// + /// # Panics + /// + /// Panics if children are inserted and [`Tag::can_have_children()`] is false. + pub fn tag_if_with( + &mut self, + tag: impl Into>, + cond: impl Into>, + body: impl FnOnce(&mut Self), + ) -> &mut Self { + let tag = tag.into(); + let compound = tag.can_have_children(); + + let consolidate = matches!( + (&tag, self.tags.last().map(|t| &t.tag)), + (Tag::Space, Some(Tag::Space)) + ); + + let idx = self.tags.len(); + self.tags.push(TagInfo { + tag, + len: 0, + cond: cond.into(), + measure: Cell::default(), + }); + body(self); + + let len = self.tags.len() - idx - 1; + assert!( + compound || len == 0, + "inserted children for {:?}", + &self.tags[idx].tag + ); + + if consolidate { + self.tags.pop(); + } + + self.tags[idx].len = len; + self + } + + fn cursor(&self) -> Cursor { + Cursor { iter: self.tags.iter() } + } +} + +#[derive(Clone, Debug)] +struct TagInfo<'text> { + tag: Tag<'text>, + len: usize, + cond: Option, + + measure: Cell, +} + +#[derive(Copy, Clone, Default, Debug)] +struct Measure { + /// The number of columns this tag takes up when it is formatted on one line. + /// + /// None if its width should be treated as infinite. + width: Option, + column: usize, +} + +/// An element of a [`Doc`]. +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum Tag<'text> { + /// Verbatim text. Line breaks inside of this text cause any groups that + /// contain it to be broken. + Text(YarnBox<'text, str>), + + /// Inserts a space, except if it would end a line. 
This is intended for + /// ensuring lines do not have trailing whitespace. [`Tag::Text`] containing + /// a space can be used to force a space at the end of a line. + /// + /// Consecutive space tags are consolidated into one. + Space, + + /// Inserts the given number of newlines, and breaks the surrounding group. + /// + /// Consecutive breaks are consolidated into one. A `Break(0)` can be used + /// to force a break without inserting an actual newline. + Break(usize), + + /// A sequence of tags that may either be rendered as one line, or broken into + /// multiple lines if it does not fit. + /// + /// The group will also break itself if it is wider than the given width; + /// use [`usize::MAX`] to disable this. + Group(usize), + + /// Change indentation by the given number of columns. + Indent(isize), +} + +impl Tag<'_> { + /// Returns whether or not this tag can contain child tags. + pub fn can_have_children(&self) -> bool { + matches!(self, Self::Group(..) | Self::Indent(..)) + } +} + +impl<'text, Y: Into>> From for Tag<'text> { + fn from(yarn: Y) -> Self { + Self::Text(yarn.into()) + } +} + +/// A cursor over a piece of a [`Doc`]. +struct Cursor<'a> { + iter: slice::Iter<'a, TagInfo<'a>>, +} + +impl<'a> Iterator for Cursor<'a> { + type Item = (&'a TagInfo<'a>, Cursor<'a>); + + fn next(&mut self) -> Option { + let next = self.iter.next()?; + if next.len == 0 { + // Fast path that avoids an extra bounds check. 
+ return Some((next, Cursor { iter: [].iter() })); + } + + let (contents, rest) = self.iter.as_slice().split_at(next.len); + self.iter = rest.iter(); + Some((next, Cursor { iter: contents.iter() })) + } +} + +impl fmt::Debug for Doc<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fn fmt( + indent: usize, + cursor: Cursor, + f: &mut fmt::Formatter, + ) -> fmt::Result { + for (tag, cursor) in cursor { + write!(f, "{:<1$}", "\n", indent + 1)?; + match &tag.tag { + Tag::Text(y) => write!(f, "{y:?}")?, + Tag::Space => write!(f, "")?, + Tag::Break(n) => write!(f, "")?, + Tag::Group(w) => { + if cursor.iter.as_slice().is_empty() { + write!(f, "")?; + continue; + } + + write!(f, "")?; + fmt(indent + 2, cursor, f)?; + write!(f, "")?; + } + Tag::Indent(c) => { + if cursor.iter.as_slice().is_empty() { + write!(f, "")?; + continue; + } + + write!(f, "")?; + fmt(indent + 2, cursor, f)?; + write!(f, "")?; + } + } + } + write!(f, "{:<1$}", "\n", indent - 2 + 1)?; + Ok(()) + } + + fmt(0, self.cursor(), f) + } +} diff --git a/allman/src/render.rs b/allman/src/render.rs new file mode 100644 index 0000000..60c83a8 --- /dev/null +++ b/allman/src/render.rs @@ -0,0 +1,139 @@ +use std::io; +use std::io::Write; +use std::mem; + +use crate::If; +use crate::Options; +use crate::Tag; + +/// An indentation-aware pretty-printer. +pub struct Printer<'a> { + out: &'a mut dyn io::Write, + indent: usize, + space: bool, + newlines: usize, +} + +impl<'a> Printer<'a> { + /// Returns a new printer with the given output and options. + pub fn new(out: &'a mut dyn io::Write) -> Self { + Self { + out, + indent: 0, + space: false, + newlines: 0, + } + } + + /// Updates the indentation level with the given diff. + pub fn with_indent( + &mut self, + diff: isize, + body: impl FnOnce(&mut Self) -> R, + ) -> R { + let prev = self.indent; + self.indent = self.indent.saturating_add_signed(diff); + let r = body(self); + self.indent = prev; + r + } + + /// Writes indentation, if necessary. 
+ pub fn write_indent(&mut self) -> io::Result<()> { + if mem::take(&mut self.newlines) == 0 { + return Ok(()); + } + + self.write_spaces(self.indent) + } + + /// Writes len ASCII spaces to the output. + pub fn write_spaces(&mut self, mut len: usize) -> io::Result<()> { + const SPACES: &[u8; 32] = b" "; + + while len > SPACES.len() { + self.out.write_all(SPACES)?; + len -= SPACES.len(); + } + self.out.write_all(&SPACES[..len])?; + Ok(()) + } + + pub fn render( + &mut self, + cursor: crate::Cursor, + _options: &Options, + parent_is_broken: bool, + ) -> io::Result<()> { + for (tag, cursor) in cursor { + let cond = match tag.cond { + Some(If::Broken) => parent_is_broken, + Some(If::Flat) => !parent_is_broken, + None => true, + }; + + match &tag.tag { + Tag::Text(text) => { + if cond { + write!(self, "{text}")?; + } + } + + Tag::Space => self.space |= cond, + Tag::Break(n) => { + if cond { + for _ in self.newlines..*n { + writeln!(self)?; + } + } + } + + Tag::Group(..) => { + let m = tag.measure.get(); + self.render(cursor, _options, m.width.is_none())?; + } + + Tag::Indent(columns) => { + if cond { + self.with_indent(*columns, |p| { + p.render(cursor, _options, parent_is_broken) + })?; + } + } + } + } + + Ok(()) + } +} + +impl io::Write for Printer<'_> { + fn write(&mut self, buf: &[u8]) -> io::Result { + if buf.is_empty() { + return Ok(0); + } + + if mem::take(&mut self.space) && !buf.starts_with(b"\n") { + self.write_all(b" ")?; + } + + for line in buf.split_inclusive(|&b| b == b'\n') { + if line == b"\n" { + self.newlines += 1; + self.out.write_all(line)?; + continue; + } + + self.write_indent()?; + self.out.write_all(line)?; + if line.ends_with(b"\n") { + self.newlines = 1; + } + } + Ok(buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + self.out.flush() + } +} From 04af18e6091c6ea7cc905b4fa11b336beab4404f Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Tue, 14 Jan 2025 14:20:14 -0800 Subject: [PATCH 03/11] gilded: Add a crate for simple 
golden tests --- Cargo.toml | 1 + gilded/Cargo.toml | 23 +++ gilded/attr/Cargo.toml | 11 ++ gilded/attr/lib.rs | 6 + gilded/src/doc/emit.rs | 139 ++++++++++++++++++ gilded/src/doc/json.rs | 120 ++++++++++++++++ gilded/src/doc/mod.rs | 201 ++++++++++++++++++++++++++ gilded/src/doc/yaml.rs | 174 +++++++++++++++++++++++ gilded/src/lib.rs | 315 +++++++++++++++++++++++++++++++++++++++++ 9 files changed, 990 insertions(+) create mode 100644 gilded/Cargo.toml create mode 100644 gilded/attr/Cargo.toml create mode 100644 gilded/attr/lib.rs create mode 100644 gilded/src/doc/emit.rs create mode 100644 gilded/src/doc/json.rs create mode 100644 gilded/src/doc/mod.rs create mode 100644 gilded/src/doc/yaml.rs create mode 100644 gilded/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 25ba5cd..9077734 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ members = [ "allman", "byteyarn", "buf-trait", + "gilded", "gilded/attr", "ilex", "ilex/attr", "proc2decl", "twie", diff --git a/gilded/Cargo.toml b/gilded/Cargo.toml new file mode 100644 index 0000000..19e2cd1 --- /dev/null +++ b/gilded/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "gilded" +version = "0.1.0" +description = "Dead simple golden tests" + +edition.workspace = true +authors.workspace = true +homepage.workspace = true +repository.workspace = true +keywords.workspace = true +license.workspace = true + +[dependencies] +gilded-attr = { path = "attr" } + +allman = { path = "../allman" } +byteyarn = { path = "../byteyarn" } + +camino = "1.1.9" +diffy = "0.4.0" +nu-glob = "0.101.0" +unicode-width = "0.2.0" + diff --git a/gilded/attr/Cargo.toml b/gilded/attr/Cargo.toml new file mode 100644 index 0000000..5bc4467 --- /dev/null +++ b/gilded/attr/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "gilded-attr" +version = "0.1.0" +edition = "2021" + +[dependencies] +proc2decl = { path = "../../proc2decl" } + +[lib] +path = "lib.rs" +proc-macro = true \ No newline at end of file diff --git a/gilded/attr/lib.rs 
b/gilded/attr/lib.rs new file mode 100644 index 0000000..5d69268 --- /dev/null +++ b/gilded/attr/lib.rs @@ -0,0 +1,6 @@ +//! Implementation detail of `gilded`. + +proc2decl::fs_bridge! { + /// + macro #[test] => gilded::__test__; +} diff --git a/gilded/src/doc/emit.rs b/gilded/src/doc/emit.rs new file mode 100644 index 0000000..1ec3c4a --- /dev/null +++ b/gilded/src/doc/emit.rs @@ -0,0 +1,139 @@ +use std::fmt; +use std::io; +use std::mem; + +use byteyarn::YarnRef; +use unicode_width::UnicodeWidthStr; + +use crate::doc::DocFormat; +use crate::doc::DocOptions; + +/// An indentation-aware pretty-printer. +pub struct Printer<'a> { + out: &'a mut dyn io::Write, + options: &'a DocOptions, + indent: usize, + at_newline: bool, +} + +impl<'a> Printer<'a> { + /// Returns a new printer with the given output and options. + pub fn new(out: &'a mut dyn io::Write, options: &'a DocOptions) -> Self { + Self { + out, + options, + indent: 0, + at_newline: true, + } + } + + /// Updates the indentation level with the given diff. + pub fn indent(&mut self, diff: isize) { + self.indent = self.indent.checked_add_signed(diff).unwrap(); + } + + /// Writes indentation, if necessary. + pub fn write_indent(&mut self) -> io::Result<()> { + if !mem::take(&mut self.at_newline) { + return Ok(()); + } + self.at_newline = false; + self.write_spaces(self.indent * self.options.tab_width) + } + + /// Writes len ASCII spaces to the output. 
+ pub fn write_spaces(&mut self, mut len: usize) -> io::Result<()> { + const SPACES: &[u8; 32] = b" "; + + while len > SPACES.len() { + self.out.write_all(SPACES)?; + len -= SPACES.len(); + } + self.out.write_all(&SPACES[..len])?; + Ok(()) + } + + pub fn yaml_list_item(&mut self) -> io::Result<()> { + writeln!(self)?; + self + .write_indent((self.indent * self.options.tab_width).saturating_sub(2))?; + write!(self, "- ") + } + + pub fn escaped_string(&mut self, data: YarnRef<[u8]>) -> io::Result<()> { + let yaml = self.options.format == DocFormat::Yaml; + + if yaml { + if let Some(ident) = is_ident(data) { + return write!(self, "{ident}"); + } + } + + write!(self, "\"")?; + for chunk in data.utf8_chunks() { + let chunk = match chunk { + Ok(s) => s, + Err(e) => { + for b in e { + write!(self, "\\x{b:02x}")?; + } + continue; + } + }; + for c in chunk.chars() { + match c { + '\0' if yaml => write!(self, "\\0")?, + '\n' => write!(self, "\\n")?, + '\r' => write!(self, "\\r")?, + '\t' => write!(self, "\\t")?, + '\\' => write!(self, "\\\\")?, + '\"' => write!(self, "\\\"")?, + c if !c.is_control() => write!(self, "{c}")?, + c if yaml && c.is_ascii() => write!(self, "\\x{:02x}", c as u32)?, + c => { + for u in c.encode_utf16(&mut [0, 0]) { + write!(self, "\\u{u:04x}")?; + } + } + } + } + } + write!(self, "\"") + } + +} + +impl io::Write for Printer<'_> { + fn write(&mut self, buf: &[u8]) -> io::Result { + for line in buf.split_inclusive(|&b| b == b'\n') { + self.write_indent()?; + self.out.write_all(line)?; + if line.ends_with(b"\n") { + self.at_newline = true; + } + } + Ok(buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + self.out.flush() + } +} + +/// Returns the number of terminal columns that the printed output of `d` takes +/// up. 
+pub fn width(d: &dyn fmt::Display) -> usize { + use fmt::Write; + + struct Counter(usize); + impl Write for Counter { + fn write_str(&mut self, s: &str) -> fmt::Result { + self.0 += s.width(); + Ok(()) + } + } + + let mut counter = Counter(0); + let _ = write!(&mut counter, "{}", d); + counter.0 +} \ No newline at end of file diff --git a/gilded/src/doc/json.rs b/gilded/src/doc/json.rs new file mode 100644 index 0000000..fd2324c --- /dev/null +++ b/gilded/src/doc/json.rs @@ -0,0 +1,120 @@ +//! Output implementation for JSON. + +use std::fmt; + +use allman::If; +use allman::Tag; +use byteyarn::YarnRef; + +use crate::doc::Doc; +use crate::doc::DocOptions; +use crate::doc::Value; + +pub fn build<'t>( + options: &DocOptions, + doc: &Doc<'t>, + out: &mut allman::Doc<'t>, +) { + let is_array = doc.entries.iter().all(|(k, _)| k.is_none()); + if is_array { + out.tag_with(Tag::Group(options.max_array_width), |out| { + out + .tag("[") + .tag_with(Tag::Indent(options.tab_width as isize), |out| { + for (i, (_, entry)) in doc.entries.iter().enumerate() { + if i > 0 { + out.tag(","); + out.tag_if(Tag::Space, If::Flat); + } + out.tag_if("\n", If::Broken); + value(options, entry, out); + } + }) + .tag_if("\n", If::Broken) + .tag("]"); + }); + } else { + out.tag_with(Tag::Group(options.max_object_width), |out| { + out + .tag("{") + .tag_with(Tag::Indent(options.tab_width as isize), |out| { + for (i, (key, entry)) in doc.entries.iter().enumerate() { + if i > 0 { + out.tag(","); + out.tag_if(Tag::Space, If::Flat); + } + out + .tag_if("\n", If::Broken) + .tag( + Escape(key.as_deref().unwrap_or_default().as_bytes()) + .to_string(), + ) + .tag(":") + .tag(Tag::Space); + value(options, entry, out); + } + }) + .tag_if("\n", If::Broken) + .tag("}"); + }); + } +} + +fn value<'t>(options: &DocOptions, v: &Value<'t>, out: &mut allman::Doc<'t>) { + match v { + Value::Bool(v) => { + out.tag(v.to_string()); + } + Value::Int(v) => { + out.tag(v.to_string()); + } + Value::UInt(v) => { + 
out.tag(v.to_string()); + } + Value::Fp(v) => { + out.tag(v.to_string()); + } + Value::String(v) => { + out.tag(Escape(v).to_string()); + } + Value::Doc(v) => build(options, v, out), + } +} + +/// A displayable that prints the given data as a JSON string. +pub struct Escape<'a>(&'a [u8]); + +impl fmt::Display for Escape<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "\"")?; + for chunk in YarnRef::new(self.0).utf8_chunks() { + let chunk = match chunk { + Ok(s) => s, + Err(e) => { + for b in e { + write!(f, "<{b:02x}>")?; + } + continue; + } + }; + + for c in chunk.chars() { + match c { + '\n' => write!(f, "\\n")?, + '\r' => write!(f, "\\r")?, + '\t' => write!(f, "\\t")?, + '\\' => write!(f, "\\\\")?, + '\"' => write!(f, "\\\"")?, + c if !c.is_control() => write!(f, "{c}")?, + c => { + for u in c.encode_utf16(&mut [0, 0]) { + write!(f, "\\u{u:04x}")?; + } + } + } + } + } + + write!(f, "\"") + } +} diff --git a/gilded/src/doc/mod.rs b/gilded/src/doc/mod.rs new file mode 100644 index 0000000..6189383 --- /dev/null +++ b/gilded/src/doc/mod.rs @@ -0,0 +1,201 @@ +use std::io; +use std::io::Write; + +use byteyarn::YarnBox; + +mod json; +mod yaml; + +/// A tree-shaped document that can be pretty-printed, for generating goldens. +/// +/// Golden tests that output tree-shaped data can use `Doc` to generate +/// diff-friendly, readable output. +pub struct Doc<'a> { + entries: Vec<(Option>, Value<'a>)>, +} + +// The format output to use when rendering a document. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum DocFormat { + Yaml, + Json, +} + +impl Default for DocFormat { + fn default() -> Self { + Self::Yaml + } +} + +/// Options for rendering a [`Doc`] as a string. +pub struct DocOptions { + // The format to output in; defaults to YAML. + pub format: DocFormat, + // The number of spaces to use for indentation. + pub tab_width: usize, + + // The maximum number of columns to have before wrapping occurs. 
+ pub max_columns: usize, + // The maximum number of columns for a one-line array. + pub max_array_width: usize, + // The maximum number of columns for a one-line object. + pub max_object_width: usize, +} + +impl Default for DocOptions { + fn default() -> Self { + Self { + format: DocFormat::default(), + tab_width: 2, + max_columns: 80, + max_array_width: 50, + max_object_width: 40, + } + } +} + +/// A type which can be an element of a [`Doc`]. +/// +/// All of the primitive number types and types which convert to `YarnBox<[u8]>` +/// can be used as `Doc` values. `Option` for `T: DocValue` can also be +/// used, and will only be inserted if it is `Some`. +pub trait DocValue<'a> { + fn append_to(self, doc: &mut Doc<'a>); +} + +impl<'a> Doc<'a> { + /// Returns a new, empty `Doc`. + pub fn new() -> Self { + Self { entries: Vec::new() } + } + + /// Returns a new `Doc` with a single entry. + pub fn single( + name: impl Into>, + value: impl DocValue<'a>, + ) -> Self { + Self::new().entry(name, value) + } + + /// Appends a sequence of values to this document. + pub fn push( + mut self, + elements: impl IntoIterator>, + ) -> Self { + for e in elements { + e.append_to(&mut self); + } + self + } + + /// Appends an entry with the given name to this document. + pub fn entry( + mut self, + name: impl Into>, + value: impl DocValue<'a>, + ) -> Self { + let prev = self.entries.len(); + value.append_to(&mut self); + if prev < self.entries.len() { + self.entries.last_mut().unwrap().0 = Some(name.into()); + } + self + } + + /// Appends an entry which is an array with the given elements. + pub fn array( + self, + name: impl Into>, + elements: impl IntoIterator>, + ) -> Self { + self.entry(name, Self::new().push(elements)) + } + + // Converts this document into a string, using the given options. 
+ pub fn to_string(&self, options: &DocOptions) -> String { + let mut out = Vec::new(); + let _ = self.render(&mut out, options); + String::from_utf8(out).unwrap() + } + + /// Converts this document into a string, writing it to the given output with + /// the given options. + pub fn render( + &self, + out: &mut dyn Write, + options: &DocOptions, + ) -> io::Result<()> { + let mut doc = allman::Doc::new(); + + match options.format { + DocFormat::Yaml => yaml::build( + yaml::Args { options, root: true, in_list: false }, + self, + &mut doc, + ), + DocFormat::Json => json::build(options, self, &mut doc), + } + + doc.render(out, &allman::Options { max_columns: options.max_columns }) + } +} + +impl Default for Doc<'_> { + fn default() -> Self { + Self::new() + } +} + +enum Value<'a> { + Bool(bool), + Int(i128), + UInt(u128), + Fp(f64), + String(YarnBox<'a>), + Doc(Doc<'a>), +} + +impl<'a, T: DocValue<'a>> DocValue<'a> for Option { + fn append_to(self, doc: &mut Doc<'a>) { + if let Some(v) = self { + v.append_to(doc) + } + } +} +impl<'a> DocValue<'a> for Doc<'a> { + fn append_to(self, doc: &mut Doc<'a>) { + doc.entries.push((None, Value::Doc(self))) + } +} + +macro_rules! impl_from { + ($({$($T:ty),*} => $V:ident,)*) => {$($( + impl<'a> DocValue<'a> for $T { + fn append_to(self, doc: &mut Doc<'a>) { + doc.entries.push((None, Value::$V(self as _))) + } + } + )*)*} +} + +impl_from! { + {bool} => Bool, + {i8, i16, i32, i64, i128, isize} => Int, + {u8, u16, u32, u64, u128, usize} => UInt, + {f32, f64} => Fp, +} + +macro_rules! impl_from_yarn { + ($(for<$lt:lifetime> $($T:ty),* => $U:ty,)*) => {$($( + impl<$lt> DocValue<$lt> for $T { + fn append_to(self, doc: &mut Doc<$lt>) { + doc.entries.push((None, Value::String(<$U>::from(self).into_bytes()))) + } + } + )*)*} +} + +impl_from_yarn! 
{ + for<'a> &'a [u8], Vec, YarnBox<'a, [u8]> => YarnBox<'a, [u8]>, + for<'a> char, &'a str, String, YarnBox<'a, str> => YarnBox<'a, str>, +} diff --git a/gilded/src/doc/yaml.rs b/gilded/src/doc/yaml.rs new file mode 100644 index 0000000..6c85af0 --- /dev/null +++ b/gilded/src/doc/yaml.rs @@ -0,0 +1,174 @@ +//! Output implementation for YAML. + +use std::fmt; + +use allman::If; +use allman::Tag; +use byteyarn::YarnRef; + +use crate::doc::Doc; +use crate::doc::DocOptions; +use crate::doc::Value; + +pub struct Args<'a> { + pub root: bool, + pub in_list: bool, + pub options: &'a DocOptions, +} + +pub fn build<'t>(args: Args, doc: &'t Doc<'t>, out: &mut allman::Doc<'t>) { + let is_array = doc.entries.iter().all(|(k, _)| k.is_none()); + if is_array { + out.tag_with(Tag::Group(args.options.max_array_width), |out| { + out.tag_if("[", If::Flat); + if !args.root { + out.tag_if(Tag::Break(1), If::Broken); + } + for (i, (_, entry)) in doc.entries.iter().enumerate() { + if i > 0 { + out.tag_if(",", If::Flat); + out.tag_if(Tag::Space, If::Flat); + } + + out.tag_if("-", If::Broken); + out.tag_if(Tag::Space, If::Broken); + //out.tag_with(Tag::Indent(args.options.tab_width as isize), |out| { + value(Args { root: false, in_list: true, ..args }, entry, out); + //}); + + out.tag_if(Tag::Break(1), If::Broken); + } + out.tag_if("]", If::Flat); + }); + } else { + out.tag_with(Tag::Group(args.options.max_object_width), |out| { + let in_map = !args.root && !args.in_list; + if in_map { + out.tag_if(Tag::Break(1), If::Broken); + } + out + .tag_if("{", If::Flat) + .tag_with(Tag::Indent(args.options.tab_width as isize), |out| { + for (i, (key, entry)) in doc.entries.iter().enumerate() { + if i > 0 { + out.tag_if(",", If::Flat); + out.tag_if(Tag::Space, If::Flat); + } + + let key_bytes = key.as_deref().unwrap_or_default().as_bytes(); + let ident = is_ident(key_bytes); + + if let Some(ident) = ident { + out.tag(ident.to_box()); + + let mut entry = entry; + while let Value::Doc(d) = entry { + let 
[(Some(k), v)] = d.entries.as_slice() else { break }; + let Some(ident) = is_ident(k.as_bytes()) else { break }; + + out.tag(".").tag(ident.to_box()); + entry = v; + } + } else { + out.tag(Escape(key_bytes).to_string()); + } + out.tag(":").tag(Tag::Space); + + value(Args { root: false, in_list: false, ..args }, entry, out); + out.tag_if(Tag::Break(1), If::Broken); + } + }) + .tag_if("}", If::Flat); + }); + } +} + +fn value<'t>(args: Args, v: &'t Value<'t>, out: &mut allman::Doc<'t>) { + match v { + Value::Bool(v) => { + out.tag(v.to_string()); + } + Value::Int(v) => { + out.tag(v.to_string()); + } + Value::UInt(v) => { + out.tag(v.to_string()); + } + Value::Fp(v) => { + out.tag(v.to_string()); + } + Value::String(v) => { + if is_raw_string(v.as_ref()) { + out.tag("|").tag(Tag::Break(1)).tag_with( + Tag::Indent(args.options.tab_width as isize), + |out| { + out.tag(v.as_ref().to_utf8().unwrap().to_box()); + }, + ); + return; + } + out.tag(Escape(v).to_string()); + } + Value::Doc(v) => build(args, v, out), + } +} + +/// A displayable that prints the given data as a JSON string. 
+pub struct Escape<'a>(&'a [u8]); + +impl fmt::Display for Escape<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "\"")?; + for chunk in YarnRef::new(self.0).utf8_chunks() { + let chunk = match chunk { + Ok(s) => s, + Err(e) => { + for b in e { + write!(f, "\\x{b:02x}")?; + } + continue; + } + }; + + for c in chunk.chars() { + match c { + '\0' => write!(f, "\\0")?, + '\n' => write!(f, "\\n")?, + '\r' => write!(f, "\\r")?, + '\t' => write!(f, "\\t")?, + '\\' => write!(f, "\\\\")?, + '\"' => write!(f, "\\\"")?, + c if !c.is_control() => write!(f, "{c}")?, + c => { + for u in c.encode_utf16(&mut [0, 0]) { + write!(f, "\\u{u:04x}")?; + } + } + } + } + } + + write!(f, "\"") + } +} + +fn is_raw_string(data: YarnRef<[u8]>) -> bool { + data.to_utf8().is_ok_and(|s| { + s.contains("\n") && s.chars().all(|c| c == '\n' || !c.is_control()) + }) +} + +fn is_ident(data: &[u8]) -> Option> { + fn is_start(c: char) -> bool { + c.is_alphabetic() || c == '_' || c == '-' + } + fn is_continue(c: char) -> bool { + is_start(c) || c.is_numeric() + } + + let s = YarnRef::from(data).to_utf8().ok()?; + + let mut chars = s.chars(); + let is_ident = chars.next().is_some_and(is_start) && chars.all(is_continue); + is_ident.then_some(s) +} diff --git a/gilded/src/lib.rs b/gilded/src/lib.rs new file mode 100644 index 0000000..c5493ed --- /dev/null +++ b/gilded/src/lib.rs @@ -0,0 +1,315 @@ +//! `gilded` - Easy-peesy golden testing. 👑 +//! +//! # Why Golden Testing? +//! +//! A "golden test" is a test that transforms data in some way, and validates it +//! by diffing it against an expected result: the "golden". +//! +//! This is especially useful for testing scenarios that consume an input file +//! (say, a source code file, for testing a compiler) and generate structured, +//! diffable textual output (such as JSON or CSV data, or even a `Debug`). +//! +//! Golden tests are best for cases where the output must be deterministic, and +//! 
where capturing fine-grained detail is valuable. +//! +//! Because they simply compare the result to an expected value byte-for-byte, +//! changes can quickly regenerate the test output by using the output of the +//! test itself. Diffs can be examined in code review directly. +//! +//! # Defining a Test +//! +//! A `gilded` test is defined like so: +//! +//! ``` +//! #[gilded::test("testdata/**/*.txt")] +//! fn my_test(test: &gilded::Test) { +//! // ... +//! } +//! ``` +//! +//! `my_test` will be run as a separate unit test for every file (relative to +//! the crate root) which matches the glob passed to the attribute. The input +//! file's path and contents can be accessed through the [`Test`] accessors. +//! +//! To specify a test output, use [`Test::output()`]. This specifies the +//! file extension for the golden, and its computed contents. The extension is +//! used to construct the path of the result. If the input is `foo/bar.txt`, and +//! the extension for this output is `csv`, the output will be read/written to +//! `foo/bar.csv`. +//! +//! Panicking within the test body will fail the test as normal, tests should +//! not contain output assertions; those are handled by the framework. +//! +//! # Generating Goldens +//! +//! Once the test is created, simply set the `GILDED_REGENERATE` environment +//! variable: `GILDED_REGENERATE=1 cargo test`. +//! +//! To regenerate a specific test, simply pass its name as a filter to the test. +//! See `cargo test -- --help` for available flags.` + +use std::env; +use std::fs; +use std::fs::File; +use std::path::Path; +use std::str; + +use camino::Utf8Path; + +pub use gilded_attr::test; + +mod doc; +pub use doc::*; + +/// The environment variable that is checked to decide whether or not to +/// regenerate goldens. +pub const REGENERATE: &str = "GILDED_REGENERATE"; + +/// A golden test suite, corresponding to a single invocation of the +/// [`#[gilded::test]`][test] macro. 
+pub struct Suite { + name: &'static str, + crate_root: &'static Path, + test_root: &'static Utf8Path, + run: fn(&mut Test), +} + +impl Suite { + /// Returns the name of this test suite (i.e., the name of the function that + /// `#[gilded::test]` was applied to). + pub fn name(&self) -> &str { + self.name + } + + /// Constructs a new test suite. + #[doc(hidden)] + pub fn new( + name: &'static str, + crate_root: &'static str, + run: fn(&mut Test), + paths: &[&'static str], + ) -> Suite { + let crate_root = Path::new(crate_root); + + let Some(mut common_prefix) = paths.first().copied() else { + return Suite { + name, + crate_root, + run, + test_root: Utf8Path::new(""), + }; + }; + + common_prefix = Utf8Path::new(common_prefix) + .parent() + .map(Utf8Path::as_str) + .unwrap_or(""); + + let sep = std::path::MAIN_SEPARATOR; + for path in &paths[1..] { + let common = common_prefix.split_inclusive(sep); + let chunks = path.split_inclusive(sep); + + let len = common + .zip(chunks) + .take_while(|(a, b)| a == b) + .map(|(a, _)| a.len()) + .sum(); + + common_prefix = &common_prefix[..len]; + } + + common_prefix = common_prefix.trim_end_matches(sep); + Suite { + name, + crate_root, + run, + test_root: Utf8Path::new(common_prefix), + } + } + + /// Executes a test in this test suite with the given data. Panics to signal + /// test failure. + /// + /// This is the function called in the body of a generated test function. + #[doc(hidden)] + #[track_caller] + pub fn run(&'static self, path: &'static str, text: &'static [u8]) { + let root = self.crate_root.join(self.test_root); + let path = Utf8Path::new(path); + let file = self.crate_root.join(path); + let lock = root.join("GILDED_CHANGED"); + let lock_name = self.test_root.join("GILDED_CHANGED"); + + // TODO: make sure this is normalized to being a Unix path on Windows. 
+ let name = path.strip_prefix(self.test_root).unwrap(); + + let mut test = Test { + suite: self, + path: name, + text, + outputs: Vec::new(), + }; + (self.run)(&mut test); + + let regen = env::var_os(REGENERATE).is_some(); + assert!( + regen || !lock.exists(), + "golden files have changed: verify changes and then delete {lock_name}", + ); + if regen { + eprintln!("{}", lock.display()); + File::create(lock).unwrap(); + } + + let mut failed = false; + for (extn, text) in &test.outputs { + let file = file.with_extension(extn); + let name = name.with_extension(extn); + + if regen { + if text.is_empty() { + if file.exists() { + fs::remove_file(file).unwrap(); + } + } else { + fs::write(file, text).unwrap(); + } + + continue; + } + + let mut want = String::new(); + if file.exists() { + want = fs::read_to_string(file).unwrap() + } + + if text == &*want { + continue; + } + + let fmt = diffy::PatchFormatter::new().with_color(); + let patch = diffy::create_patch(text, &want); + let patch = fmt.fmt_patch(&patch); + eprintln!("mismatch for {name}:\n{patch}\n"); + failed = true; + } + + assert!(!failed, "golden output did not match test output"); + assert!( + !regen, + "golden files have changed: verify changes and then delete {lock_name}", + ) + } +} + +/// A handle for a single golden test case. +pub struct Test<'t> { + suite: &'t Suite, + + path: &'t Utf8Path, + text: &'t [u8], + + outputs: Vec<(String, String)>, +} + +impl<'t> Test<'t> { + /// Returns the test suite this test case belongs to. + pub fn suite(&self) -> &'t Suite { + self.suite + } + + /// Returns a path for the test input. + /// + /// This path will be unique among test outputs, and will be the same + /// regardless of platform. However, it need not correspond to the actual + /// path used to read and write the test data. + pub fn path(&self) -> &'t Utf8Path { + self.path + } + + /// Returns the textual content of the test input. 
+ pub fn text(&self) -> &'t [u8] { + self.text + } + + /// Outputs a result for this test. + /// + /// A test may have many results, each of which has the same path as the input + /// with an extra extension. For example, for a `foo.txt` input, the output + /// might be `foo.txt.stderr`, in which case `extension` would be `stderr`. + pub fn output(&mut self, extension: &str, result: String) { + self.outputs.push((extension.into(), result)); + } +} + +/// Implementation macro for `#[gilded::test]`. +#[doc(hidden)] +#[macro_export] +macro_rules! __test__ { + ( + #[test($($_:tt)*)] + $(#[$attr:meta])* + fn $name:ident($($args:tt)*) { $($body:tt)* } + $($tt:tt)* + ) => { + #[cfg(test)] + mod $name { + use super::*; + pub static __SUITE__: ::std::sync::LazyLock<$crate::Suite> = + ::std::sync::LazyLock::new(|| $crate::Suite::new( + stringify!($name), + env!("CARGO_MANIFEST_DIR"), + |$($args)*| -> () { $($body)* }, + &$crate::__test__!(@paths[] $($tt)*), + )); + + $crate::__test__! { @tests $(#[$attr])* $($tt)* } + } + }; + + ( + @tests + $(#[$attr:meta])* + $mod:ident { $(inner:tt)* } + $($outer:tt)* + ) => { + mod $mod { + use super::__SUITE__; + $crate::__test__! { @tests $(#[$attr])* $(inner)* } + } + $crate::__test__! { @tests $(#[$attr])* $(outer)* } + }; + + ( + @tests + $(#[$attr:meta])* + $test:ident($path:expr, $text:expr) + $($tt:tt)* + ) => { + $(#[$attr])* + #[::std::prelude::rust_2021::test] + fn $test() { __SUITE__.run($path, $text) } + $crate::__test__! 
{ @tests $(#[$attr])* $($tt)* } + }; + + (@tests $(#[$attr:meta])*) => {}; + + ( + @paths[$($e:expr,)*] + $mod:ident { $(inner:tt)* } + $($outer:tt)* + ) => { + $crate::__test__!(@paths[$($e,)*] $(inner)* $(outer)*) + }; + + ( + @paths[$($e:expr,)*] + $test:ident($path:expr, $text:expr) + $($tt:tt)* + ) => { + $crate::__test__!(@paths[$($e,)* $path,] $($tt)*) + }; + + (@paths $e:expr) => { $e }; +} From fe95a2d3df11cc181a7a4f61bbd4d41a9fe8c798 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Tue, 14 Jan 2025 14:23:04 -0800 Subject: [PATCH 04/11] ilex: Switch tests over to using #[gilded::test] --- ilex/Cargo.toml | 2 + ilex/src/file/context.rs | 29 +- ilex/src/lib.rs | 1 - ilex/src/token/mod.rs | 1 + ilex/src/token/summary.rs | 101 ++++ ilex/tests/greedy.rs | 52 -- ilex/tests/greedy/greedy.tokens.yaml | 52 ++ ilex/tests/greedy/greedy.txt | 6 + ilex/tests/greedy/main.rs | 50 ++ ilex/tests/json.rs | 335 ------------- ilex/tests/json/array.ast.txt | 11 + ilex/tests/json/array.json | 1 + ilex/tests/json/array.tokens.yaml | 27 ++ ilex/tests/json/main.rs | 185 +++++++ ilex/tests/json/null.ast.txt | 1 + ilex/tests/json/null.json | 1 + ilex/tests/json/null.tokens.yaml | 6 + ilex/tests/json/obj.ast.txt | 67 +++ ilex/tests/json/obj.json | 12 + ilex/tests/json/obj.tokens.yaml | 245 ++++++++++ ilex/tests/llvm.rs | 284 ----------- ilex/tests/llvm/main.rs | 113 +++++ ilex/tests/llvm/smoke.ll | 27 ++ ilex/tests/llvm/smoke.tokens.yaml | 453 ++++++++++++++++++ ilex/tests/{numbers.rs => numbers/main.rs} | 100 ++-- ilex/tests/numbers/numbers.fp64.txt | 20 + ilex/tests/numbers/numbers.tokens.yaml | 234 +++++++++ ilex/tests/numbers/numbers.txt | 18 + ilex/tests/ui/ambiguous.rs | 176 ------- .../idents.stderr} | 9 +- ilex/tests/ui/ambiguous/idents.txt | 1 + .../tests/ui/ambiguous/no_xid_after_br.stderr | 9 + ilex/tests/ui/ambiguous/no_xid_after_br.txt | 1 + .../no_xid_after_cm.stderr} | 3 +- ilex/tests/ui/ambiguous/no_xid_after_cm.txt | 1 + 
.../tests/ui/ambiguous/no_xid_after_id.stderr | 9 + ilex/tests/ui/ambiguous/no_xid_after_id.txt | 1 + .../no_xid_after_kw.stderr} | 3 +- ilex/tests/ui/ambiguous/no_xid_after_kw.txt | 1 + .../no_xid_after_nm.stderr} | 9 +- ilex/tests/ui/ambiguous/no_xid_after_nm.txt | 1 + .../no_xid_after_st.stderr} | 3 +- ilex/tests/ui/ambiguous/no_xid_after_st.txt | 1 + ilex/tests/ui/ambiguous/nums.stderr | 9 + ilex/tests/ui/ambiguous/nums.txt | 1 + .../symbols_after_comment.tokens.yaml | 12 + .../ui/ambiguous/symbols_after_comment.txt | 1 + .../symbols_after_quoted.tokens.yaml | 10 + .../ui/ambiguous/symbols_after_quoted.txt | 1 + ilex/tests/ui/digital.rs | 161 ------- .../invalid.stderr} | 25 +- ilex/tests/ui/digital/invalid.txt | 4 + .../missing.stderr} | 5 +- ilex/tests/ui/digital/missing.txt | 2 + ilex/tests/ui/digital/points.stderr | 49 ++ ilex/tests/ui/digital/points.txt | 5 + ilex/tests/ui/digital/separators.stderr | 65 +++ ilex/tests/ui/digital/separators.txt | 5 + ilex/tests/ui/eof.rs | 157 ------ .../eof_bracket.stdout => eof/bracket.stderr} | 5 +- ilex/tests/ui/eof/bracket.txt | 1 + .../bracket_multiline.stderr} | 5 +- ilex/tests/ui/eof/bracket_multiline.txt | 3 + .../eof_comment.stdout => eof/comment.stderr} | 3 +- ilex/tests/ui/eof/comment.txt | 1 + .../comment_multiline.stderr} | 7 +- ilex/tests/ui/eof/comment_multiline.txt | 4 + .../mixed_brackets.stderr} | 12 +- ilex/tests/ui/eof/mixed_brackets.txt | 1 + .../ui/eof/mixed_brackets_multiline.stderr | 39 ++ .../tests/ui/eof/mixed_brackets_multiline.txt | 11 + .../eof_quoted.stdout => eof/quoted.stderr} | 5 +- ilex/tests/ui/eof/quoted.txt | 1 + .../quoted_multiline.stderr} | 5 +- ilex/tests/ui/eof/quoted_multiline.txt | 3 + ilex/tests/ui/goldens/ambiguous_nums.stdout | 17 - ilex/tests/ui/goldens/digit_points.stdout | 43 -- ilex/tests/ui/goldens/digit_separators.stdout | 57 --- ilex/tests/ui/goldens/does_not_exist.stdout | 3 - .../goldens/mixed_brackets_multiline.stdout | 35 -- 
ilex/tests/ui/goldens/no_xid_after_br.stdout | 9 - ilex/tests/ui/goldens/no_xid_after_id.stdout | 9 - ilex/tests/ui/goldens/not_utf8.stdout | 3 - ilex/tests/ui/main.rs | 284 ++++++++++- ilex/tests/ui/new_file.rs | 20 - ilex/tests/ui/not_utf8 | 1 - ilex/tests/ui/too_small.rs | 61 --- .../cxx_tag.stderr} | 6 +- ilex/tests/ui/too_small/cxx_tag.txt | 1 + .../ident.stderr} | 3 +- ilex/tests/ui/too_small/ident.txt | 1 + .../rust_hashes.stderr} | 9 +- ilex/tests/ui/too_small/rust_hashes.txt | 1 + ilex/tests/ui/unrecognized.rs | 23 - .../unrecognized.stderr} | 15 +- ilex/tests/ui/unrecognized/unrecognized.txt | 1 + 96 files changed, 2313 insertions(+), 1569 deletions(-) create mode 100644 ilex/src/token/summary.rs delete mode 100644 ilex/tests/greedy.rs create mode 100644 ilex/tests/greedy/greedy.tokens.yaml create mode 100644 ilex/tests/greedy/greedy.txt create mode 100644 ilex/tests/greedy/main.rs delete mode 100644 ilex/tests/json.rs create mode 100644 ilex/tests/json/array.ast.txt create mode 100644 ilex/tests/json/array.json create mode 100644 ilex/tests/json/array.tokens.yaml create mode 100644 ilex/tests/json/main.rs create mode 100644 ilex/tests/json/null.ast.txt create mode 100644 ilex/tests/json/null.json create mode 100644 ilex/tests/json/null.tokens.yaml create mode 100644 ilex/tests/json/obj.ast.txt create mode 100644 ilex/tests/json/obj.json create mode 100644 ilex/tests/json/obj.tokens.yaml delete mode 100644 ilex/tests/llvm.rs create mode 100644 ilex/tests/llvm/main.rs create mode 100644 ilex/tests/llvm/smoke.ll create mode 100644 ilex/tests/llvm/smoke.tokens.yaml rename ilex/tests/{numbers.rs => numbers/main.rs} (62%) create mode 100644 ilex/tests/numbers/numbers.fp64.txt create mode 100644 ilex/tests/numbers/numbers.tokens.yaml create mode 100644 ilex/tests/numbers/numbers.txt delete mode 100644 ilex/tests/ui/ambiguous.rs rename ilex/tests/ui/{goldens/ambiguous_idents.stdout => ambiguous/idents.stderr} (69%) create mode 100644 
ilex/tests/ui/ambiguous/idents.txt create mode 100644 ilex/tests/ui/ambiguous/no_xid_after_br.stderr create mode 100644 ilex/tests/ui/ambiguous/no_xid_after_br.txt rename ilex/tests/ui/{goldens/no_xid_after_cm.stdout => ambiguous/no_xid_after_cm.stderr} (77%) create mode 100644 ilex/tests/ui/ambiguous/no_xid_after_cm.txt create mode 100644 ilex/tests/ui/ambiguous/no_xid_after_id.stderr create mode 100644 ilex/tests/ui/ambiguous/no_xid_after_id.txt rename ilex/tests/ui/{goldens/no_xid_after_kw.stdout => ambiguous/no_xid_after_kw.stderr} (63%) create mode 100644 ilex/tests/ui/ambiguous/no_xid_after_kw.txt rename ilex/tests/ui/{goldens/no_xid_after_nm.stdout => ambiguous/no_xid_after_nm.stderr} (68%) create mode 100644 ilex/tests/ui/ambiguous/no_xid_after_nm.txt rename ilex/tests/ui/{goldens/no_xid_after_st.stdout => ambiguous/no_xid_after_st.stderr} (69%) create mode 100644 ilex/tests/ui/ambiguous/no_xid_after_st.txt create mode 100644 ilex/tests/ui/ambiguous/nums.stderr create mode 100644 ilex/tests/ui/ambiguous/nums.txt create mode 100644 ilex/tests/ui/ambiguous/symbols_after_comment.tokens.yaml create mode 100644 ilex/tests/ui/ambiguous/symbols_after_comment.txt create mode 100644 ilex/tests/ui/ambiguous/symbols_after_quoted.tokens.yaml create mode 100644 ilex/tests/ui/ambiguous/symbols_after_quoted.txt delete mode 100644 ilex/tests/ui/digital.rs rename ilex/tests/ui/{goldens/invalid_digits.stdout => digital/invalid.stderr} (62%) create mode 100644 ilex/tests/ui/digital/invalid.txt rename ilex/tests/ui/{goldens/missing_digits.stdout => digital/missing.stderr} (63%) create mode 100644 ilex/tests/ui/digital/missing.txt create mode 100644 ilex/tests/ui/digital/points.stderr create mode 100644 ilex/tests/ui/digital/points.txt create mode 100644 ilex/tests/ui/digital/separators.stderr create mode 100644 ilex/tests/ui/digital/separators.txt delete mode 100644 ilex/tests/ui/eof.rs rename ilex/tests/ui/{goldens/eof_bracket.stdout => eof/bracket.stderr} (64%) create mode 
100644 ilex/tests/ui/eof/bracket.txt rename ilex/tests/ui/{goldens/eof_bracket_multiline.stdout => eof/bracket_multiline.stderr} (62%) create mode 100644 ilex/tests/ui/eof/bracket_multiline.txt rename ilex/tests/ui/{goldens/eof_comment.stdout => eof/comment.stderr} (76%) create mode 100644 ilex/tests/ui/eof/comment.txt rename ilex/tests/ui/{goldens/eof_comment_multiline.stdout => eof/comment_multiline.stderr} (59%) create mode 100644 ilex/tests/ui/eof/comment_multiline.txt rename ilex/tests/ui/{goldens/mixed_brackets.stdout => eof/mixed_brackets.stderr} (64%) create mode 100644 ilex/tests/ui/eof/mixed_brackets.txt create mode 100644 ilex/tests/ui/eof/mixed_brackets_multiline.stderr create mode 100644 ilex/tests/ui/eof/mixed_brackets_multiline.txt rename ilex/tests/ui/{goldens/eof_quoted.stdout => eof/quoted.stderr} (64%) create mode 100644 ilex/tests/ui/eof/quoted.txt rename ilex/tests/ui/{goldens/eof_quoted_multiline.stdout => eof/quoted_multiline.stderr} (62%) create mode 100644 ilex/tests/ui/eof/quoted_multiline.txt delete mode 100644 ilex/tests/ui/goldens/ambiguous_nums.stdout delete mode 100644 ilex/tests/ui/goldens/digit_points.stdout delete mode 100644 ilex/tests/ui/goldens/digit_separators.stdout delete mode 100644 ilex/tests/ui/goldens/does_not_exist.stdout delete mode 100644 ilex/tests/ui/goldens/mixed_brackets_multiline.stdout delete mode 100644 ilex/tests/ui/goldens/no_xid_after_br.stdout delete mode 100644 ilex/tests/ui/goldens/no_xid_after_id.stdout delete mode 100644 ilex/tests/ui/goldens/not_utf8.stdout delete mode 100644 ilex/tests/ui/new_file.rs delete mode 100644 ilex/tests/ui/not_utf8 delete mode 100644 ilex/tests/ui/too_small.rs rename ilex/tests/ui/{goldens/cxx_string_tag_too_small.stdout => too_small/cxx_tag.stderr} (73%) create mode 100644 ilex/tests/ui/too_small/cxx_tag.txt rename ilex/tests/ui/{goldens/ident_too_small.stdout => too_small/ident.stderr} (69%) create mode 100644 ilex/tests/ui/too_small/ident.txt rename 
ilex/tests/ui/{goldens/rust_string_hashes_too_small.stdout => too_small/rust_hashes.stderr} (62%) create mode 100644 ilex/tests/ui/too_small/rust_hashes.txt delete mode 100644 ilex/tests/ui/unrecognized.rs rename ilex/tests/ui/{goldens/unrecognized.stdout => unrecognized/unrecognized.stderr} (62%) create mode 100644 ilex/tests/ui/unrecognized/unrecognized.txt diff --git a/ilex/Cargo.toml b/ilex/Cargo.toml index 2eb82d2..78ac86d 100644 --- a/ilex/Cargo.toml +++ b/ilex/Cargo.toml @@ -12,6 +12,7 @@ license.workspace = true [dependencies] byteyarn = { version = "0.5", path = "../byteyarn" } +gilded = { path = "../gilded" } twie = { version = "0.5", path = "../twie" } ilex-attr = { version = "0.5.0", path = "attr" } @@ -19,6 +20,7 @@ ilex-attr = { version = "0.5.0", path = "attr" } annotate-snippets = "0.10.0" camino = "1.1.6" num-traits = "0.2.17" +ptree = "0.5.2" similar-asserts = "1.5.0" regex-syntax = "0.8.2" regex-automata = "0.4.3" # Bless Andrew for his patience. diff --git a/ilex/src/file/context.rs b/ilex/src/file/context.rs index 5d774e7..b83acf4 100644 --- a/ilex/src/file/context.rs +++ b/ilex/src/file/context.rs @@ -101,6 +101,28 @@ impl Context { self.file(idx).unwrap() } + /// Adds a new file to this source context, validating that it is valid + /// UTF-8. + pub fn new_file_from_bytes<'a>( + &self, + path: impl Into<&'a Utf8Path>, + text: impl Into>, + report: &Report, + ) -> Result { + let path = path.into(); + let text = String::from_utf8(text.into()).map_err(|e| { + let n = e.utf8_error().valid_up_to(); + let b = e.as_bytes()[n]; + + report + .error(f!("input file `{path}` was not valid UTF-8")) + .note(f!("encountered non-UTF-8 byte {b:#02x} at offset {n}")); + report.fatal().unwrap() + })?; + + Ok(self.new_file(path, text)) + } + /// Adds a new file to this source context by opening `name` and reading it /// from the file system. 
pub fn open_file<'a>( @@ -118,12 +140,7 @@ impl Context { } }; - let Ok(utf8) = String::from_utf8(bytes) else { - report.error(f!("input file `{path}` was not valid UTF-8")); - return report.fatal(); - }; - - Ok(self.new_file(path, utf8)) + self.new_file_from_bytes(path, bytes, report) } /// Gets the `idx`th file in this source context. diff --git a/ilex/src/lib.rs b/ilex/src/lib.rs index 06f2088..2a0c4dc 100644 --- a/ilex/src/lib.rs +++ b/ilex/src/lib.rs @@ -266,7 +266,6 @@ pub mod rule; pub mod testing; pub mod token; -#[cfg(not(test))] pub use { crate::{ file::Context, diff --git a/ilex/src/token/mod.rs b/ilex/src/token/mod.rs index 92c2742..2988559 100644 --- a/ilex/src/token/mod.rs +++ b/ilex/src/token/mod.rs @@ -35,6 +35,7 @@ use crate::Never; use crate::WrongKind; mod stream; +pub mod summary; pub use stream::switch::switch; pub use stream::switch::Switch; diff --git a/ilex/src/token/summary.rs b/ilex/src/token/summary.rs new file mode 100644 index 0000000..459a9ea --- /dev/null +++ b/ilex/src/token/summary.rs @@ -0,0 +1,101 @@ +//! Implementation of `Stream::summary()`. + +use gilded::Doc; + +use crate::file::Span; +use crate::file::Spanned; +use crate::token::Any; +use crate::token::Cursor; +use crate::token::Stream; + +use crate::token::Sign; +use crate::token::Token; + +use super::Content; + +impl Stream<'_> { + /// Returns a string that summarizes the contents of this token stream. + pub fn summary(&self) -> String { + self.cursor().summary().to_string(&Default::default()) + } +} + +impl<'a> Cursor<'a> { + fn summary(&self) -> Doc<'a> { + Doc::new().push({ *self }.map(|token| { + let doc = Doc::new() + .entry("lexeme", token.lexeme().index()) + .entry("span", span2doc(token.span())); + + match token { + Any::Eof(..) => Doc::single("eof", doc), + Any::Keyword(..) 
=> Doc::single("keyword", doc), + Any::Bracket(tok) => Doc::single( + "bracket", + doc + .array("delims", tok.delimiters().into_iter().map(span2doc)) + .entry("contents", tok.contents().summary()), + ), + + Any::Ident(tok) => Doc::single( + "ident", + doc + .entry("prefix", tok.prefix().map(span2doc)) + .entry("suffix", tok.suffix().map(span2doc)) + .entry("name", span2doc(tok.name())), + ), + + Any::Digital(tok) => Doc::single( + "ident", + doc + .entry("prefix", tok.prefix().map(span2doc)) + .entry("suffix", tok.suffix().map(span2doc)) + .entry("radix", tok.radix()) + .entry("sign", tok.sign().map(sign2str)) + .array("blocks", tok.digit_blocks().map(span2doc)) + .array( + "exponents", + tok.exponents().map(|exp| { + Doc::new() + .entry("span", span2doc(exp.span())) + .entry("prefix", exp.prefix().map(span2doc)) + .entry("radix", exp.radix()) + .entry("sign", exp.sign().map(sign2str)) + .array("blocks", exp.digit_blocks().map(span2doc)) + }), + ), + ), + + Any::Quoted(tok) => Doc::single( + "quoted", + doc + .entry("prefix", tok.prefix().map(span2doc)) + .entry("suffix", tok.suffix().map(span2doc)) + .array("delims", tok.delimiters().into_iter().map(span2doc)) + .array( + "contents", + tok.raw_content().map(|c| match c { + Content::Lit(lit) => Doc::single("text", span2doc(lit)), + Content::Esc(esc, data) => Doc::new() + .entry("esc", span2doc(esc)) + .entry("data", data.map(span2doc)), + }), + ), + ), + } + })) + } +} + +fn span2doc(span: Span) -> Doc { + Doc::new() + .array("span", [span.start(), span.end()]) + .entry("text", span.text()) +} + +fn sign2str(s: Sign) -> &'static str { + match s { + Sign::Pos => "+", + Sign::Neg => "-", + } +} diff --git a/ilex/tests/greedy.rs b/ilex/tests/greedy.rs deleted file mode 100644 index 306b2f0..0000000 --- a/ilex/tests/greedy.rs +++ /dev/null @@ -1,52 +0,0 @@ -// This test verifies that lexing is greedy in *most* cases. 
- -use ilex::rule::*; -use ilex::testing::Matcher; - -#[test] -fn greedy() { - let mut spec = ilex::Spec::builder(); - let rust_like = spec.rule(Quoted::with(Bracket::rust_style( - "#%", - ("poisonous", "["), - ("]", ">"), - ))); - - let cpp_like = spec.rule(Quoted::with(Bracket::cxx_style( - Ident::new(), - ("R\"", "("), - (")", "\""), - ))); - - let array = spec.rule(Bracket::from(("[", "]"))); - let poison = spec.rule(Keyword::new("poison")); - let ident = spec.rule(Ident::new()); - - let spec = spec.compile(); - - let text = r#" - poison - poisonous - poisonous[xyz]> - poisonous#%#%[xyz]#%#%> - poisonous [xyz] - R"cc(some c++)" )cc" - "#; - - let ctx = ilex::Context::new(); - let _u = ctx.use_for_debugging_spans(); - let report = ctx.new_report(); - let tokens = ctx.new_file("test.file", text).lex(&spec, &report).unwrap(); - eprintln!("stream: {tokens:#?}"); - - Matcher::new() - .then1(poison, "poison") - .then1(ident, "poisonous") - .then2(rust_like, ("poisonous[", "]>"), ["xyz"]) - .then2(rust_like, ("poisonous#%#%[", "]#%#%>"), ["xyz"]) - .then1(ident, "poisonous") - .then2(array, ("[", "]"), Matcher::new().then1(ident, "xyz")) - .then2(cpp_like, ("R\"cc(", ")cc\""), ["some c++)\" "]) - .eof() - .assert_matches(&tokens); -} diff --git a/ilex/tests/greedy/greedy.tokens.yaml b/ilex/tests/greedy/greedy.tokens.yaml new file mode 100644 index 0000000..3bfeb32 --- /dev/null +++ b/ilex/tests/greedy/greedy.tokens.yaml @@ -0,0 +1,52 @@ +- keyword: + lexeme: 3 + span: {span: [0, 6], text: "poison"} +- ident: + lexeme: 4 + span: {span: [7, 16], text: "poisonous"} + name: {span: [7, 16], text: "poisonous"} +- quoted: + lexeme: 0 + span: + span: [17, 32] + text: "poisonous[xyz]>" + delims: + - {span: [17, 27], text: "poisonous["} + - {span: [30, 32], text: "]>"} + contents: [{text: {span: [27, 30], text: "xyz"}}] +- quoted: + lexeme: 0 + span: + span: [33, 56] + text: "poisonous#%#%[xyz]#%#%>" + delims: + - {span: [33, 47], text: "poisonous#%#%["} + - {span: [50, 56], 
text: "]#%#%>"} + contents: [{text: {span: [47, 50], text: "xyz"}}] +- ident: + lexeme: 4 + span: {span: [57, 66], text: "poisonous"} + name: {span: [57, 66], text: "poisonous"} +- bracket: + lexeme: 2 + span: {span: [67, 72], text: "[xyz]"} + delims: + - {span: [67, 68], text: "["} + - {span: [71, 72], text: "]"} + contents: + - ident: + lexeme: 4 + span: {span: [68, 71], text: "xyz"} + name: {span: [68, 71], text: "xyz"} +- quoted: + lexeme: 1 + span: + span: [73, 93] + text: "R\"cc(some c++)\" )cc\"" + delims: + - {span: [73, 78], text: "R\"cc("} + - {span: [89, 93], text: ")cc\""} + contents: [{text: {span: [78, 89], text: "some c++)\" "}}] +- eof: + lexeme: 2147483647 + span: {span: [93, 93], text: ""} diff --git a/ilex/tests/greedy/greedy.txt b/ilex/tests/greedy/greedy.txt new file mode 100644 index 0000000..2c058ce --- /dev/null +++ b/ilex/tests/greedy/greedy.txt @@ -0,0 +1,6 @@ +poison +poisonous +poisonous[xyz]> +poisonous#%#%[xyz]#%#%> +poisonous [xyz] +R"cc(some c++)" )cc" \ No newline at end of file diff --git a/ilex/tests/greedy/main.rs b/ilex/tests/greedy/main.rs new file mode 100644 index 0000000..044cb6e --- /dev/null +++ b/ilex/tests/greedy/main.rs @@ -0,0 +1,50 @@ +use ilex::rule::*; +use ilex::Context; +use ilex::Lexeme; + +#[gilded::test("tests/greedy/*.txt")] +fn greedy(test: &mut gilded::Test) { + // This test verifies that lexing is greedy in *most* cases. 
+ + #[ilex::spec] + struct Greedy { + #[rule(Quoted::with(Bracket::rust_style( + "#%", + ("poisonous", "["), + ("]", ">"), + )))] + rust_like: Lexeme, + + #[rule(Quoted::with(Bracket::cxx_style( + Ident::new(), + ("R\"", "("), + (")", "\""), + )))] + cpp_like: Lexeme, + + #[rule("[", "]")] + array: Lexeme, + + poison: Lexeme, + + #[rule(Ident::new())] + ident: Lexeme, + } + + let ctx = Context::new(); + let report = ctx.new_report(); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + match file.lex(Greedy::get().spec(), &report) { + Ok(stream) => { + test.output("tokens.yaml", stream.summary()); + test.output("stderr", "".into()); + } + Err(fatal) => { + test.output("tokens.yaml", "".into()); + test.output("stderr", format!("{fatal:?}")); + } + } +} diff --git a/ilex/tests/json.rs b/ilex/tests/json.rs deleted file mode 100644 index 567ea89..0000000 --- a/ilex/tests/json.rs +++ /dev/null @@ -1,335 +0,0 @@ -use core::fmt; -use std::fmt::Write; - -use ilex::fp::Fp64; -use ilex::report::Expected; -use ilex::report::Report; -use ilex::rule::*; -use ilex::testing::DigitalMatcher; -use ilex::testing::Matcher; -use ilex::token; -use ilex::token::Content as C; -use ilex::token::Cursor; -use ilex::Lexeme; - -#[ilex::spec] -struct JsonSpec { - #[rule(",")] - comma: Lexeme, - - #[rule(":")] - colon: Lexeme, - - #[rule("true")] - true_: Lexeme, - - #[rule("false")] - false_: Lexeme, - - #[rule("null")] - null: Lexeme, - - #[named] - #[rule("[", "]")] - array: Lexeme, - - #[named] - #[rule("{", "}")] - object: Lexeme, - - #[named] - #[rule(Quoted::new('"') - .invalid_escape(r"\") - .escapes([ - "\\\"", r"\\", r"\/", - r"\b", r"\f", r"\n", r"\t", r"\r", - ]) - .fixed_length_escape(r"\u", 4))] - string: Lexeme, - - #[named] - #[rule(Digital::new(10) - .minus() - .point_limit(0..2) - .exponents(["e", "E"], Digits::new(10).plus().minus()))] - number: Lexeme, -} - -const SOME_JSON: &str = r#" -{ - "keywords": [null, true, false], - "string": 
"abcdefg", - "number": 42, - "int": 42.0, - "frac": 0.42, - "neg": -42, - "exp": 42e+42, - "nest": { - "escapes\n": "\"\\\/\b\f\n\t\r\u0000\u1234\uffff" - } -} -"#; - -#[test] -fn check_tokens() { - let json = JsonSpec::get(); - let ctx = ilex::Context::new(); - let _u = ctx.use_for_debugging_spans(); - let report = ctx.new_report(); - let tokens = ctx - .new_file("", SOME_JSON) - .lex(json.spec(), &report) - .unwrap(); - eprintln!("stream: {tokens:#?}"); - - Matcher::new() - .then2( - json.object, - ("{", "}"), - Matcher::new() - .then2(json.string, ('"', '"'), ["keywords"]) - .then1(json.colon, ":") - .then2( - json.array, - ("[", "]"), - Matcher::new() - .then1(json.null, "null") - .then1(json.comma, ",") - .then1(json.true_, "true") - .then1(json.comma, ",") - .then1(json.false_, "false"), - ) - .then1(json.comma, ",") - // - .then2(json.string, ('"', '"'), ["string"]) - .then1(json.colon, ":") - .then2(json.string, ('"', '"'), ["abcdefg"]) - .then1(json.comma, ",") - // - .then2(json.string, ('"', '"'), ["number"]) - .then1(json.colon, ":") - .then2(json.number, 10, ["42"]) - .then1(json.comma, ",") - // - .then2(json.string, ('"', '"'), ["int"]) - .then1(json.colon, ":") - .then2(json.number, 10, ["42", "0"]) - .then1(json.comma, ",") - // - .then2(json.string, ('"', '"'), ["frac"]) - .then1(json.colon, ":") - .then2(json.number, 10, ["0", "42"]) - .then1(json.comma, ",") - // - .then2(json.string, ('"', '"'), ["neg"]) - .then1(json.colon, ":") - .then1( - json.number, - DigitalMatcher::new(10, ["42"]).sign_span(Sign::Neg, "-"), - ) - .then1(json.comma, ",") - // - .then2(json.string, ('"', '"'), ["exp"]) - .then1(json.colon, ":") - .then1( - json.number, - DigitalMatcher::new(10, ["42"]) - .exp(10, "e", ["42"]) - .sign_span(Sign::Pos, "+"), - ) - .then1(json.comma, ",") - // - .then2(json.string, ('"', '"'), ["nest"]) - .then1(json.colon, ":") - .then2( - json.object, - ("{", "}"), - Matcher::new() - .then2(json.string, ('"', '"'), [C::lit("escapes"), 
C::esc(r"\n")]) - .then1(json.colon, ":") - .then2( - json.string, - ('"', '"'), - [ - C::esc("\\\""), - C::esc(r"\\"), - C::esc(r"\/"), - C::esc(r"\b"), - C::esc(r"\f"), - C::esc(r"\n"), - C::esc(r"\t"), - C::esc(r"\r"), - C::esc_with_data(r"\u", "0000"), - C::esc_with_data(r"\u", "1234"), - C::esc_with_data(r"\u", "ffff"), - ], - ), - ), - ) - .eof() - .assert_matches(&tokens); -} - -#[derive(Clone, Debug, PartialEq)] -enum Json { - Null, - Num(f64), - Bool(bool), - Str(String), - Arr(Vec), - Obj(Vec<(String, Json)>), -} - -#[test] -fn parse_test() { - use similar_asserts::assert_eq; - - let value = parse("null").unwrap(); - assert_eq!(value, Json::Null); - - let value = parse("[null, true, false]").unwrap(); - assert_eq!( - value, - Json::Arr(vec![Json::Null, Json::Bool(true), Json::Bool(false)]) - ); - - let value = parse(SOME_JSON).unwrap(); - assert_eq!( - value, - Json::Obj(vec![ - ( - "keywords".into(), - Json::Arr(vec![Json::Null, Json::Bool(true), Json::Bool(false)]) - ), - ("string".into(), Json::Str("abcdefg".into())), - ("number".into(), Json::Num(42.0)), - ("int".into(), Json::Num(42.0)), - ("frac".into(), Json::Num(0.42)), - ("neg".into(), Json::Num(-42.0)), - ("exp".into(), Json::Num(42e42)), - ( - "nest".into(), - Json::Obj(vec![( - "escapes\n".into(), - Json::Str("\"\\/\u{8}\u{c}\n\t\r\0ሴ\u{ffff}".into()) - )]) - ), - ]) - ); -} - -fn parse(data: &str) -> Result { - struct Error(String); - impl fmt::Debug for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_char('\n')?; - f.write_str(&self.0) - } - } - - let json = JsonSpec::get(); - - let ctx = ilex::Context::new(); - let _u = ctx.use_for_debugging_spans(); - let report = ctx.new_report(); - let stream = ctx - .new_file("", data) - .lex(json.spec(), &report) - .map_err(|e| Error(e.to_string()))?; - let value = parse0(&report, json, &mut stream.cursor()); - - report.fatal_or(value).map_err(|e| Error(e.to_string())) -} - -fn parse0(report: &Report, json: &JsonSpec, 
cursor: &mut Cursor) -> Json { - let quote2str = |str: token::Quoted| -> String { - str.to_utf8(|key, data, buf| { - let char = match key.text() { - "\\\"" => '\"', - r"\\" => '\\', - r"\/" => '/', - r"\b" => '\x08', - r"\f" => '\x0c', - r"\n" => '\n', - r"\t" => '\t', - r"\r" => '\r', - // This is sloppy about surrogates but this is just an example. - r"\u" => { - let data = data.unwrap(); - let code = - u16::from_str_radix(data.text(), 16).unwrap_or_else(|_| { - report.builtins(json.spec()).expected( - [Expected::Name("hex-encoded u16".into())], - data.text(), - data, - ); - 0 - }); - for c in char::decode_utf16([code]) { - buf.push(c.unwrap_or('đŸ˜ĸ')) - } - return; - } - esc => panic!("{}", esc), - }; - buf.push(char); - }) - }; - - let value = token::switch() - .case(json.null, |_, _| Json::Null) - .case(json.false_, |_, _| Json::Bool(false)) - .case(json.true_, |_, _| Json::Bool(true)) - .case(json.string, |str: token::Quoted, _| Json::Str(quote2str(str))) - .case(json.number, |num: token::Digital, _| { - Json::Num(num.to_float::(.., report).unwrap().to_hard()) - }) - .case(json.array, |array: token::Bracket, _| { - let mut trailing = None; - let vec = array - .contents() - .delimited(json.comma, |c| Some(parse0(report, json, c))) - .map(|(e, c)| { - trailing = c; - e - }) - .collect(); - - if let Some(comma) = trailing { - report - .error("trailing commas are not allowed in JSON") - .saying(comma, "remove this comma"); - } - - Json::Arr(vec) - }) - .case(json.object, |object: token::Bracket, _| { - let mut trailing = None; - let vec = object - .contents() - .delimited(json.comma, |c| { - let key = c - .take(json.string, report) - .map(|q| quote2str(q)) - .unwrap_or("đŸ˜ĸ".into()); - c.take(json.colon, report); - let value = parse0(report, json, c); - Some((key, value)) - }) - .map(|(e, c)| { - trailing = c; - e - }) - .collect(); - - if let Some(comma) = trailing { - report - .error("trailing commas are not allowed in JSON") - .saying(comma, "remove this 
comma"); - } - - Json::Obj(vec) - }) - .take(cursor, report); - value.unwrap_or(Json::Null) -} diff --git a/ilex/tests/json/array.ast.txt b/ilex/tests/json/array.ast.txt new file mode 100644 index 0000000..c74a872 --- /dev/null +++ b/ilex/tests/json/array.ast.txt @@ -0,0 +1,11 @@ +Arr( + [ + Null, + Bool( + true, + ), + Bool( + false, + ), + ], +) \ No newline at end of file diff --git a/ilex/tests/json/array.json b/ilex/tests/json/array.json new file mode 100644 index 0000000..db2c3f1 --- /dev/null +++ b/ilex/tests/json/array.json @@ -0,0 +1 @@ +[null, true, false] \ No newline at end of file diff --git a/ilex/tests/json/array.tokens.yaml b/ilex/tests/json/array.tokens.yaml new file mode 100644 index 0000000..6886c09 --- /dev/null +++ b/ilex/tests/json/array.tokens.yaml @@ -0,0 +1,27 @@ +- bracket: + lexeme: 5 + span: + span: [0, 19] + text: "[null, true, false]" + delims: + - {span: [0, 1], text: "["} + - {span: [18, 19], text: "]"} + contents: + - keyword: + lexeme: 4 + span: {span: [1, 5], text: "null"} + - keyword: + lexeme: 0 + span: {span: [5, 6], text: ","} + - keyword: + lexeme: 2 + span: {span: [7, 11], text: "true"} + - keyword: + lexeme: 0 + span: {span: [11, 12], text: ","} + - keyword: + lexeme: 3 + span: {span: [13, 18], text: "false"} +- eof: + lexeme: 2147483647 + span: {span: [19, 19], text: ""} diff --git a/ilex/tests/json/main.rs b/ilex/tests/json/main.rs new file mode 100644 index 0000000..56140a5 --- /dev/null +++ b/ilex/tests/json/main.rs @@ -0,0 +1,185 @@ +use ilex::fp::Fp64; +use ilex::report::Expected; +use ilex::report::Report; +use ilex::rule::*; +use ilex::token; +use ilex::token::Cursor; +use ilex::Context; +use ilex::Lexeme; + +#[ilex::spec] +struct JsonSpec { + #[rule(",")] + comma: Lexeme, + + #[rule(":")] + colon: Lexeme, + + #[rule("true")] + true_: Lexeme, + + #[rule("false")] + false_: Lexeme, + + #[rule("null")] + null: Lexeme, + + #[named] + #[rule("[", "]")] + array: Lexeme, + + #[named] + #[rule("{", "}")] + object: Lexeme, 
+ + #[named] + #[rule(Quoted::new('"') + .invalid_escape(r"\") + .escapes([ + "\\\"", r"\\", r"\/", + r"\b", r"\f", r"\n", r"\t", r"\r", + ]) + .fixed_length_escape(r"\u", 4))] + string: Lexeme, + + #[named] + #[rule(Digital::new(10) + .minus() + .point_limit(0..2) + .exponents(["e", "E"], Digits::new(10).plus().minus()))] + number: Lexeme, +} + +#[gilded::test("tests/json/*.json")] +fn check_tokens(test: &mut gilded::Test) { + let ctx = Context::new(); + let report = ctx.new_report(); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + let stream = match file.lex(JsonSpec::get().spec(), &report) { + Ok(stream) => stream, + Err(fatal) => { + test.output("tokens.yaml", "".into()); + test.output("ast.txt", "".into()); + test.output("stderr", format!("{fatal:?}")); + return; + } + }; + + test.output("tokens.yaml", stream.summary()); + + let json = parse(&report, JsonSpec::get(), &mut stream.cursor()); + if let Err(fatal) = report.fatal_or(()) { + test.output("ast.txt", "".into()); + test.output("stderr", format!("{fatal:?}")); + return; + } + + test.output("ast.txt", format!("{json:#?}")); + test.output("stderr", "".into()); +} + +#[derive(Clone, Debug, PartialEq)] +enum Json { + Null, + Num(f64), + Bool(bool), + Str(String), + Arr(Vec), + Obj(Vec<(String, Json)>), +} + +fn parse(report: &Report, json: &JsonSpec, cursor: &mut Cursor) -> Json { + let quote2str = |str: token::Quoted| -> String { + str.to_utf8(|key, data, buf| { + let char = match key.text() { + "\\\"" => '\"', + r"\\" => '\\', + r"\/" => '/', + r"\b" => '\x08', + r"\f" => '\x0c', + r"\n" => '\n', + r"\t" => '\t', + r"\r" => '\r', + // This is sloppy about surrogates but this is just an example. 
+ r"\u" => { + let data = data.unwrap(); + let code = + u16::from_str_radix(data.text(), 16).unwrap_or_else(|_| { + report.builtins(json.spec()).expected( + [Expected::Name("hex-encoded u16".into())], + data.text(), + data, + ); + 0 + }); + for c in char::decode_utf16([code]) { + buf.push(c.unwrap_or('đŸ˜ĸ')) + } + return; + } + esc => panic!("{}", esc), + }; + buf.push(char); + }) + }; + + let value = token::switch() + .case(json.null, |_, _| Json::Null) + .case(json.false_, |_, _| Json::Bool(false)) + .case(json.true_, |_, _| Json::Bool(true)) + .case(json.string, |str: token::Quoted, _| Json::Str(quote2str(str))) + .case(json.number, |num: token::Digital, _| { + Json::Num(num.to_float::(.., report).unwrap().to_hard()) + }) + .case(json.array, |array: token::Bracket, _| { + let mut trailing = None; + let vec = array + .contents() + .delimited(json.comma, |c| Some(parse(report, json, c))) + .map(|(e, c)| { + trailing = c; + e + }) + .collect(); + + if let Some(comma) = trailing { + report + .error("trailing commas are not allowed in JSON") + .saying(comma, "remove this comma"); + } + + Json::Arr(vec) + }) + .case(json.object, |object: token::Bracket, _| { + let mut trailing = None; + let vec = object + .contents() + .delimited(json.comma, |c| { + let key = c + .take(json.string, report) + .map(|q| quote2str(q)) + .unwrap_or("đŸ˜ĸ".into()); + c.take(json.colon, report); + let value = parse(report, json, c); + Some((key, value)) + }) + .map(|(e, c)| { + trailing = c; + e + }) + .collect(); + + if let Some(comma) = trailing { + report + .error("trailing commas are not allowed in JSON") + .saying(comma, "remove this comma"); + } + + Json::Obj(vec) + }) + .take(cursor, report); + value.unwrap_or(Json::Null) +} diff --git a/ilex/tests/json/null.ast.txt b/ilex/tests/json/null.ast.txt new file mode 100644 index 0000000..5f6f79d --- /dev/null +++ b/ilex/tests/json/null.ast.txt @@ -0,0 +1 @@ +Null \ No newline at end of file diff --git a/ilex/tests/json/null.json 
b/ilex/tests/json/null.json new file mode 100644 index 0000000..ec747fa --- /dev/null +++ b/ilex/tests/json/null.json @@ -0,0 +1 @@ +null \ No newline at end of file diff --git a/ilex/tests/json/null.tokens.yaml b/ilex/tests/json/null.tokens.yaml new file mode 100644 index 0000000..5746ee7 --- /dev/null +++ b/ilex/tests/json/null.tokens.yaml @@ -0,0 +1,6 @@ +- keyword: + lexeme: 4 + span: {span: [0, 4], text: "null"} +- eof: + lexeme: 2147483647 + span: {span: [4, 4], text: ""} diff --git a/ilex/tests/json/obj.ast.txt b/ilex/tests/json/obj.ast.txt new file mode 100644 index 0000000..fe75b3d --- /dev/null +++ b/ilex/tests/json/obj.ast.txt @@ -0,0 +1,67 @@ +Obj( + [ + ( + "keywords", + Arr( + [ + Null, + Bool( + true, + ), + Bool( + false, + ), + ], + ), + ), + ( + "string", + Str( + "abcdefg", + ), + ), + ( + "number", + Num( + 42.0, + ), + ), + ( + "int", + Num( + 42.0, + ), + ), + ( + "frac", + Num( + 0.42, + ), + ), + ( + "neg", + Num( + -42.0, + ), + ), + ( + "exp", + Num( + 4.2e43, + ), + ), + ( + "nest", + Obj( + [ + ( + "escapes\n", + Str( + "\"\\/\u{8}\u{c}\n\t\r\0ሴ\u{ffff}", + ), + ), + ], + ), + ), + ], +) \ No newline at end of file diff --git a/ilex/tests/json/obj.json b/ilex/tests/json/obj.json new file mode 100644 index 0000000..4b0bf1a --- /dev/null +++ b/ilex/tests/json/obj.json @@ -0,0 +1,12 @@ +{ + "keywords": [null, true, false], + "string": "abcdefg", + "number": 42, + "int": 42.0, + "frac": 0.42, + "neg": -42, + "exp": 42e+42, + "nest": { + "escapes\n": "\"\\\/\b\f\n\t\r\u0000\u1234\uffff" + } +} \ No newline at end of file diff --git a/ilex/tests/json/obj.tokens.yaml b/ilex/tests/json/obj.tokens.yaml new file mode 100644 index 0000000..26b25dd --- /dev/null +++ b/ilex/tests/json/obj.tokens.yaml @@ -0,0 +1,245 @@ +- bracket: + lexeme: 6 + span: + span: [0, 209] + text: | + { + "keywords": [null, true, false], + "string": "abcdefg", + "number": 42, + "int": 42.0, + "frac": 0.42, + "neg": -42, + "exp": 42e+42, + "nest": { + "escapes\n": 
"\"\\\/\b\f\n\t\r\u0000\u1234\uffff" + } + } + delims: + - {span: [0, 1], text: "{"} + - {span: [208, 209], text: "}"} + contents: + - quoted: + lexeme: 7 + span: {span: [4, 14], text: "\"keywords\""} + delims: + - {span: [4, 5], text: "\""} + - {span: [13, 14], text: "\""} + contents: [{text: {span: [5, 13], text: "keywords"}}] + - keyword: + lexeme: 1 + span: {span: [14, 15], text: ":"} + - bracket: + lexeme: 5 + span: + span: [16, 35] + text: "[null, true, false]" + delims: + - {span: [16, 17], text: "["} + - {span: [34, 35], text: "]"} + contents: + - keyword: + lexeme: 4 + span: {span: [17, 21], text: "null"} + - keyword: + lexeme: 0 + span: {span: [21, 22], text: ","} + - keyword: + lexeme: 2 + span: {span: [23, 27], text: "true"} + - keyword: + lexeme: 0 + span: {span: [27, 28], text: ","} + - keyword: + lexeme: 3 + span: {span: [29, 34], text: "false"} + - keyword: + lexeme: 0 + span: {span: [35, 36], text: ","} + - quoted: + lexeme: 7 + span: {span: [39, 47], text: "\"string\""} + delims: + - {span: [39, 40], text: "\""} + - {span: [46, 47], text: "\""} + contents: [{text: {span: [40, 46], text: "string"}}] + - keyword: + lexeme: 1 + span: {span: [47, 48], text: ":"} + - quoted: + lexeme: 7 + span: {span: [49, 58], text: "\"abcdefg\""} + delims: + - {span: [49, 50], text: "\""} + - {span: [57, 58], text: "\""} + contents: [{text: {span: [50, 57], text: "abcdefg"}}] + - keyword: + lexeme: 0 + span: {span: [58, 59], text: ","} + - quoted: + lexeme: 7 + span: {span: [62, 70], text: "\"number\""} + delims: + - {span: [62, 63], text: "\""} + - {span: [69, 70], text: "\""} + contents: [{text: {span: [63, 69], text: "number"}}] + - keyword: + lexeme: 1 + span: {span: [70, 71], text: ":"} + - ident: + lexeme: 8 + span: {span: [72, 74], text: "42"} + radix: 10 + blocks: [{span: [72, 74], text: "42"}] + exponents: [] + - keyword: + lexeme: 0 + span: {span: [74, 75], text: ","} + - quoted: + lexeme: 7 + span: {span: [78, 83], text: "\"int\""} + delims: + - {span: 
[78, 79], text: "\""} + - {span: [82, 83], text: "\""} + contents: [{text: {span: [79, 82], text: "int"}}] + - keyword: + lexeme: 1 + span: {span: [83, 84], text: ":"} + - ident: + lexeme: 8 + span: {span: [85, 89], text: "42.0"} + radix: 10 + blocks: + - {span: [85, 87], text: "42"} + - {span: [88, 89], text: "0"} + exponents: [] + - keyword: + lexeme: 0 + span: {span: [89, 90], text: ","} + - quoted: + lexeme: 7 + span: {span: [93, 99], text: "\"frac\""} + delims: + - {span: [93, 94], text: "\""} + - {span: [98, 99], text: "\""} + contents: [{text: {span: [94, 98], text: "frac"}}] + - keyword: + lexeme: 1 + span: {span: [99, 100], text: ":"} + - ident: + lexeme: 8 + span: {span: [101, 105], text: "0.42"} + radix: 10 + blocks: + - {span: [101, 102], text: "0"} + - {span: [103, 105], text: "42"} + exponents: [] + - keyword: + lexeme: 0 + span: {span: [105, 106], text: ","} + - quoted: + lexeme: 7 + span: {span: [109, 114], text: "\"neg\""} + delims: + - {span: [109, 110], text: "\""} + - {span: [113, 114], text: "\""} + contents: [{text: {span: [110, 113], text: "neg"}}] + - keyword: + lexeme: 1 + span: {span: [114, 115], text: ":"} + - ident: + lexeme: 8 + span: {span: [116, 119], text: "-42"} + radix: 10 + sign: "-" + blocks: [{span: [117, 119], text: "42"}] + exponents: [] + - keyword: + lexeme: 0 + span: {span: [119, 120], text: ","} + - quoted: + lexeme: 7 + span: {span: [123, 128], text: "\"exp\""} + delims: + - {span: [123, 124], text: "\""} + - {span: [127, 128], text: "\""} + contents: [{text: {span: [124, 127], text: "exp"}}] + - keyword: + lexeme: 1 + span: {span: [128, 129], text: ":"} + - ident: + lexeme: 8 + span: {span: [130, 136], text: "42e+42"} + radix: 10 + blocks: [{span: [130, 132], text: "42"}] + exponents: + - span: {span: [130, 136], text: "42e+42"} + prefix: {span: [132, 133], text: "e"} + radix: 10 + sign: "+" + blocks: [{span: [134, 136], text: "42"}] + - keyword: + lexeme: 0 + span: {span: [136, 137], text: ","} + - quoted: + lexeme: 7 + 
span: {span: [140, 146], text: "\"nest\""} + delims: + - {span: [140, 141], text: "\""} + - {span: [145, 146], text: "\""} + contents: [{text: {span: [141, 145], text: "nest"}}] + - keyword: + lexeme: 1 + span: {span: [146, 147], text: ":"} + - bracket: + lexeme: 6 + span: + span: [148, 207] + text: | + { + "escapes\n": "\"\\\/\b\f\n\t\r\u0000\u1234\uffff" + } + delims: + - {span: [148, 149], text: "{"} + - {span: [206, 207], text: "}"} + contents: + - quoted: + lexeme: 7 + span: + span: [154, 165] + text: "\"escapes\\n\"" + delims: + - {span: [154, 155], text: "\""} + - {span: [164, 165], text: "\""} + contents: + - text: {span: [155, 162], text: "escapes"} + - {esc: {span: [162, 164], text: "\\n"}} + - keyword: + lexeme: 1 + span: {span: [165, 166], text: ":"} + - quoted: + lexeme: 7 + span: + span: [167, 203] + text: "\"\\\"\\\\\\/\\b\\f\\n\\t\\r\\u0000\\u1234\\uffff\"" + delims: + - {span: [167, 168], text: "\""} + - {span: [202, 203], text: "\""} + contents: + - {esc: {span: [168, 170], text: "\\\""}} + - {esc: {span: [170, 172], text: "\\\\"}} + - {esc: {span: [172, 174], text: "\\/"}} + - {esc: {span: [174, 176], text: "\\b"}} + - {esc: {span: [176, 178], text: "\\f"}} + - {esc: {span: [178, 180], text: "\\n"}} + - {esc: {span: [180, 182], text: "\\t"}} + - {esc: {span: [182, 184], text: "\\r"}} + - esc: {span: [184, 186], text: "\\u"} + data: {span: [186, 190], text: "0000"} + - esc: {span: [190, 192], text: "\\u"} + data: {span: [192, 196], text: "1234"} + - esc: {span: [196, 198], text: "\\u"} + data: {span: [198, 202], text: "ffff"} +- eof: + lexeme: 2147483647 + span: {span: [209, 209], text: ""} diff --git a/ilex/tests/llvm.rs b/ilex/tests/llvm.rs deleted file mode 100644 index 34e2585..0000000 --- a/ilex/tests/llvm.rs +++ /dev/null @@ -1,284 +0,0 @@ -use ilex::rule::*; -use ilex::testing::Matcher; -use ilex::token::Content as C; -use ilex::Lexeme; - -#[ilex::spec] -struct Llvm { - #[rule(";")] - comment: Lexeme, - - #[rule('(', ')')] - parens: Lexeme, 
- #[rule('[', ']')] - brackets: Lexeme, - #[rule('<', '>')] - vector: Lexeme, - #[rule('{', '}')] - braces: Lexeme, - #[rule("<{", "}>")] - packed: Lexeme, - #[rule("!{", "}")] - meta: Lexeme, - - #[rule(',')] - comma: Lexeme, - #[rule('=')] - equal: Lexeme, - #[rule('*')] - star: Lexeme, - #[rule('x')] - times: Lexeme, - - br: Lexeme, - call: Lexeme, - icmp: Lexeme, - #[rule("eq")] - icmp_eq: Lexeme, - ret: Lexeme, - unreachable: Lexeme, - - constant: Lexeme, - declare: Lexeme, - define: Lexeme, - global: Lexeme, - - label: Lexeme, - null: Lexeme, - ptr: Lexeme, - #[rule(Digital::new(10).prefix("i"))] - int: Lexeme, - void: Lexeme, - - private: Lexeme, - unnamed_addr: Lexeme, - nocapture: Lexeme, - nounwind: Lexeme, - - #[named] - #[rule(Quoted::new('"') - .fixed_length_escape(r"\", 2) - .prefixes(["", "c"]))] - string: Lexeme, - - #[named("identifier")] - #[rule(Ident::new() - .ascii_only() - .extra_starts(".0123456789".chars()) - .suffix(":"))] - label_ident: Lexeme, - - #[named("identifier")] - #[rule(Ident::new() - .ascii_only() - .extra_starts(".0123456789".chars()) - .prefixes(["!", "@", "%"]))] - bare: Lexeme, - - #[named("quoted identifier")] - #[rule(Quoted::new('"') - .fixed_length_escape(r"\", 2) - .prefixes(["!", "@", "%"]))] - quoted: Lexeme, - - #[named = "number"] - #[rule(Digital::new(10) - .minus() - .point_limit(0..2) - .exponents(["e", "E"], Digits::new(10).plus().minus()))] - dec: Lexeme, - - #[named = "number"] - #[rule(Digital::new(16).minus().prefix("0x"))] - hex: Lexeme, -} - -#[test] -fn llvm() { - let text = r#" - ; Declare the string constant as a global constant. - @.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00" - - ; External declaration of the puts function - declare i32 @"non trivial name"(ptr nocapture) nounwind - - ; Definition of main function - define i32 @main(i32 %0, ptr %1) { - ; Call puts function to write out the string to stdout. 
- call i32 @"non trivial name"(ptr @.str) - ret i32 0 - } - - ; Named metadata - !0 = !{i32 42, null, !"string"} - !foo = !{!0} - @glb = global i8 0 - - define void @f(ptr %a) { - %c = icmp eq ptr %a, @glb - br i1 %c, label %BB_EXIT, label %BB_CONTINUE ; escapes %a - BB_EXIT: - call void @exit() - unreachable - BB_CONTINUE: - ret void - } - "#; - - let llvm = Llvm::get(); - let ctx = ilex::Context::new(); - let _u = ctx.use_for_debugging_spans(); - let report = ctx.new_report(); - let tokens = ctx - .new_file("test.file", text) - .lex(llvm.spec(), &report) - .unwrap(); - eprintln!("stream: {tokens:#?}"); - - Matcher::new() - .prefix1(llvm.bare, "@", ".str") - .comments(["; Declare the string constant as a global constant.\n"]) - .then1(llvm.equal, "=") - .then1(llvm.private, "private") - .then1(llvm.unnamed_addr, "unnamed_addr") - .then1(llvm.constant, "constant") - .then2( - llvm.brackets, - ("[", "]"), - Matcher::new() - .then2(llvm.dec, 10, ["13"]) - .then1(llvm.times, "x") - .prefix2(llvm.int, "i", 10, ["8"]), - ) - .prefix2( - llvm.string, - "c", - ('"', '"'), - [ - C::lit("hello world"), - C::esc_with_data(r"\", "0A"), - C::esc_with_data(r"\", "00"), - ], - ) - // - .then1(llvm.declare, "declare") - .comments(["; External declaration of the puts function\n"]) - .prefix2(llvm.int, "i", 10, ["32"]) - .prefix2(llvm.quoted, "@", ('"', '"'), ["non trivial name"]) - .then2( - llvm.parens, - ("(", ")"), - Matcher::new() - .then1(llvm.ptr, "ptr") - .then1(llvm.nocapture, "nocapture"), - ) - .then1(llvm.nounwind, "nounwind") - // - .then1(llvm.define, "define") - .comments(["; Definition of main function\n"]) - .prefix2(llvm.int, "i", 10, ["32"]) - .prefix1(llvm.bare, "@", "main") - .then2( - llvm.parens, - ("(", ")"), - Matcher::new() - .prefix2(llvm.int, "i", 10, ["32"]) - .prefix1(llvm.bare, "%", "0") - .then1(llvm.comma, ",") - .then1(llvm.ptr, "ptr") - .prefix1(llvm.bare, "%", "1"), - ) - .then2( - llvm.braces, - ("{", "}"), - Matcher::new() - .then1(llvm.call, 
"call") - .comments(["; Call puts function to write out the string to stdout.\n"]) - .prefix2(llvm.int, "i", 10, ["32"]) - .prefix2(llvm.quoted, "@", ('"', '"'), ["non trivial name"]) - .then2( - llvm.parens, - ("(", ")"), - Matcher::new() - .then1(llvm.ptr, "ptr") - .prefix1(llvm.bare, "@", ".str"), - ) - .then1(llvm.ret, "ret") - .prefix2(llvm.int, "i", 10, ["32"]) - .then2(llvm.dec, 10, ["0"]), - ) - // - .prefix1(llvm.bare, "!", "0") - .comments(["; Named metadata\n"]) - .then1(llvm.equal, "=") - .then2( - llvm.meta, - ("!{", "}"), - Matcher::new() - .prefix2(llvm.dec, "i", 10, ["32"]) - .then2(llvm.dec, 10, ["42"]) - .then1(llvm.comma, ",") - .then1(llvm.null, "null") - .then1(llvm.comma, ",") - .prefix2(llvm.quoted, "!", ('"', '"'), ["string"]), - ) - .prefix1(llvm.bare, "!", "foo") - .then1(llvm.equal, "=") - .then2(llvm.meta, ("!{", "}"), Matcher::new().prefix1(llvm.bare, "!", "0")) - // - .prefix1(llvm.bare, "@", "glb") - .then1(llvm.equal, "=") - .then1(llvm.global, "global") - .prefix2(llvm.int, "i", 10, ["8"]) - .then2(llvm.dec, 10, ["0"]) - // - .then1(llvm.define, "define") - .then1(llvm.void, "void") - .prefix1(llvm.bare, "@", "f") - .then2( - llvm.parens, - ("(", ")"), - Matcher::new() - .then1(llvm.ptr, "ptr") - .prefix1(llvm.bare, "%", "a"), - ) - .then2( - llvm.braces, - ("{", "}"), - Matcher::new() - .prefix1(llvm.bare, "%", "c") - .then1(llvm.equal, "=") - .then1(llvm.icmp, "icmp") - .then1(llvm.icmp_eq, "eq") - .then1(llvm.ptr, "ptr") - .prefix1(llvm.bare, "%", "a") - .then1(llvm.comma, ",") - .prefix1(llvm.bare, "@", "glb") - // - .then1(llvm.br, "br") - .prefix2(llvm.int, "i", 10, ["1"]) - .prefix1(llvm.bare, "%", "c") - .then1(llvm.comma, ",") - .then1(llvm.label, "label") - .prefix1(llvm.bare, "%", "BB_EXIT") - .then1(llvm.comma, ",") - .then1(llvm.label, "label") - .prefix1(llvm.bare, "%", "BB_CONTINUE") - // - .suffix1(llvm.label_ident, "BB_EXIT", ":") - .comments(["; escapes %a\n"]) - // - .then1(llvm.call, "call") - .then1(llvm.void, 
"void") - .prefix1(llvm.bare, "@", "exit") - .then2(llvm.parens, ("(", ")"), Matcher::new()) - // - .then1(llvm.unreachable, "unreachable") - // - .suffix1(llvm.label_ident, "BB_CONTINUE", ":") - .then1(llvm.ret, "ret") - .then1(llvm.void, "void"), - ) - .eof() - .assert_matches(&tokens) -} diff --git a/ilex/tests/llvm/main.rs b/ilex/tests/llvm/main.rs new file mode 100644 index 0000000..478a3c9 --- /dev/null +++ b/ilex/tests/llvm/main.rs @@ -0,0 +1,113 @@ +use ilex::rule::*; +use ilex::Context; +use ilex::Lexeme; + +#[ilex::spec] +struct Llvm { + #[rule(";")] + comment: Lexeme, + + #[rule('(', ')')] + parens: Lexeme, + #[rule('[', ']')] + brackets: Lexeme, + #[rule('<', '>')] + vector: Lexeme, + #[rule('{', '}')] + braces: Lexeme, + #[rule("<{", "}>")] + packed: Lexeme, + #[rule("!{", "}")] + meta: Lexeme, + + #[rule(',')] + comma: Lexeme, + #[rule('=')] + equal: Lexeme, + #[rule('*')] + star: Lexeme, + #[rule('x')] + times: Lexeme, + + br: Lexeme, + call: Lexeme, + icmp: Lexeme, + #[rule("eq")] + icmp_eq: Lexeme, + ret: Lexeme, + unreachable: Lexeme, + + constant: Lexeme, + declare: Lexeme, + define: Lexeme, + global: Lexeme, + + label: Lexeme, + null: Lexeme, + ptr: Lexeme, + #[rule(Digital::new(10).prefix("i"))] + int: Lexeme, + void: Lexeme, + + private: Lexeme, + unnamed_addr: Lexeme, + nocapture: Lexeme, + nounwind: Lexeme, + + #[named] + #[rule(Quoted::new('"') + .fixed_length_escape(r"\", 2) + .prefixes(["", "c"]))] + string: Lexeme, + + #[named("identifier")] + #[rule(Ident::new() + .ascii_only() + .extra_starts(".0123456789".chars()) + .suffix(":"))] + label_ident: Lexeme, + + #[named("identifier")] + #[rule(Ident::new() + .ascii_only() + .extra_starts(".0123456789".chars()) + .prefixes(["!", "@", "%"]))] + bare: Lexeme, + + #[named("quoted identifier")] + #[rule(Quoted::new('"') + .fixed_length_escape(r"\", 2) + .prefixes(["!", "@", "%"]))] + quoted: Lexeme, + + #[named = "number"] + #[rule(Digital::new(10) + .minus() + .point_limit(0..2) + 
.exponents(["e", "E"], Digits::new(10).plus().minus()))] + dec: Lexeme, + + #[named = "number"] + #[rule(Digital::new(16).minus().prefix("0x"))] + hex: Lexeme, +} + +#[gilded::test("tests/llvm/*.ll")] +fn llvm(test: &mut gilded::Test) { + let ctx = Context::new(); + let report = ctx.new_report(); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + match file.lex(Llvm::get().spec(), &report) { + Ok(stream) => { + test.output("tokens.yaml", stream.summary()); + test.output("stderr", "".into()); + } + Err(fatal) => { + test.output("tokens.yaml", "".into()); + test.output("stderr", format!("{fatal:?}")); + } + } +} diff --git a/ilex/tests/llvm/smoke.ll b/ilex/tests/llvm/smoke.ll new file mode 100644 index 0000000..089954e --- /dev/null +++ b/ilex/tests/llvm/smoke.ll @@ -0,0 +1,27 @@ +; Declare the string constant as a global constant. +@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00" + +; External declaration of the puts function +declare i32 @"non trivial name"(ptr nocapture) nounwind + +; Definition of main function +define i32 @main(i32 %0, ptr %1) { + ; Call puts function to write out the string to stdout. 
+ call i32 @"non trivial name"(ptr @.str) + ret i32 0 +} + +; Named metadata +!0 = !{i32 42, null, !"string"} +!foo = !{!0} +@glb = global i8 0 + +define void @f(ptr %a) { + %c = icmp eq ptr %a, @glb + br i1 %c, label %BB_EXIT, label %BB_CONTINUE ; escapes %a +BB_EXIT: + call void @exit() + unreachable +BB_CONTINUE: + ret void +} \ No newline at end of file diff --git a/ilex/tests/llvm/smoke.tokens.yaml b/ilex/tests/llvm/smoke.tokens.yaml new file mode 100644 index 0000000..3400539 --- /dev/null +++ b/ilex/tests/llvm/smoke.tokens.yaml @@ -0,0 +1,453 @@ +- ident: + lexeme: 32 + span: {span: [52, 57], text: "@.str"} + prefix: {span: [52, 53], text: "@"} + name: {span: [53, 57], text: ".str"} +- keyword: + lexeme: 8 + span: {span: [58, 59], text: "="} +- keyword: + lexeme: 26 + span: {span: [60, 67], text: "private"} +- keyword: + lexeme: 27 + span: {span: [68, 80], text: "unnamed_addr"} +- keyword: + lexeme: 17 + span: {span: [81, 89], text: "constant"} +- bracket: + lexeme: 2 + span: {span: [90, 99], text: "[13 x i8]"} + delims: + - {span: [90, 91], text: "["} + - {span: [98, 99], text: "]"} + contents: + - ident: + lexeme: 34 + span: {span: [91, 93], text: "13"} + radix: 10 + blocks: [{span: [91, 93], text: "13"}] + exponents: [] + - keyword: + lexeme: 10 + span: {span: [94, 95], text: "x"} + - ident: + lexeme: 24 + span: {span: [96, 98], text: "i8"} + prefix: {span: [96, 97], text: "i"} + radix: 10 + blocks: [{span: [97, 98], text: "8"}] + exponents: [] +- quoted: + lexeme: 30 + span: + span: [100, 120] + text: "c\"hello world\\0A\\00\"" + prefix: {span: [100, 101], text: "c"} + delims: + - {span: [101, 102], text: "\""} + - {span: [119, 120], text: "\""} + contents: + - text: {span: [102, 113], text: "hello world"} + - esc: {span: [113, 114], text: "\\"} + data: {span: [114, 116], text: "0A"} + - esc: {span: [116, 117], text: "\\"} + data: {span: [117, 119], text: "00"} +- keyword: + lexeme: 18 + span: {span: [166, 173], text: "declare"} +- ident: + lexeme: 24 + 
span: {span: [174, 177], text: "i32"} + prefix: {span: [174, 175], text: "i"} + radix: 10 + blocks: [{span: [175, 177], text: "32"}] + exponents: [] +- quoted: + lexeme: 33 + span: + span: [178, 197] + text: "@\"non trivial name\"" + prefix: {span: [178, 179], text: "@"} + delims: + - {span: [179, 180], text: "\""} + - {span: [196, 197], text: "\""} + contents: + - text: + span: [180, 196] + text: "non trivial name" +- bracket: + lexeme: 1 + span: + span: [197, 212] + text: "(ptr nocapture)" + delims: + - {span: [197, 198], text: "("} + - {span: [211, 212], text: ")"} + contents: + - keyword: + lexeme: 23 + span: {span: [198, 201], text: "ptr"} + - keyword: + lexeme: 28 + span: {span: [202, 211], text: "nocapture"} +- keyword: + lexeme: 29 + span: {span: [213, 221], text: "nounwind"} +- keyword: + lexeme: 19 + span: {span: [253, 259], text: "define"} +- ident: + lexeme: 24 + span: {span: [260, 263], text: "i32"} + prefix: {span: [260, 261], text: "i"} + radix: 10 + blocks: [{span: [261, 263], text: "32"}] + exponents: [] +- ident: + lexeme: 32 + span: {span: [264, 269], text: "@main"} + prefix: {span: [264, 265], text: "@"} + name: {span: [265, 269], text: "main"} +- bracket: + lexeme: 1 + span: + span: [269, 285] + text: "(i32 %0, ptr %1)" + delims: + - {span: [269, 270], text: "("} + - {span: [284, 285], text: ")"} + contents: + - ident: + lexeme: 24 + span: {span: [270, 273], text: "i32"} + prefix: {span: [270, 271], text: "i"} + radix: 10 + blocks: [{span: [271, 273], text: "32"}] + exponents: [] + - ident: + lexeme: 32 + span: {span: [274, 276], text: "%0"} + prefix: {span: [274, 275], text: "%"} + name: {span: [275, 276], text: "0"} + - keyword: + lexeme: 7 + span: {span: [276, 277], text: ","} + - keyword: + lexeme: 23 + span: {span: [278, 281], text: "ptr"} + - ident: + lexeme: 32 + span: {span: [282, 284], text: "%1"} + prefix: {span: [282, 283], text: "%"} + name: {span: [283, 284], text: "1"} +- bracket: + lexeme: 4 + span: + span: [286, 401] + text: | + 
{ + ; Call puts function to write out the string to stdout. + call i32 @"non trivial name"(ptr @.str) + ret i32 0 + } + delims: + - {span: [286, 287], text: "{"} + - {span: [400, 401], text: "}"} + contents: + - keyword: + lexeme: 12 + span: {span: [348, 352], text: "call"} + - ident: + lexeme: 24 + span: {span: [353, 356], text: "i32"} + prefix: {span: [353, 354], text: "i"} + radix: 10 + blocks: [{span: [354, 356], text: "32"}] + exponents: [] + - quoted: + lexeme: 33 + span: + span: [357, 376] + text: "@\"non trivial name\"" + prefix: {span: [357, 358], text: "@"} + delims: + - {span: [358, 359], text: "\""} + - {span: [375, 376], text: "\""} + contents: + - text: + span: [359, 375] + text: "non trivial name" + - bracket: + lexeme: 1 + span: {span: [376, 387], text: "(ptr @.str)"} + delims: + - {span: [376, 377], text: "("} + - {span: [386, 387], text: ")"} + contents: + - keyword: + lexeme: 23 + span: {span: [377, 380], text: "ptr"} + - ident: + lexeme: 32 + span: {span: [381, 386], text: "@.str"} + prefix: {span: [381, 382], text: "@"} + name: {span: [382, 386], text: ".str"} + - keyword: + lexeme: 15 + span: {span: [390, 393], text: "ret"} + - ident: + lexeme: 24 + span: {span: [394, 397], text: "i32"} + prefix: {span: [394, 395], text: "i"} + radix: 10 + blocks: [{span: [395, 397], text: "32"}] + exponents: [] + - ident: + lexeme: 34 + span: {span: [398, 399], text: "0"} + radix: 10 + blocks: [{span: [398, 399], text: "0"}] + exponents: [] +- ident: + lexeme: 32 + span: {span: [420, 422], text: "!0"} + prefix: {span: [420, 421], text: "!"} + name: {span: [421, 422], text: "0"} +- keyword: + lexeme: 8 + span: {span: [423, 424], text: "="} +- bracket: + lexeme: 6 + span: + span: [425, 451] + text: "!{i32 42, null, !\"string\"}" + delims: + - {span: [425, 427], text: "!{"} + - {span: [450, 451], text: "}"} + contents: + - ident: + lexeme: 24 + span: {span: [427, 430], text: "i32"} + prefix: {span: [427, 428], text: "i"} + radix: 10 + blocks: [{span: [428, 430], 
text: "32"}] + exponents: [] + - ident: + lexeme: 34 + span: {span: [431, 433], text: "42"} + radix: 10 + blocks: [{span: [431, 433], text: "42"}] + exponents: [] + - keyword: + lexeme: 7 + span: {span: [433, 434], text: ","} + - keyword: + lexeme: 22 + span: {span: [435, 439], text: "null"} + - keyword: + lexeme: 7 + span: {span: [439, 440], text: ","} + - quoted: + lexeme: 33 + span: {span: [441, 450], text: "!\"string\""} + prefix: {span: [441, 442], text: "!"} + delims: + - {span: [442, 443], text: "\""} + - {span: [449, 450], text: "\""} + contents: [{text: {span: [443, 449], text: "string"}}] +- ident: + lexeme: 32 + span: {span: [452, 456], text: "!foo"} + prefix: {span: [452, 453], text: "!"} + name: {span: [453, 456], text: "foo"} +- keyword: + lexeme: 8 + span: {span: [457, 458], text: "="} +- bracket: + lexeme: 6 + span: {span: [459, 464], text: "!{!0}"} + delims: + - {span: [459, 461], text: "!{"} + - {span: [463, 464], text: "}"} + contents: + - ident: + lexeme: 32 + span: {span: [461, 463], text: "!0"} + prefix: {span: [461, 462], text: "!"} + name: {span: [462, 463], text: "0"} +- ident: + lexeme: 32 + span: {span: [465, 469], text: "@glb"} + prefix: {span: [465, 466], text: "@"} + name: {span: [466, 469], text: "glb"} +- keyword: + lexeme: 8 + span: {span: [470, 471], text: "="} +- keyword: + lexeme: 20 + span: {span: [472, 478], text: "global"} +- ident: + lexeme: 24 + span: {span: [479, 481], text: "i8"} + prefix: {span: [479, 480], text: "i"} + radix: 10 + blocks: [{span: [480, 481], text: "8"}] + exponents: [] +- ident: + lexeme: 34 + span: {span: [482, 483], text: "0"} + radix: 10 + blocks: [{span: [482, 483], text: "0"}] + exponents: [] +- keyword: + lexeme: 19 + span: {span: [485, 491], text: "define"} +- keyword: + lexeme: 25 + span: {span: [492, 496], text: "void"} +- ident: + lexeme: 32 + span: {span: [497, 499], text: "@f"} + prefix: {span: [497, 498], text: "@"} + name: {span: [498, 499], text: "f"} +- bracket: + lexeme: 1 + span: {span: 
[499, 507], text: "(ptr %a)"} + delims: + - {span: [499, 500], text: "("} + - {span: [506, 507], text: ")"} + contents: + - keyword: + lexeme: 23 + span: {span: [500, 503], text: "ptr"} + - ident: + lexeme: 32 + span: {span: [504, 506], text: "%a"} + prefix: {span: [504, 505], text: "%"} + name: {span: [505, 506], text: "a"} +- bracket: + lexeme: 4 + span: + span: [508, 666] + text: | + { + %c = icmp eq ptr %a, @glb + br i1 %c, label %BB_EXIT, label %BB_CONTINUE ; escapes %a + BB_EXIT: + call void @exit() + unreachable + BB_CONTINUE: + ret void + } + delims: + - {span: [508, 509], text: "{"} + - {span: [665, 666], text: "}"} + contents: + - ident: + lexeme: 32 + span: {span: [512, 514], text: "%c"} + prefix: {span: [512, 513], text: "%"} + name: {span: [513, 514], text: "c"} + - keyword: + lexeme: 8 + span: {span: [515, 516], text: "="} + - keyword: + lexeme: 13 + span: {span: [517, 521], text: "icmp"} + - keyword: + lexeme: 14 + span: {span: [522, 524], text: "eq"} + - keyword: + lexeme: 23 + span: {span: [525, 528], text: "ptr"} + - ident: + lexeme: 32 + span: {span: [529, 531], text: "%a"} + prefix: {span: [529, 530], text: "%"} + name: {span: [530, 531], text: "a"} + - keyword: + lexeme: 7 + span: {span: [531, 532], text: ","} + - ident: + lexeme: 32 + span: {span: [533, 537], text: "@glb"} + prefix: {span: [533, 534], text: "@"} + name: {span: [534, 537], text: "glb"} + - keyword: + lexeme: 11 + span: {span: [540, 542], text: "br"} + - ident: + lexeme: 24 + span: {span: [543, 545], text: "i1"} + prefix: {span: [543, 544], text: "i"} + radix: 10 + blocks: [{span: [544, 545], text: "1"}] + exponents: [] + - ident: + lexeme: 32 + span: {span: [546, 548], text: "%c"} + prefix: {span: [546, 547], text: "%"} + name: {span: [547, 548], text: "c"} + - keyword: + lexeme: 7 + span: {span: [548, 549], text: ","} + - keyword: + lexeme: 21 + span: {span: [550, 555], text: "label"} + - ident: + lexeme: 32 + span: {span: [556, 564], text: "%BB_EXIT"} + prefix: {span: [556, 
557], text: "%"} + name: {span: [557, 564], text: "BB_EXIT"} + - keyword: + lexeme: 7 + span: {span: [564, 565], text: ","} + - keyword: + lexeme: 21 + span: {span: [566, 571], text: "label"} + - ident: + lexeme: 32 + span: {span: [572, 584], text: "%BB_CONTINUE"} + prefix: {span: [572, 573], text: "%"} + name: {span: [573, 584], text: "BB_CONTINUE"} + - ident: + lexeme: 31 + span: {span: [598, 606], text: "BB_EXIT:"} + suffix: {span: [605, 606], text: ":"} + name: {span: [598, 605], text: "BB_EXIT"} + - keyword: + lexeme: 12 + span: {span: [609, 613], text: "call"} + - keyword: + lexeme: 25 + span: {span: [614, 618], text: "void"} + - ident: + lexeme: 32 + span: {span: [619, 624], text: "@exit"} + prefix: {span: [619, 620], text: "@"} + name: {span: [620, 624], text: "exit"} + - bracket: + lexeme: 1 + span: {span: [624, 626], text: "()"} + delims: + - {span: [624, 625], text: "("} + - {span: [625, 626], text: ")"} + contents: [] + - keyword: + lexeme: 16 + span: {span: [629, 640], text: "unreachable"} + - ident: + lexeme: 31 + span: {span: [641, 653], text: "BB_CONTINUE:"} + suffix: {span: [652, 653], text: ":"} + name: {span: [641, 652], text: "BB_CONTINUE"} + - keyword: + lexeme: 15 + span: {span: [656, 659], text: "ret"} + - keyword: + lexeme: 25 + span: {span: [660, 664], text: "void"} +- eof: + lexeme: 2147483647 + span: {span: [666, 666], text: ""} diff --git a/ilex/tests/numbers.rs b/ilex/tests/numbers/main.rs similarity index 62% rename from ilex/tests/numbers.rs rename to ilex/tests/numbers/main.rs index 97ec5d7..715ac31 100644 --- a/ilex/tests/numbers.rs +++ b/ilex/tests/numbers/main.rs @@ -1,6 +1,8 @@ use ilex::fp::Fp64; +use ilex::report::Report; use ilex::rule::*; use ilex::token; +use ilex::Context; use ilex::Lexeme; #[ilex::spec] @@ -54,48 +56,49 @@ struct Numbers { dec: Lexeme, } -#[test] -fn numbers() { - let lex = Numbers::get(); - let text = r#" - 0, - -00, - -0.0, - 123.456e78, - 9e9, - -9e9, - +9e+9, - 9e-9, - -0777, - 0o777, - %1210, - 
0b0.0000000101, - 0o0.0024, - 0O1.01p01, - 0xfff.eep+10, - $DEADBEEF, - -0q0123.0123, - 3^a, - "#; - - let ctx = ilex::Context::new(); - let _u = ctx.use_for_debugging_spans(); +#[gilded::test("tests/numbers/*.txt")] +fn numbers(test: &mut gilded::Test) { + let ctx = Context::new(); let report = ctx.new_report(); - let tokens = ctx - .new_file("test.file", text) - .lex(lex.spec(), &report) + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) .unwrap(); - eprintln!("stream: {tokens:#?}"); - let mut cursor = tokens.cursor(); + match file.lex(Numbers::get().spec(), &report) { + Ok(stream) => { + test.output("tokens.yaml", stream.summary()); + match parse(Numbers::get(), stream.cursor(), &report) { + Ok(v) => { + test.output("fp64.txt", format!("{v:#?}")); + test.output("stderr", "".into()) + } + Err(fatal) => { + test.output("fp64.txt", "".into()); + test.output("stderr", format!("{fatal:?}")); + } + } + } + Err(fatal) => { + test.output("tokens.yaml", "".into()); + test.output("stderr", format!("{fatal:?}")); + test.output("fp64.txt", "".into()); + } + } +} + +fn parse( + lex: &Numbers, + mut cursor: ilex::token::Cursor, + report: &Report, +) -> Result, ilex::report::Fatal> { let numbers = cursor .delimited(lex.comma, |cursor| loop { let value = token::switch() .case(Lexeme::eof(), |_, _| Err(false)) .cases([lex.dec, lex.bin, lex.oct, lex.hex, lex.qua], |num, _| { - Ok(num.to_float::(.., &report).unwrap()) + Ok(num.to_float::(.., report).unwrap()) }) - .take(cursor, &report); + .take(cursor, report); match value { None => { cursor.back_up(1); @@ -108,33 +111,6 @@ fn numbers() { }) .map(|(v, _)| v) .collect::>(); - cursor.expect_finished(&report); - report.fatal_or(()).unwrap(); - - assert_eq!( - numbers, - [ - "0", - "-0", - "-0", - "123.456e78", - "9e9", - "-9e9", - "9e9", - "9e-9", - "-511", - "511", - "4", - "0.0048828125", - "0.0048828125", - "2.03125", - "4194232", - "3735928559", - "-27.10546875", - "3e10", - ] - .into_iter() - 
.map(Fp64::new) - .collect::>() - ); + cursor.expect_finished(report); + report.fatal_or(numbers) } diff --git a/ilex/tests/numbers/numbers.fp64.txt b/ilex/tests/numbers/numbers.fp64.txt new file mode 100644 index 0000000..19b5cdd --- /dev/null +++ b/ilex/tests/numbers/numbers.fp64.txt @@ -0,0 +1,20 @@ +[ + 0.0, + -0.0, + -0.0, + 1.23456e+80, + 9.0e+9, + -9.0e+9, + 9.0e+9, + 8.9999999999999995e-9, + -511.0, + 511.0, + 4.0, + 0.0048828125, + 0.0048828125, + 2.03125, + 4194232.0, + 3735928559.0, + -27.10546875, + 3.0e+10, +] \ No newline at end of file diff --git a/ilex/tests/numbers/numbers.tokens.yaml b/ilex/tests/numbers/numbers.tokens.yaml new file mode 100644 index 0000000..0a3cfe9 --- /dev/null +++ b/ilex/tests/numbers/numbers.tokens.yaml @@ -0,0 +1,234 @@ +- ident: + lexeme: 5 + span: {span: [0, 1], text: "0"} + radix: 10 + blocks: [{span: [0, 1], text: "0"}] + exponents: [] +- keyword: + lexeme: 0 + span: {span: [1, 2], text: ","} +- ident: + lexeme: 4 + span: {span: [3, 6], text: "-00"} + prefix: {span: [3, 4], text: "-"} + radix: 8 + sign: "-" + blocks: [{span: [5, 6], text: "0"}] + exponents: [] +- keyword: + lexeme: 0 + span: {span: [6, 7], text: ","} +- ident: + lexeme: 5 + span: {span: [8, 12], text: "-0.0"} + radix: 10 + sign: "-" + blocks: + - {span: [9, 10], text: "0"} + - {span: [11, 12], text: "0"} + exponents: [] +- keyword: + lexeme: 0 + span: {span: [12, 13], text: ","} +- ident: + lexeme: 5 + span: {span: [14, 24], text: "123.456e78"} + radix: 10 + blocks: + - {span: [14, 17], text: "123"} + - {span: [18, 21], text: "456"} + exponents: + - span: {span: [14, 24], text: "123.456e78"} + prefix: {span: [21, 22], text: "e"} + radix: 10 + blocks: [{span: [22, 24], text: "78"}] +- keyword: + lexeme: 0 + span: {span: [24, 25], text: ","} +- ident: + lexeme: 5 + span: {span: [26, 29], text: "9e9"} + radix: 10 + blocks: [{span: [26, 27], text: "9"}] + exponents: + - span: {span: [26, 29], text: "9e9"} + prefix: {span: [27, 28], text: "e"} + radix: 10 + 
blocks: [{span: [28, 29], text: "9"}] +- keyword: + lexeme: 0 + span: {span: [29, 30], text: ","} +- ident: + lexeme: 5 + span: {span: [31, 35], text: "-9e9"} + radix: 10 + sign: "-" + blocks: [{span: [32, 33], text: "9"}] + exponents: + - span: {span: [31, 35], text: "-9e9"} + prefix: {span: [33, 34], text: "e"} + radix: 10 + blocks: [{span: [34, 35], text: "9"}] +- keyword: + lexeme: 0 + span: {span: [35, 36], text: ","} +- ident: + lexeme: 5 + span: {span: [37, 42], text: "+9e+9"} + radix: 10 + sign: "+" + blocks: [{span: [38, 39], text: "9"}] + exponents: + - span: {span: [37, 42], text: "+9e+9"} + prefix: {span: [39, 40], text: "e"} + radix: 10 + sign: "+" + blocks: [{span: [41, 42], text: "9"}] +- keyword: + lexeme: 0 + span: {span: [42, 43], text: ","} +- ident: + lexeme: 5 + span: {span: [44, 48], text: "9e-9"} + radix: 10 + blocks: [{span: [44, 45], text: "9"}] + exponents: + - span: {span: [44, 48], text: "9e-9"} + prefix: {span: [45, 46], text: "e"} + radix: 10 + sign: "-" + blocks: [{span: [47, 48], text: "9"}] +- keyword: + lexeme: 0 + span: {span: [48, 49], text: ","} +- ident: + lexeme: 4 + span: {span: [50, 55], text: "-0777"} + prefix: {span: [50, 51], text: "-"} + radix: 8 + sign: "-" + blocks: [{span: [52, 55], text: "777"}] + exponents: [] +- keyword: + lexeme: 0 + span: {span: [55, 56], text: ","} +- ident: + lexeme: 4 + span: {span: [57, 62], text: "0o777"} + prefix: {span: [57, 59], text: "0o"} + radix: 8 + blocks: [{span: [59, 62], text: "777"}] + exponents: [] +- keyword: + lexeme: 0 + span: {span: [62, 63], text: ","} +- ident: + lexeme: 1 + span: {span: [64, 69], text: "%1210"} + prefix: {span: [64, 65], text: "%"} + radix: 2 + blocks: [{span: [65, 66], text: "1"}] + exponents: + - span: {span: [64, 69], text: "%1210"} + prefix: {span: [66, 67], text: "2"} + radix: 2 + blocks: [{span: [67, 69], text: "10"}] +- keyword: + lexeme: 0 + span: {span: [69, 70], text: ","} +- ident: + lexeme: 1 + span: {span: [71, 85], text: "0b0.0000000101"} + 
prefix: {span: [71, 73], text: "0b"} + radix: 2 + blocks: + - {span: [73, 74], text: "0"} + - {span: [75, 85], text: "0000000101"} + exponents: [] +- keyword: + lexeme: 0 + span: {span: [85, 86], text: ","} +- ident: + lexeme: 4 + span: {span: [87, 95], text: "0o0.0024"} + prefix: {span: [87, 89], text: "0o"} + radix: 8 + blocks: + - {span: [89, 90], text: "0"} + - {span: [91, 95], text: "0024"} + exponents: [] +- keyword: + lexeme: 0 + span: {span: [95, 96], text: ","} +- ident: + lexeme: 4 + span: {span: [97, 106], text: "0O1.01p01"} + prefix: {span: [97, 99], text: "0O"} + radix: 8 + blocks: + - {span: [99, 100], text: "1"} + - {span: [101, 103], text: "01"} + exponents: + - span: {span: [97, 106], text: "0O1.01p01"} + prefix: {span: [103, 104], text: "p"} + radix: 10 + blocks: [{span: [104, 106], text: "01"}] +- keyword: + lexeme: 0 + span: {span: [106, 107], text: ","} +- ident: + lexeme: 2 + span: {span: [108, 120], text: "0xfff.eep+10"} + prefix: {span: [108, 110], text: "0x"} + radix: 16 + blocks: + - {span: [110, 113], text: "fff"} + - {span: [114, 116], text: "ee"} + exponents: + - span: {span: [108, 120], text: "0xfff.eep+10"} + prefix: {span: [116, 117], text: "p"} + radix: 10 + sign: "+" + blocks: [{span: [118, 120], text: "10"}] +- keyword: + lexeme: 0 + span: {span: [120, 121], text: ","} +- ident: + lexeme: 2 + span: {span: [122, 131], text: "$DEADBEEF"} + prefix: {span: [122, 123], text: "$"} + radix: 16 + blocks: [{span: [123, 131], text: "DEADBEEF"}] + exponents: [] +- keyword: + lexeme: 0 + span: {span: [131, 132], text: ","} +- ident: + lexeme: 3 + span: {span: [133, 145], text: "-0q0123.0123"} + prefix: {span: [133, 135], text: "-0"} + radix: 4 + sign: "-" + blocks: + - {span: [136, 140], text: "0123"} + - {span: [141, 145], text: "0123"} + exponents: [] +- keyword: + lexeme: 0 + span: {span: [145, 146], text: ","} +- ident: + lexeme: 5 + span: {span: [147, 150], text: "3^a"} + radix: 10 + blocks: [{span: [147, 148], text: "3"}] + exponents: + 
- span: {span: [147, 150], text: "3^a"} + prefix: {span: [148, 149], text: "^"} + radix: 16 + blocks: [{span: [149, 150], text: "a"}] +- keyword: + lexeme: 0 + span: {span: [150, 151], text: ","} +- eof: + lexeme: 2147483647 + span: {span: [151, 151], text: ""} diff --git a/ilex/tests/numbers/numbers.txt b/ilex/tests/numbers/numbers.txt new file mode 100644 index 0000000..6a9cd42 --- /dev/null +++ b/ilex/tests/numbers/numbers.txt @@ -0,0 +1,18 @@ +0, +-00, +-0.0, +123.456e78, +9e9, +-9e9, ++9e+9, +9e-9, +-0777, +0o777, +%1210, +0b0.0000000101, +0o0.0024, +0O1.01p01, +0xfff.eep+10, +$DEADBEEF, +-0q0123.0123, +3^a, \ No newline at end of file diff --git a/ilex/tests/ui/ambiguous.rs b/ilex/tests/ui/ambiguous.rs deleted file mode 100644 index bf61deb..0000000 --- a/ilex/tests/ui/ambiguous.rs +++ /dev/null @@ -1,176 +0,0 @@ -use ilex::rule::*; -use ilex::testing; -use ilex::Context; -use ilex::Lexeme; - -#[ilex::spec] -struct Spec { - #[rule("null")] - kw: Lexeme, - #[rule("-null")] - kw2: Lexeme, - #[rule(")null")] - kw3: Lexeme, - - #[rule(Comment::nesting(Bracket::rust_style( - "/", - ("-", ""), - ("", "-"), - )))] - cm: Lexeme, - #[rule(Comment::nesting(Bracket::cxx_style( - Ident::new().min_len(1), - ("--", ""), - ("", ""), - )))] - cm2: Lexeme, - #[rule(Bracket::cxx_style( - Ident::new(), - ("$", "["), - ("]", ""), - ))] - br: Lexeme, - #[rule(Ident::new() - .prefix("/") - .suffixes(["", "%q", "/"]))] - id: Lexeme, - #[rule(Digital::new(10) - .prefixes(["", "%"]) - .suffixes(["", "%", "q", "/"]))] - nm: Lexeme, - #[rule(Quoted::new("'") - .prefixes(["%", "q"]) - .suffixes(["", "%", "q"]))] - st: Lexeme, - #[rule(Quoted::with(Bracket::cxx_style( - Ident::new(), - ("q", "("), - (")", ""), - )))] - st2: Lexeme, -} - -#[test] -fn no_xid_after_kw() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "null nullable") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, 
"tests/ui/goldens/no_xid_after_kw.stdout"); -} - -#[test] -fn no_xid_after_br() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "$[] $null[]null $null[]nullable") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/no_xid_after_br.stdout"); -} - -#[test] -fn no_xid_after_cm() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - "--null some stuff null --null some more stuff nullnull", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/no_xid_after_cm.stdout"); -} - -#[test] -fn no_xid_after_id() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "/foo%q /null%q /foo%qua") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/no_xid_after_id.stdout"); -} - -#[test] -fn no_xid_after_nm() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "%123 %123qua") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/no_xid_after_nm.stdout"); -} - -#[test] -fn no_xid_after_st() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "q'xyz'q %'xyz'qua") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/no_xid_after_st.stdout"); -} - -#[test] -fn ambiguous_idents() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "/foo/bar/") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/ambiguous_idents.stdout"); -} - -#[test] -fn ambiguous_nums() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "1234%1234 1234/xyz") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/ambiguous_nums.stdout"); -} - -#[test] -fn 
symbols_after_comment() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - // Below, we expect -/ more comment /- to lex correctly, then lex a - // -null and a null, even though if it wasn't a comment, it would be - // ambiguous, because `--null null`` is also a valid comment. - "-/ comment /- null -/ more comment /--null null", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report_ok(&report); -} - -#[test] -fn symbols_after_quoted() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - // Below, we expect to lex a single quoted, even though `a]null` is a - // keyword. This is because searching for ambiguities stops just shy of - // the '. - "qnull(a)null", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report_ok(&report); -} diff --git a/ilex/tests/ui/goldens/ambiguous_idents.stdout b/ilex/tests/ui/ambiguous/idents.stderr similarity index 69% rename from ilex/tests/ui/goldens/ambiguous_idents.stdout rename to ilex/tests/ui/ambiguous/idents.stderr index cc36eb6..bb169ff 100644 --- a/ilex/tests/ui/goldens/ambiguous_idents.stdout +++ b/ilex/tests/ui/ambiguous/idents.stderr @@ -1,25 +1,28 @@ error: unexpected `b` in `/`-suffixed number - --> :1:6 + --> ambiguous/idents.txt:1:6 | 1 | /foo/bar/ | ^ | --- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `a` in `/`-suffixed number - --> :1:7 + --> ambiguous/idents.txt:1:7 | 1 | /foo/bar/ | ^ | --- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `r` in `/`-suffixed number - --> :1:8 + --> ambiguous/idents.txt:1:8 | 1 | /foo/bar/ | ^ | --- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: aborting due to 3 errors 
diff --git a/ilex/tests/ui/ambiguous/idents.txt b/ilex/tests/ui/ambiguous/idents.txt new file mode 100644 index 0000000..f2f1d82 --- /dev/null +++ b/ilex/tests/ui/ambiguous/idents.txt @@ -0,0 +1 @@ +/foo/bar/ diff --git a/ilex/tests/ui/ambiguous/no_xid_after_br.stderr b/ilex/tests/ui/ambiguous/no_xid_after_br.stderr new file mode 100644 index 0000000..29382cb --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_br.stderr @@ -0,0 +1,9 @@ +error: unexpected closing `able` + --> ambiguous/no_xid_after_br.txt:1:28 + | +1 | $[] $null[]null $null[]nullable + | ^^^^ expected to be opened by `--able` + | + = note: reported at: ilex/src/rt/emit2.rs:254:22 + +error: aborting due to previous error diff --git a/ilex/tests/ui/ambiguous/no_xid_after_br.txt b/ilex/tests/ui/ambiguous/no_xid_after_br.txt new file mode 100644 index 0000000..d680ddc --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_br.txt @@ -0,0 +1 @@ +$[] $null[]null $null[]nullable diff --git a/ilex/tests/ui/goldens/no_xid_after_cm.stdout b/ilex/tests/ui/ambiguous/no_xid_after_cm.stderr similarity index 77% rename from ilex/tests/ui/goldens/no_xid_after_cm.stdout rename to ilex/tests/ui/ambiguous/no_xid_after_cm.stderr index 4025d78..b9181e7 100644 --- a/ilex/tests/ui/goldens/no_xid_after_cm.stdout +++ b/ilex/tests/ui/ambiguous/no_xid_after_cm.stderr @@ -1,9 +1,10 @@ error: extraneous characters after `--null ... 
null` - --> :1:51 + --> ambiguous/no_xid_after_cm.txt:1:51 | 1 | --null some stuff null --null some more stuff nullnull | ^^^^ | -- help: maybe you meant to include a space here | + = note: reported at: ilex/src/rt/emit2.rs:779:10 error: aborting due to previous error diff --git a/ilex/tests/ui/ambiguous/no_xid_after_cm.txt b/ilex/tests/ui/ambiguous/no_xid_after_cm.txt new file mode 100644 index 0000000..84d7a9a --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_cm.txt @@ -0,0 +1 @@ +--null some stuff null --null some more stuff nullnull \ No newline at end of file diff --git a/ilex/tests/ui/ambiguous/no_xid_after_id.stderr b/ilex/tests/ui/ambiguous/no_xid_after_id.stderr new file mode 100644 index 0000000..7a15292 --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_id.stderr @@ -0,0 +1,9 @@ +error: unexpected closing `ua` + --> ambiguous/no_xid_after_id.txt:1:22 + | +1 | /foo%q /null%q /foo%qua + | ^^ expected to be opened by `--ua` + | + = note: reported at: ilex/src/rt/emit2.rs:254:22 + +error: aborting due to previous error diff --git a/ilex/tests/ui/ambiguous/no_xid_after_id.txt b/ilex/tests/ui/ambiguous/no_xid_after_id.txt new file mode 100644 index 0000000..c5ddb88 --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_id.txt @@ -0,0 +1 @@ +/foo%q /null%q /foo%qua diff --git a/ilex/tests/ui/goldens/no_xid_after_kw.stdout b/ilex/tests/ui/ambiguous/no_xid_after_kw.stderr similarity index 63% rename from ilex/tests/ui/goldens/no_xid_after_kw.stdout rename to ilex/tests/ui/ambiguous/no_xid_after_kw.stderr index 83c0803..9a3a571 100644 --- a/ilex/tests/ui/goldens/no_xid_after_kw.stdout +++ b/ilex/tests/ui/ambiguous/no_xid_after_kw.stderr @@ -1,8 +1,9 @@ error: unexpected closing `nullable` - --> :1:6 + --> ambiguous/no_xid_after_kw.txt:1:6 | 1 | null nullable | ^^^^^^^^ expected to be opened by `--nullable` | + = note: reported at: ilex/src/rt/emit2.rs:254:22 error: aborting due to previous error diff --git 
a/ilex/tests/ui/ambiguous/no_xid_after_kw.txt b/ilex/tests/ui/ambiguous/no_xid_after_kw.txt new file mode 100644 index 0000000..65ed97a --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_kw.txt @@ -0,0 +1 @@ +null nullable diff --git a/ilex/tests/ui/goldens/no_xid_after_nm.stdout b/ilex/tests/ui/ambiguous/no_xid_after_nm.stderr similarity index 68% rename from ilex/tests/ui/goldens/no_xid_after_nm.stdout rename to ilex/tests/ui/ambiguous/no_xid_after_nm.stderr index 788aa50..8456863 100644 --- a/ilex/tests/ui/goldens/no_xid_after_nm.stdout +++ b/ilex/tests/ui/ambiguous/no_xid_after_nm.stderr @@ -1,25 +1,28 @@ error: unexpected `q` in `%`-prefixed number - --> :1:10 + --> ambiguous/no_xid_after_nm.txt:1:10 | 1 | %123 %123qua | ^ | ------- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `u` in `%`-prefixed number - --> :1:11 + --> ambiguous/no_xid_after_nm.txt:1:11 | 1 | %123 %123qua | ^ | ------- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `a` in `%`-prefixed number - --> :1:12 + --> ambiguous/no_xid_after_nm.txt:1:12 | 1 | %123 %123qua | ^ | ------- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: aborting due to 3 errors diff --git a/ilex/tests/ui/ambiguous/no_xid_after_nm.txt b/ilex/tests/ui/ambiguous/no_xid_after_nm.txt new file mode 100644 index 0000000..9023903 --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_nm.txt @@ -0,0 +1 @@ +%123 %123qua diff --git a/ilex/tests/ui/goldens/no_xid_after_st.stdout b/ilex/tests/ui/ambiguous/no_xid_after_st.stderr similarity index 69% rename from ilex/tests/ui/goldens/no_xid_after_st.stdout rename to ilex/tests/ui/ambiguous/no_xid_after_st.stderr index 1183322..d136573 100644 --- 
a/ilex/tests/ui/goldens/no_xid_after_st.stdout +++ b/ilex/tests/ui/ambiguous/no_xid_after_st.stderr @@ -1,9 +1,10 @@ error: extraneous characters after `%'...'q` - --> :1:16 + --> ambiguous/no_xid_after_st.txt:1:16 | 1 | q'xyz'q %'xyz'qua | ^^ | -- help: maybe you meant to include a space here | + = note: reported at: ilex/src/rt/emit2.rs:779:10 error: aborting due to previous error diff --git a/ilex/tests/ui/ambiguous/no_xid_after_st.txt b/ilex/tests/ui/ambiguous/no_xid_after_st.txt new file mode 100644 index 0000000..517c131 --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_st.txt @@ -0,0 +1 @@ +q'xyz'q %'xyz'qua \ No newline at end of file diff --git a/ilex/tests/ui/ambiguous/nums.stderr b/ilex/tests/ui/ambiguous/nums.stderr new file mode 100644 index 0000000..de384cd --- /dev/null +++ b/ilex/tests/ui/ambiguous/nums.stderr @@ -0,0 +1,9 @@ +error: unexpected closing `xyz` + --> ambiguous/nums.txt:1:16 + | +1 | 1234%1234 1234/xyz + | ^^^ expected to be opened by `--xyz` + | + = note: reported at: ilex/src/rt/emit2.rs:254:22 + +error: aborting due to previous error diff --git a/ilex/tests/ui/ambiguous/nums.txt b/ilex/tests/ui/ambiguous/nums.txt new file mode 100644 index 0000000..9b7ae1f --- /dev/null +++ b/ilex/tests/ui/ambiguous/nums.txt @@ -0,0 +1 @@ +1234%1234 1234/xyz diff --git a/ilex/tests/ui/ambiguous/symbols_after_comment.tokens.yaml b/ilex/tests/ui/ambiguous/symbols_after_comment.tokens.yaml new file mode 100644 index 0000000..e9a7299 --- /dev/null +++ b/ilex/tests/ui/ambiguous/symbols_after_comment.tokens.yaml @@ -0,0 +1,12 @@ +- keyword: + lexeme: 0 + span: {span: [14, 18], text: "null"} +- keyword: + lexeme: 1 + span: {span: [37, 42], text: "-null"} +- keyword: + lexeme: 0 + span: {span: [43, 47], text: "null"} +- eof: + lexeme: 2147483647 + span: {span: [48, 48], text: ""} diff --git a/ilex/tests/ui/ambiguous/symbols_after_comment.txt b/ilex/tests/ui/ambiguous/symbols_after_comment.txt new file mode 100644 index 0000000..f382698 --- /dev/null 
+++ b/ilex/tests/ui/ambiguous/symbols_after_comment.txt @@ -0,0 +1 @@ +-/ comment /- null -/ more comment /--null null diff --git a/ilex/tests/ui/ambiguous/symbols_after_quoted.tokens.yaml b/ilex/tests/ui/ambiguous/symbols_after_quoted.tokens.yaml new file mode 100644 index 0000000..e4c73d1 --- /dev/null +++ b/ilex/tests/ui/ambiguous/symbols_after_quoted.tokens.yaml @@ -0,0 +1,10 @@ +- quoted: + lexeme: 9 + span: {span: [0, 12], text: "qnull(a)null"} + delims: + - {span: [0, 6], text: "qnull("} + - {span: [7, 12], text: ")null"} + contents: [{text: {span: [6, 7], text: "a"}}] +- eof: + lexeme: 2147483647 + span: {span: [13, 13], text: ""} diff --git a/ilex/tests/ui/ambiguous/symbols_after_quoted.txt b/ilex/tests/ui/ambiguous/symbols_after_quoted.txt new file mode 100644 index 0000000..e0b43b3 --- /dev/null +++ b/ilex/tests/ui/ambiguous/symbols_after_quoted.txt @@ -0,0 +1 @@ +qnull(a)null diff --git a/ilex/tests/ui/digital.rs b/ilex/tests/ui/digital.rs deleted file mode 100644 index 0698517..0000000 --- a/ilex/tests/ui/digital.rs +++ /dev/null @@ -1,161 +0,0 @@ -use ilex::rule::*; -use ilex::testing; -use ilex::Context; -use ilex::Lexeme; - -#[test] -fn digit_points() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - " -1/2/3e4/5 -1/2/3/4e4/5 -1/2e4/5 -1/2/3e4/5/6 -1/2/3e4 - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/digit_points.stdout"); -} - -#[test] -fn digit_separators() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - " -all_ok@_123_._456_e_789_._012_ -no_prefix@_123_._456_e_789_._012_ -no_suffix@_123_._456_e_789_._012_ -no_point@_123_._456_e_789_._012_ -no_exp@_123_._456_e_789_._012_ - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/digit_separators.stdout"); -} - -#[test] -fn missing_digits() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ 
= ctx - .new_file( - "", - " -0xdeadbeef -0x 0xf - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/missing_digits.stdout"); -} - -#[test] -fn invalid_digits() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - " -0o777 -0o8 -0o08 -0/0/aa11g - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/invalid_digits.stdout"); -} - -#[ilex::spec] -struct Spec { - #[rule(Digital::new(16).prefix("0x"))] - m1: Lexeme, - #[rule(Digital::new(8).prefix("0o"))] - m2: Lexeme, - - #[rule( Digital::new(10) - .point_limit(2..3) - .point('/') - .exponent("e", Digits::new(10).point_limit(1..2)) - .separator_with("_", - SeparatorCornerCases { - prefix: true, - suffix: true, - around_point: true, - around_exp: true, - }))] - m0: Lexeme, - #[rule(Digital::new(10) - .prefix("all_ok@") - .point_limit(0..3) - .exponent("e", Digits::new(10).point_limit(0..3)) - .separator_with("_", - SeparatorCornerCases { - prefix: true, - suffix: true, - around_point: true, - around_exp: true, - }))] - n0: Lexeme, - #[rule( Digital::new(10) - .prefix("no_prefix@") - .point_limit(0..3) - .exponent("e", Digits::new(10).point_limit(0..3)) - .separator_with("_", - SeparatorCornerCases { - prefix: false, - suffix: true, - around_point: true, - around_exp: true, - }))] - n1: Lexeme, - #[rule(Digital::new(10) - .prefix("no_suffix@") - .point_limit(0..3) - .exponent("e", Digits::new(10).point_limit(0..3)) - .separator_with("_", - SeparatorCornerCases { - prefix: true, - suffix: false, - around_point: true, - around_exp: true, - }))] - n2: Lexeme, - #[rule( Digital::new(10) - .prefix("no_point@") - .point_limit(0..3) - .exponent("e", Digits::new(10).point_limit(0..3)) - .separator_with("_", - SeparatorCornerCases { - prefix: true, - suffix: true, - around_point: false, - around_exp: true, - }))] - n3: Lexeme, - #[rule(Digital::new(10) - .prefix("no_exp@") - .point_limit(0..3) - 
.exponent("e", Digits::new(10).point_limit(0..3)) - .separator_with("_", - SeparatorCornerCases { - prefix: true, - suffix: true, - around_point: true, - around_exp: false, - }))] - n4: Lexeme, -} diff --git a/ilex/tests/ui/goldens/invalid_digits.stdout b/ilex/tests/ui/digital/invalid.stderr similarity index 62% rename from ilex/tests/ui/goldens/invalid_digits.stdout rename to ilex/tests/ui/digital/invalid.stderr index d18e9f9..50b3962 100644 --- a/ilex/tests/ui/goldens/invalid_digits.stdout +++ b/ilex/tests/ui/digital/invalid.stderr @@ -1,41 +1,46 @@ error: unexpected `8` in `0o`-prefixed number - --> :3:3 + --> digital/invalid.txt:2:3 | -3 | 0o8 +2 | 0o8 | ^ | --- help: because this value is octal (base 8), digits should be within '0'..='7' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `8` in `0o`-prefixed number - --> :4:4 + --> digital/invalid.txt:3:4 | -4 | 0o08 +3 | 0o08 | ^ | ---- help: because this value is octal (base 8), digits should be within '0'..='7' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `a` in number - --> :5:5 + --> digital/invalid.txt:4:5 | -5 | 0/0/aa11g +4 | 0/0/aa11g | ^ | --------- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `a` in number - --> :5:6 + --> digital/invalid.txt:4:6 | -5 | 0/0/aa11g +4 | 0/0/aa11g | ^ | --------- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `g` in number - --> :5:9 + --> digital/invalid.txt:4:9 | -5 | 0/0/aa11g +4 | 0/0/aa11g | ^ | --------- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: aborting due to 5 errors diff --git a/ilex/tests/ui/digital/invalid.txt b/ilex/tests/ui/digital/invalid.txt new file mode 100644 index 0000000..a425653 --- /dev/null +++ 
b/ilex/tests/ui/digital/invalid.txt @@ -0,0 +1,4 @@ +0o777 +0o8 +0o08 +0/0/aa11g diff --git a/ilex/tests/ui/goldens/missing_digits.stdout b/ilex/tests/ui/digital/missing.stderr similarity index 63% rename from ilex/tests/ui/goldens/missing_digits.stdout rename to ilex/tests/ui/digital/missing.stderr index 7898be4..ae4fa47 100644 --- a/ilex/tests/ui/goldens/missing_digits.stdout +++ b/ilex/tests/ui/digital/missing.stderr @@ -1,9 +1,10 @@ error: expected digits after `0x`, but found ` ` - --> :3:3 + --> digital/missing.txt:2:3 | -3 | 0x 0xf +2 | 0x 0xf | ^ expected digits after `0x` | ^^ because of this prefix | + = note: reported at: ilex/src/rt/emit2.rs:540:18 error: aborting due to previous error diff --git a/ilex/tests/ui/digital/missing.txt b/ilex/tests/ui/digital/missing.txt new file mode 100644 index 0000000..dd9d6d3 --- /dev/null +++ b/ilex/tests/ui/digital/missing.txt @@ -0,0 +1,2 @@ +0xdeadbeef +0x 0xf diff --git a/ilex/tests/ui/digital/points.stderr b/ilex/tests/ui/digital/points.stderr new file mode 100644 index 0000000..02acd80 --- /dev/null +++ b/ilex/tests/ui/digital/points.stderr @@ -0,0 +1,49 @@ +error: expected at least 2 `/`s + --> digital/points.txt:2:7 + | +2 | 1/2/3/4e4/5 + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:523:16 + +error: unrecognized character + --> digital/points.txt:2:6 + | +2 | 1/2/3/4e4/5 + | ^ + | + = note: reported at: ilex/src/rt/mod.rs:36:8 + +error: expected at least 2 `/`s + --> digital/points.txt:3:1 + | +3 | 1/2e4/5 + | ^^^ + | + = note: reported at: ilex/src/rt/emit2.rs:523:16 + +error: expected at least 2 `/`s + --> digital/points.txt:4:11 + | +4 | 1/2/3e4/5/6 + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:523:16 + +error: unrecognized character + --> digital/points.txt:4:10 + | +4 | 1/2/3e4/5/6 + | ^ + | + = note: reported at: ilex/src/rt/mod.rs:36:8 + +error: expected at least 1 `/` + --> digital/points.txt:5:6 + | +5 | 1/2/3e4 + | ^^ + | + = note: reported at: ilex/src/rt/emit2.rs:523:16 + +error: 
aborting due to 6 errors diff --git a/ilex/tests/ui/digital/points.txt b/ilex/tests/ui/digital/points.txt new file mode 100644 index 0000000..b423a24 --- /dev/null +++ b/ilex/tests/ui/digital/points.txt @@ -0,0 +1,5 @@ +1/2/3e4/5 +1/2/3/4e4/5 +1/2e4/5 +1/2/3e4/5/6 +1/2/3e4 \ No newline at end of file diff --git a/ilex/tests/ui/digital/separators.stderr b/ilex/tests/ui/digital/separators.stderr new file mode 100644 index 0000000..ae70934 --- /dev/null +++ b/ilex/tests/ui/digital/separators.stderr @@ -0,0 +1,65 @@ +error: unexpected digit separator in `no_prefix@`-prefixed number + --> digital/separators.txt:2:11 + | +2 | no_prefix@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:387:36 + +error: unexpected digit separator in `no_suffix@`-prefixed number + --> digital/separators.txt:3:33 + | +3 | no_suffix@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:474:28 + +error: unexpected digit separator in `no_point@`-prefixed number + --> digital/separators.txt:4:15 + | +4 | no_point@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:404:32 + +error: unexpected digit separator in `no_point@`-prefixed number + --> digital/separators.txt:4:16 + | +4 | no_point@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:387:36 + +error: unexpected digit separator in `no_point@`-prefixed number + --> digital/separators.txt:4:27 + | +4 | no_point@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:404:32 + +error: unexpected digit separator in `no_point@`-prefixed number + --> digital/separators.txt:4:28 + | +4 | no_point@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:387:36 + +error: unexpected digit separator in `no_exp@`-prefixed number + --> digital/separators.txt:5:19 + | +5 | no_exp@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:424:34 + +error: unexpected digit separator in 
`no_exp@`-prefixed number + --> digital/separators.txt:5:20 + | +5 | no_exp@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:387:36 + +error: aborting due to 8 errors diff --git a/ilex/tests/ui/digital/separators.txt b/ilex/tests/ui/digital/separators.txt new file mode 100644 index 0000000..cc63efb --- /dev/null +++ b/ilex/tests/ui/digital/separators.txt @@ -0,0 +1,5 @@ +all_ok@_123_._456_e_789_._012_ +no_prefix@_123_._456_e_789_._012_ +no_suffix@_123_._456_e_789_._012_ +no_point@_123_._456_e_789_._012_ +no_exp@_123_._456_e_789_._012_ \ No newline at end of file diff --git a/ilex/tests/ui/eof.rs b/ilex/tests/ui/eof.rs deleted file mode 100644 index b254930..0000000 --- a/ilex/tests/ui/eof.rs +++ /dev/null @@ -1,157 +0,0 @@ -use ilex::rule::*; -use ilex::testing; -use ilex::Context; -use ilex::Lexeme; - -#[ilex::spec] -struct Spec { - #[rule("/*", "*/")] - c1: Lexeme, - - #[rule("[", "]")] - b1: Lexeme, - - #[rule("(", ")")] - b2: Lexeme, - - #[rule(Quoted::new("'"))] - q1: Lexeme, -} - -#[test] -fn eof_comment() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "/* ok /* nested */ */ /* /* not ok */") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/eof_comment.stdout"); -} - -#[test] -fn eof_comment_multiline() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - " -/* ok - /* nested */ */ -/* - /* not ok */ - - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report( - &report, - "tests/ui/goldens/eof_comment_multiline.stdout", - ); -} - -#[test] -fn eof_bracket() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "[[[]]] [[]") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/eof_bracket.stdout"); -} - -#[test] -fn eof_bracket_multiline() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = 
ctx - .new_file( - "", - " -[ - [] -][ - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report( - &report, - "tests/ui/goldens/eof_bracket_multiline.stdout", - ); -} - -#[test] -fn eof_quoted() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "'foo' '' 'bar") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/eof_quoted.stdout"); -} - -#[test] -fn eof_quoted_multiline() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - " -'foo' -'' -'bar - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report( - &report, - "tests/ui/goldens/eof_quoted_multiline.stdout", - ); -} - -#[test] -fn mixed_brackets() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "[] () [) (] [(])") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/mixed_brackets.stdout"); -} - -#[test] -fn mixed_brackets_multiline() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - " -[ - () -] -[ - ( - ] -) -[ - ) - ( -] - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report( - &report, - "tests/ui/goldens/mixed_brackets_multiline.stdout", - ); -} diff --git a/ilex/tests/ui/goldens/eof_bracket.stdout b/ilex/tests/ui/eof/bracket.stderr similarity index 64% rename from ilex/tests/ui/goldens/eof_bracket.stdout rename to ilex/tests/ui/eof/bracket.stderr index 33a4b53..ea31961 100644 --- a/ilex/tests/ui/goldens/eof_bracket.stdout +++ b/ilex/tests/ui/eof/bracket.stderr @@ -1,9 +1,10 @@ error: expected closing `]`, but found - --> :1:11 + --> eof/bracket.txt:1:11 | -1 | [[[]]] [[] +1 | [[[]]] [[] | ^ expected `]` here | - help: previously opened here | + = note: reported at: ilex/src/rt/lexer.rs:311:10 error: aborting due to previous error diff --git a/ilex/tests/ui/eof/bracket.txt b/ilex/tests/ui/eof/bracket.txt new 
file mode 100644 index 0000000..b2e2a9c --- /dev/null +++ b/ilex/tests/ui/eof/bracket.txt @@ -0,0 +1 @@ +[[[]]] [[] diff --git a/ilex/tests/ui/goldens/eof_bracket_multiline.stdout b/ilex/tests/ui/eof/bracket_multiline.stderr similarity index 62% rename from ilex/tests/ui/goldens/eof_bracket_multiline.stdout rename to ilex/tests/ui/eof/bracket_multiline.stderr index 203c2f2..9d0148b 100644 --- a/ilex/tests/ui/goldens/eof_bracket_multiline.stdout +++ b/ilex/tests/ui/eof/bracket_multiline.stderr @@ -1,9 +1,10 @@ error: expected closing `]`, but found - --> :4:3 + --> eof/bracket_multiline.txt:3:3 | -4 | ][ +3 | ][ | ^ expected `]` here | - help: previously opened here | + = note: reported at: ilex/src/rt/lexer.rs:311:10 error: aborting due to previous error diff --git a/ilex/tests/ui/eof/bracket_multiline.txt b/ilex/tests/ui/eof/bracket_multiline.txt new file mode 100644 index 0000000..9435e2c --- /dev/null +++ b/ilex/tests/ui/eof/bracket_multiline.txt @@ -0,0 +1,3 @@ +[ + [] +][ \ No newline at end of file diff --git a/ilex/tests/ui/goldens/eof_comment.stdout b/ilex/tests/ui/eof/comment.stderr similarity index 76% rename from ilex/tests/ui/goldens/eof_comment.stdout rename to ilex/tests/ui/eof/comment.stderr index ef1288a..eebb64f 100644 --- a/ilex/tests/ui/goldens/eof_comment.stdout +++ b/ilex/tests/ui/eof/comment.stderr @@ -1,9 +1,10 @@ error: expected closing `*/`, but found - --> :1:38 + --> eof/comment.txt:1:38 | 1 | /* ok /* nested */ */ /* /* not ok */ | ^ expected `*/` here | -- help: previously opened here | + = note: reported at: ilex/src/rt/emit2.rs:306:14 error: aborting due to previous error diff --git a/ilex/tests/ui/eof/comment.txt b/ilex/tests/ui/eof/comment.txt new file mode 100644 index 0000000..815cac1 --- /dev/null +++ b/ilex/tests/ui/eof/comment.txt @@ -0,0 +1 @@ +/* ok /* nested */ */ /* /* not ok */ \ No newline at end of file diff --git a/ilex/tests/ui/goldens/eof_comment_multiline.stdout b/ilex/tests/ui/eof/comment_multiline.stderr similarity 
index 59% rename from ilex/tests/ui/goldens/eof_comment_multiline.stdout rename to ilex/tests/ui/eof/comment_multiline.stderr index 88bcdcd..01a1c69 100644 --- a/ilex/tests/ui/goldens/eof_comment_multiline.stdout +++ b/ilex/tests/ui/eof/comment_multiline.stderr @@ -1,10 +1,11 @@ error: expected closing `*/`, but found - --> :5:15 + --> eof/comment_multiline.txt:4:15 | -4 | /* +3 | /* | -- help: previously opened here -5 | /* not ok */ +4 | /* not ok */ | ^ expected `*/` here | + = note: reported at: ilex/src/rt/emit2.rs:306:14 error: aborting due to previous error diff --git a/ilex/tests/ui/eof/comment_multiline.txt b/ilex/tests/ui/eof/comment_multiline.txt new file mode 100644 index 0000000..d1f7ed4 --- /dev/null +++ b/ilex/tests/ui/eof/comment_multiline.txt @@ -0,0 +1,4 @@ +/* ok + /* nested */ */ +/* + /* not ok */ diff --git a/ilex/tests/ui/goldens/mixed_brackets.stdout b/ilex/tests/ui/eof/mixed_brackets.stderr similarity index 64% rename from ilex/tests/ui/goldens/mixed_brackets.stdout rename to ilex/tests/ui/eof/mixed_brackets.stderr index ad1b1b8..243c71d 100644 --- a/ilex/tests/ui/goldens/mixed_brackets.stdout +++ b/ilex/tests/ui/eof/mixed_brackets.stderr @@ -1,32 +1,36 @@ error: unexpected closing `)` - --> :1:8 + --> eof/mixed_brackets.txt:1:8 | 1 | [] () [) (] [(]) | ^ expected to be opened by `(` | + = note: reported at: ilex/src/rt/emit2.rs:254:22 error: expected closing `)`, but found `]` - --> :1:11 + --> eof/mixed_brackets.txt:1:11 | 1 | [] () [) (] [(]) | ^ expected `)` here | - help: previously opened here | + = note: reported at: ilex/src/rt/lexer.rs:202:23 error: expected closing `)`, but found `]` - --> :1:15 + --> eof/mixed_brackets.txt:1:15 | 1 | [] () [) (] [(]) | ^ expected `)` here | - help: previously opened here | + = note: reported at: ilex/src/rt/lexer.rs:202:23 error: expected closing `)`, but found - --> :1:17 + --> eof/mixed_brackets.txt:1:17 | 1 | [] () [) (] [(]) | ^ expected `)` here | - help: previously opened here | + = note: 
reported at: ilex/src/rt/lexer.rs:311:10 error: aborting due to 4 errors diff --git a/ilex/tests/ui/eof/mixed_brackets.txt b/ilex/tests/ui/eof/mixed_brackets.txt new file mode 100644 index 0000000..0961a2b --- /dev/null +++ b/ilex/tests/ui/eof/mixed_brackets.txt @@ -0,0 +1 @@ +[] () [) (] [(]) \ No newline at end of file diff --git a/ilex/tests/ui/eof/mixed_brackets_multiline.stderr b/ilex/tests/ui/eof/mixed_brackets_multiline.stderr new file mode 100644 index 0000000..7cc82d5 --- /dev/null +++ b/ilex/tests/ui/eof/mixed_brackets_multiline.stderr @@ -0,0 +1,39 @@ +error: expected closing `)`, but found `]` + --> eof/mixed_brackets_multiline.txt:6:3 + | +5 | ( + | - help: previously opened here +6 | ] + | ^ expected `)` here + | + = note: reported at: ilex/src/rt/lexer.rs:202:23 + +error: unexpected closing `)` + --> eof/mixed_brackets_multiline.txt:9:3 + | +9 | ) + | ^ expected to be opened by `(` + | + = note: reported at: ilex/src/rt/emit2.rs:254:22 + +error: expected closing `)`, but found `]` + --> eof/mixed_brackets_multiline.txt:11:1 + | +10 | ( + | - help: previously opened here +11 | ] + | ^ expected `)` here + | + = note: reported at: ilex/src/rt/lexer.rs:202:23 + +error: expected closing `)`, but found + --> eof/mixed_brackets_multiline.txt:11:2 + | +10 | ( + | - help: previously opened here +11 | ] + | ^ expected `)` here + | + = note: reported at: ilex/src/rt/lexer.rs:311:10 + +error: aborting due to 4 errors diff --git a/ilex/tests/ui/eof/mixed_brackets_multiline.txt b/ilex/tests/ui/eof/mixed_brackets_multiline.txt new file mode 100644 index 0000000..ea6f94f --- /dev/null +++ b/ilex/tests/ui/eof/mixed_brackets_multiline.txt @@ -0,0 +1,11 @@ +[ + () +] +[ + ( + ] +) +[ + ) + ( +] \ No newline at end of file diff --git a/ilex/tests/ui/goldens/eof_quoted.stdout b/ilex/tests/ui/eof/quoted.stderr similarity index 64% rename from ilex/tests/ui/goldens/eof_quoted.stdout rename to ilex/tests/ui/eof/quoted.stderr index f6a98bb..b095d02 100644 --- 
a/ilex/tests/ui/goldens/eof_quoted.stdout +++ b/ilex/tests/ui/eof/quoted.stderr @@ -1,9 +1,10 @@ error: expected closing `'`, but found - --> :1:14 + --> eof/quoted.txt:1:14 | -1 | 'foo' '' 'bar +1 | 'foo' '' 'bar | ^ expected `'` here | - help: previously opened here | + = note: reported at: ilex/src/rt/emit2.rs:691:14 error: aborting due to previous error diff --git a/ilex/tests/ui/eof/quoted.txt b/ilex/tests/ui/eof/quoted.txt new file mode 100644 index 0000000..a92eb58 --- /dev/null +++ b/ilex/tests/ui/eof/quoted.txt @@ -0,0 +1 @@ +'foo' '' 'bar diff --git a/ilex/tests/ui/goldens/eof_quoted_multiline.stdout b/ilex/tests/ui/eof/quoted_multiline.stderr similarity index 62% rename from ilex/tests/ui/goldens/eof_quoted_multiline.stdout rename to ilex/tests/ui/eof/quoted_multiline.stderr index 1f3f1cd..ff96acd 100644 --- a/ilex/tests/ui/goldens/eof_quoted_multiline.stdout +++ b/ilex/tests/ui/eof/quoted_multiline.stderr @@ -1,9 +1,10 @@ error: expected closing `'`, but found - --> :4:5 + --> eof/quoted_multiline.txt:3:5 | -4 | 'bar +3 | 'bar | ^ expected `'` here | - help: previously opened here | + = note: reported at: ilex/src/rt/emit2.rs:691:14 error: aborting due to previous error diff --git a/ilex/tests/ui/eof/quoted_multiline.txt b/ilex/tests/ui/eof/quoted_multiline.txt new file mode 100644 index 0000000..2d4dde5 --- /dev/null +++ b/ilex/tests/ui/eof/quoted_multiline.txt @@ -0,0 +1,3 @@ +'foo' +'' +'bar diff --git a/ilex/tests/ui/goldens/ambiguous_nums.stdout b/ilex/tests/ui/goldens/ambiguous_nums.stdout deleted file mode 100644 index b5432d9..0000000 --- a/ilex/tests/ui/goldens/ambiguous_nums.stdout +++ /dev/null @@ -1,17 +0,0 @@ -error: extraneous characters after `%`-suffixed number - --> :1:6 - | -1 | 1234%1234 1234/xyz - | ^^^^ - | -- help: maybe you meant to include a space here - | - -error: extraneous characters after `/`-suffixed number - --> :1:16 - | -1 | 1234%1234 1234/xyz - | ^^^ - | -- help: maybe you meant to include a space here - | - -error: 
aborting due to 2 errors diff --git a/ilex/tests/ui/goldens/digit_points.stdout b/ilex/tests/ui/goldens/digit_points.stdout deleted file mode 100644 index b738704..0000000 --- a/ilex/tests/ui/goldens/digit_points.stdout +++ /dev/null @@ -1,43 +0,0 @@ -error: expected at least 2 `/`s - --> :3:7 - | -3 | 1/2/3/4e4/5 - | ^ - | - -error: unrecognized character - --> :3:6 - | -3 | 1/2/3/4e4/5 - | ^ - | - -error: expected at least 2 `/`s - --> :4:1 - | -4 | 1/2e4/5 - | ^^^ - | - -error: expected at least 2 `/`s - --> :5:11 - | -5 | 1/2/3e4/5/6 - | ^ - | - -error: unrecognized character - --> :5:10 - | -5 | 1/2/3e4/5/6 - | ^ - | - -error: expected at least 1 `/` - --> :6:6 - | -6 | 1/2/3e4 - | ^^ - | - -error: aborting due to 6 errors diff --git a/ilex/tests/ui/goldens/digit_separators.stdout b/ilex/tests/ui/goldens/digit_separators.stdout deleted file mode 100644 index 4fa983d..0000000 --- a/ilex/tests/ui/goldens/digit_separators.stdout +++ /dev/null @@ -1,57 +0,0 @@ -error: unexpected digit separator in `no_prefix@`-prefixed number - --> :3:11 - | -3 | no_prefix@_123_._456_e_789_._012_ - | ^ - | - -error: unexpected digit separator in `no_suffix@`-prefixed number - --> :4:33 - | -4 | no_suffix@_123_._456_e_789_._012_ - | ^ - | - -error: unexpected digit separator in `no_point@`-prefixed number - --> :5:15 - | -5 | no_point@_123_._456_e_789_._012_ - | ^ - | - -error: unexpected digit separator in `no_point@`-prefixed number - --> :5:16 - | -5 | no_point@_123_._456_e_789_._012_ - | ^ - | - -error: unexpected digit separator in `no_point@`-prefixed number - --> :5:27 - | -5 | no_point@_123_._456_e_789_._012_ - | ^ - | - -error: unexpected digit separator in `no_point@`-prefixed number - --> :5:28 - | -5 | no_point@_123_._456_e_789_._012_ - | ^ - | - -error: unexpected digit separator in `no_exp@`-prefixed number - --> :6:19 - | -6 | no_exp@_123_._456_e_789_._012_ - | ^ - | - -error: unexpected digit separator in `no_exp@`-prefixed number - --> :6:20 - | -6 | 
no_exp@_123_._456_e_789_._012_ - | ^ - | - -error: aborting due to 8 errors diff --git a/ilex/tests/ui/goldens/does_not_exist.stdout b/ilex/tests/ui/goldens/does_not_exist.stdout deleted file mode 100644 index 5791c75..0000000 --- a/ilex/tests/ui/goldens/does_not_exist.stdout +++ /dev/null @@ -1,3 +0,0 @@ -error: could not open input file `does_not_exist`: No such file or directory (os error 2) - -error: aborting due to previous error diff --git a/ilex/tests/ui/goldens/mixed_brackets_multiline.stdout b/ilex/tests/ui/goldens/mixed_brackets_multiline.stdout deleted file mode 100644 index 84d5d93..0000000 --- a/ilex/tests/ui/goldens/mixed_brackets_multiline.stdout +++ /dev/null @@ -1,35 +0,0 @@ -error: expected closing `)`, but found `]` - --> :7:3 - | -6 | ( - | - help: previously opened here -7 | ] - | ^ expected `)` here - | - -error: unexpected closing `)` - --> :10:3 - | -10 | ) - | ^ expected to be opened by `(` - | - -error: expected closing `)`, but found `]` - --> :12:1 - | -11 | ( - | - help: previously opened here -12 | ] - | ^ expected `)` here - | - -error: expected closing `)`, but found - --> :12:2 - | -11 | ( - | - help: previously opened here -12 | ] - | ^ expected `)` here - | - -error: aborting due to 4 errors diff --git a/ilex/tests/ui/goldens/no_xid_after_br.stdout b/ilex/tests/ui/goldens/no_xid_after_br.stdout deleted file mode 100644 index 5eb60d0..0000000 --- a/ilex/tests/ui/goldens/no_xid_after_br.stdout +++ /dev/null @@ -1,9 +0,0 @@ -error: extraneous characters after `$null[ ... 
]null` - --> :1:28 - | -1 | $[] $null[]null $null[]nullable - | ^^^^ - | -- help: maybe you meant to include a space here - | - -error: aborting due to previous error diff --git a/ilex/tests/ui/goldens/no_xid_after_id.stdout b/ilex/tests/ui/goldens/no_xid_after_id.stdout deleted file mode 100644 index 553403c..0000000 --- a/ilex/tests/ui/goldens/no_xid_after_id.stdout +++ /dev/null @@ -1,9 +0,0 @@ -error: extraneous characters after `/`-prefixed, `%q`-suffixed identifier - --> :1:22 - | -1 | /foo%q /null%q /foo%qua - | ^^ - | -- help: maybe you meant to include a space here - | - -error: aborting due to previous error diff --git a/ilex/tests/ui/goldens/not_utf8.stdout b/ilex/tests/ui/goldens/not_utf8.stdout deleted file mode 100644 index 02375b9..0000000 --- a/ilex/tests/ui/goldens/not_utf8.stdout +++ /dev/null @@ -1,3 +0,0 @@ -error: input file `tests/ui/not_utf8` was not valid UTF-8 - -error: aborting due to previous error diff --git a/ilex/tests/ui/main.rs b/ilex/tests/ui/main.rs index c57cad8..922356b 100644 --- a/ilex/tests/ui/main.rs +++ b/ilex/tests/ui/main.rs @@ -1,6 +1,278 @@ -mod ambiguous; -mod digital; -mod eof; -mod new_file; -mod too_small; -mod unrecognized; +use ilex::report::Options; +use ilex::rule::*; +use ilex::Context; +use ilex::Lexeme; + +#[gilded::test("tests/ui/ambiguous/*.txt")] +fn ambiguous(test: &mut gilded::Test) { + #[ilex::spec] + struct Spec { + #[rule("null")] + kw: Lexeme, + #[rule("-null")] + kw2: Lexeme, + #[rule(")null")] + kw3: Lexeme, + + #[rule(Comment::nesting(Bracket::rust_style( + "/", + ("-", ""), + ("", "-"), + )))] + cm: Lexeme, + #[rule(Comment::nesting(Bracket::cxx_style( + Ident::new().min_len(1), + ("--", ""), + ("", ""), + )))] + cm2: Lexeme, + #[rule(Bracket::cxx_style( + Ident::new(), + ("$", "["), + ("]", ""), + ))] + br: Lexeme, + #[rule(Ident::new() + .prefix("/") + .suffixes(["", "%q", "/"]))] + id: Lexeme, + #[rule(Digital::new(10) + .prefixes(["", "%"]) + .suffixes(["", "%", "q", "/"]))] + nm: Lexeme, + 
#[rule(Quoted::new("'") + .prefixes(["%", "q"]) + .suffixes(["", "%", "q"]))] + st: Lexeme, + #[rule(Quoted::with(Bracket::cxx_style( + Ident::new(), + ("q", "("), + (")", ""), + )))] + st2: Lexeme, + } + + let ctx = Context::new(); + let report = + ctx.new_report_with(Options { color: false, ..Default::default() }); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + match file.lex(Spec::get().spec(), &report) { + Ok(stream) => { + test.output("tokens.yaml", stream.summary()); + test.output("stderr", "".into()); + } + Err(fatal) => { + test.output("tokens.yaml", "".into()); + test.output("stderr", format!("{fatal:?}")); + } + } +} + +#[gilded::test("tests/ui/digital/*.txt")] +fn digital(test: &mut gilded::Test) { + #[ilex::spec] + struct Spec { + #[rule(Digital::new(16).prefix("0x"))] + m1: Lexeme, + #[rule(Digital::new(8).prefix("0o"))] + m2: Lexeme, + + #[rule( Digital::new(10) + .point_limit(2..3) + .point('/') + .exponent("e", Digits::new(10).point_limit(1..2)) + .separator_with("_", + SeparatorCornerCases { + prefix: true, + suffix: true, + around_point: true, + around_exp: true, + }))] + m0: Lexeme, + #[rule(Digital::new(10) + .prefix("all_ok@") + .point_limit(0..3) + .exponent("e", Digits::new(10).point_limit(0..3)) + .separator_with("_", + SeparatorCornerCases { + prefix: true, + suffix: true, + around_point: true, + around_exp: true, + }))] + n0: Lexeme, + #[rule( Digital::new(10) + .prefix("no_prefix@") + .point_limit(0..3) + .exponent("e", Digits::new(10).point_limit(0..3)) + .separator_with("_", + SeparatorCornerCases { + prefix: false, + suffix: true, + around_point: true, + around_exp: true, + }))] + n1: Lexeme, + #[rule(Digital::new(10) + .prefix("no_suffix@") + .point_limit(0..3) + .exponent("e", Digits::new(10).point_limit(0..3)) + .separator_with("_", + SeparatorCornerCases { + prefix: true, + suffix: false, + around_point: true, + around_exp: true, + }))] + n2: Lexeme, + #[rule( Digital::new(10) + 
.prefix("no_point@") + .point_limit(0..3) + .exponent("e", Digits::new(10).point_limit(0..3)) + .separator_with("_", + SeparatorCornerCases { + prefix: true, + suffix: true, + around_point: false, + around_exp: true, + }))] + n3: Lexeme, + #[rule(Digital::new(10) + .prefix("no_exp@") + .point_limit(0..3) + .exponent("e", Digits::new(10).point_limit(0..3)) + .separator_with("_", + SeparatorCornerCases { + prefix: true, + suffix: true, + around_point: true, + around_exp: false, + }))] + n4: Lexeme, + } + + let ctx = Context::new(); + let report = + ctx.new_report_with(Options { color: false, ..Default::default() }); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + match file.lex(Spec::get().spec(), &report) { + Ok(stream) => { + test.output("tokens.yaml", stream.summary()); + test.output("stderr", "".into()); + } + Err(fatal) => { + test.output("tokens.yaml", "".into()); + test.output("stderr", format!("{fatal:?}")); + } + } +} + +#[gilded::test("tests/ui/eof/*.txt")] +fn eof(test: &mut gilded::Test) { + #[ilex::spec] + struct Spec { + #[rule("/*", "*/")] + c1: Lexeme, + + #[rule("[", "]")] + b1: Lexeme, + + #[rule("(", ")")] + b2: Lexeme, + + #[rule(Quoted::new("'"))] + q1: Lexeme, + } + + let ctx = Context::new(); + let report = + ctx.new_report_with(Options { color: false, ..Default::default() }); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + match file.lex(Spec::get().spec(), &report) { + Ok(stream) => { + test.output("tokens.yaml", stream.summary()); + test.output("stderr", "".into()); + } + Err(fatal) => { + test.output("tokens.yaml", "".into()); + test.output("stderr", format!("{fatal:?}")); + } + } +} + +#[gilded::test("tests/ui/too_small/*.txt")] +fn too_small(test: &mut gilded::Test) { + #[ilex::spec] + struct Spec { + #[rule(Ident::new().prefix("%"))] + i1: Lexeme, + #[rule(Ident::new().prefix("$").min_len(3))] + i2: Lexeme, + + #[rule(Bracket::rust_style("#", ("r#", 
"'"), ("'#", "")))] + r1: Lexeme, + #[rule(Bracket::rust_style("#", ("q###", "'"), ("'###", "")))] + r2: Lexeme, + + #[rule(Bracket::cxx_style(Ident::new().min_len(1), ("R'", "("), (")", "'")))] + c1: Lexeme, + #[rule(Bracket::cxx_style(Ident::new().min_len(3), ("Q'", "("), (")", "'")))] + c2: Lexeme, + } + + let ctx = Context::new(); + let report = + ctx.new_report_with(Options { color: false, ..Default::default() }); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + match file.lex(Spec::get().spec(), &report) { + Ok(stream) => { + test.output("tokens.yaml", stream.summary()); + test.output("stderr", "".into()); + } + Err(fatal) => { + test.output("tokens.yaml", "".into()); + test.output("stderr", format!("{fatal:?}")); + } + } +} + +#[gilded::test("tests/ui/unrecognized/*.txt")] +fn unrecognized(test: &mut gilded::Test) { + #[ilex::spec] + struct Spec { + null: Lexeme, + + #[rule("[", "]")] + cm: Lexeme, + } + + let ctx = Context::new(); + let report = + ctx.new_report_with(Options { color: false, ..Default::default() }); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + match file.lex(Spec::get().spec(), &report) { + Ok(stream) => { + test.output("tokens.yaml", stream.summary()); + test.output("stderr", "".into()); + } + Err(fatal) => { + test.output("tokens.yaml", "".into()); + test.output("stderr", format!("{fatal:?}")); + } + } +} diff --git a/ilex/tests/ui/new_file.rs b/ilex/tests/ui/new_file.rs deleted file mode 100644 index 44916ba..0000000 --- a/ilex/tests/ui/new_file.rs +++ /dev/null @@ -1,20 +0,0 @@ -use ilex::testing; -use ilex::Context; - -#[test] -fn does_not_exist() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx.open_file("does_not_exist", &report); - - testing::check_report(&report, "tests/ui/goldens/does_not_exist.stdout"); -} - -#[test] -fn not_utf8() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = 
ctx.open_file("tests/ui/not_utf8", &report); - - testing::check_report(&report, "tests/ui/goldens/not_utf8.stdout"); -} diff --git a/ilex/tests/ui/not_utf8 b/ilex/tests/ui/not_utf8 deleted file mode 100644 index ce542ef..0000000 --- a/ilex/tests/ui/not_utf8 +++ /dev/null @@ -1 +0,0 @@ -˙ \ No newline at end of file diff --git a/ilex/tests/ui/too_small.rs b/ilex/tests/ui/too_small.rs deleted file mode 100644 index abade6f..0000000 --- a/ilex/tests/ui/too_small.rs +++ /dev/null @@ -1,61 +0,0 @@ -use ilex::rule::*; -use ilex::testing; -use ilex::Context; -use ilex::Lexeme; - -#[ilex::spec] -struct Spec { - #[rule(Ident::new().prefix("%"))] - i1: Lexeme, - #[rule(Ident::new().prefix("$").min_len(3))] - i2: Lexeme, - - #[rule(Bracket::rust_style("#", ("r#", "'"), ("'#", "")))] - r1: Lexeme, - #[rule(Bracket::rust_style("#", ("q###", "'"), ("'###", "")))] - r2: Lexeme, - - #[rule(Bracket::cxx_style(Ident::new().min_len(1), ("R'", "("), (")", "'")))] - c1: Lexeme, - #[rule(Bracket::cxx_style(Ident::new().min_len(3), ("Q'", "("), (")", "'")))] - c2: Lexeme, -} - -#[test] -fn ident_too_small() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "%foo $bar % $oo") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/ident_too_small.stdout"); -} - -#[test] -fn rust_string_hashes_too_small() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "r#'foo'# r'foo' q###'bar'### q##'bar'##") - .lex(Spec::get().spec(), &report); - - testing::check_report( - &report, - "tests/ui/goldens/rust_string_hashes_too_small.stdout", - ); -} - -#[test] -fn cxx_string_tag_too_small() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "R'c(foo)c' R'(foo)' Q'foo(bar)foo' Q'oo(bar)oo'") - .lex(Spec::get().spec(), &report); - - testing::check_report( - &report, - "tests/ui/goldens/cxx_string_tag_too_small.stdout", - ); -} diff --git 
a/ilex/tests/ui/goldens/cxx_string_tag_too_small.stdout b/ilex/tests/ui/too_small/cxx_tag.stderr similarity index 73% rename from ilex/tests/ui/goldens/cxx_string_tag_too_small.stdout rename to ilex/tests/ui/too_small/cxx_tag.stderr index c206acd..a68c080 100644 --- a/ilex/tests/ui/goldens/cxx_string_tag_too_small.stdout +++ b/ilex/tests/ui/too_small/cxx_tag.stderr @@ -1,16 +1,18 @@ error: expected at least 1 character in identifier, but found none - --> :1:14 + --> too_small/cxx_tag.txt:1:14 | 1 | R'c(foo)c' R'(foo)' Q'foo(bar)foo' Q'oo(bar)oo' | ^ expected at least 1 here | = help: this appears to be an empty identifier + = note: reported at: ilex/src/rt/emit2.rs:223:14 error: expected at least 3 characters in identifier, but found only 2 - --> :1:38 + --> too_small/cxx_tag.txt:1:38 | 1 | R'c(foo)c' R'(foo)' Q'foo(bar)foo' Q'oo(bar)oo' | ^^ expected at least 3 here | + = note: reported at: ilex/src/rt/emit2.rs:223:14 error: aborting due to 2 errors diff --git a/ilex/tests/ui/too_small/cxx_tag.txt b/ilex/tests/ui/too_small/cxx_tag.txt new file mode 100644 index 0000000..03beed6 --- /dev/null +++ b/ilex/tests/ui/too_small/cxx_tag.txt @@ -0,0 +1 @@ +R'c(foo)c' R'(foo)' Q'foo(bar)foo' Q'oo(bar)oo' diff --git a/ilex/tests/ui/goldens/ident_too_small.stdout b/ilex/tests/ui/too_small/ident.stderr similarity index 69% rename from ilex/tests/ui/goldens/ident_too_small.stdout rename to ilex/tests/ui/too_small/ident.stderr index 069ce18..449d5de 100644 --- a/ilex/tests/ui/goldens/ident_too_small.stdout +++ b/ilex/tests/ui/too_small/ident.stderr @@ -1,8 +1,9 @@ error: expected at least 3 characters in identifier, but found only 2 - --> :1:13 + --> too_small/ident.txt:1:13 | 1 | %foo $bar % $oo | ^^^ expected at least 3 here | + = note: reported at: ilex/src/rt/emit2.rs:315:28 error: aborting due to previous error diff --git a/ilex/tests/ui/too_small/ident.txt b/ilex/tests/ui/too_small/ident.txt new file mode 100644 index 0000000..9734547 --- /dev/null +++ 
b/ilex/tests/ui/too_small/ident.txt @@ -0,0 +1 @@ +%foo $bar % $oo diff --git a/ilex/tests/ui/goldens/rust_string_hashes_too_small.stdout b/ilex/tests/ui/too_small/rust_hashes.stderr similarity index 62% rename from ilex/tests/ui/goldens/rust_string_hashes_too_small.stdout rename to ilex/tests/ui/too_small/rust_hashes.stderr index 9573ce3..914b334 100644 --- a/ilex/tests/ui/goldens/rust_string_hashes_too_small.stdout +++ b/ilex/tests/ui/too_small/rust_hashes.stderr @@ -1,22 +1,25 @@ error: unrecognized characters - --> :1:10 + --> too_small/rust_hashes.txt:1:10 | 1 | r#'foo'# r'foo' q###'bar'### q##'bar'## | ^^^^^^ | + = note: reported at: ilex/src/rt/mod.rs:36:8 error: unexpected closing `'##` - --> :1:37 + --> too_small/rust_hashes.txt:1:37 | 1 | r#'foo'# r'foo' q###'bar'### q##'bar'## | ^^^ expected to be opened by `r##'` | + = note: reported at: ilex/src/rt/emit2.rs:254:22 error: unrecognized characters - --> :1:30 + --> too_small/rust_hashes.txt:1:30 | 1 | r#'foo'# r'foo' q###'bar'### q##'bar'## | ^^^^^^^ | + = note: reported at: ilex/src/rt/mod.rs:36:8 error: aborting due to 3 errors diff --git a/ilex/tests/ui/too_small/rust_hashes.txt b/ilex/tests/ui/too_small/rust_hashes.txt new file mode 100644 index 0000000..fd4ef1d --- /dev/null +++ b/ilex/tests/ui/too_small/rust_hashes.txt @@ -0,0 +1 @@ +r#'foo'# r'foo' q###'bar'### q##'bar'## diff --git a/ilex/tests/ui/unrecognized.rs b/ilex/tests/ui/unrecognized.rs deleted file mode 100644 index 327026b..0000000 --- a/ilex/tests/ui/unrecognized.rs +++ /dev/null @@ -1,23 +0,0 @@ -use ilex::rule::*; -use ilex::testing; -use ilex::Context; -use ilex::Lexeme; - -#[ilex::spec] -struct Spec { - null: Lexeme, - - #[rule("[", "]")] - cm: Lexeme, -} - -#[test] -fn unrecognized() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "multiple, null, [unrecognized], chunks!~ ") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/unrecognized.stdout"); 
-} diff --git a/ilex/tests/ui/goldens/unrecognized.stdout b/ilex/tests/ui/unrecognized/unrecognized.stderr similarity index 62% rename from ilex/tests/ui/goldens/unrecognized.stdout rename to ilex/tests/ui/unrecognized/unrecognized.stderr index b9d557c..b5d8944 100644 --- a/ilex/tests/ui/goldens/unrecognized.stdout +++ b/ilex/tests/ui/unrecognized/unrecognized.stderr @@ -1,36 +1,41 @@ error: unrecognized characters - --> :1:1 + --> unrecognized.txt:1:1 | 1 | multiple, null, [unrecognized], chunks!~ | ^^^^^^^^^ | + = note: reported at: ilex/src/rt/mod.rs:36:8 error: unrecognized character - --> :1:15 + --> unrecognized.txt:1:15 | 1 | multiple, null, [unrecognized], chunks!~ | ^ | + = note: reported at: ilex/src/rt/mod.rs:36:8 error: unrecognized characters - --> :1:18 + --> unrecognized.txt:1:18 | 1 | multiple, null, [unrecognized], chunks!~ | ^^^^^^^^^^^^ | + = note: reported at: ilex/src/rt/mod.rs:36:8 error: unrecognized character - --> :1:31 + --> unrecognized.txt:1:31 | 1 | multiple, null, [unrecognized], chunks!~ | ^ | + = note: reported at: ilex/src/rt/mod.rs:36:8 error: unrecognized characters - --> :1:33 + --> unrecognized.txt:1:33 | 1 | multiple, null, [unrecognized], chunks!~ | ^^^^^^^^ | + = note: reported at: ilex/src/rt/mod.rs:36:8 error: aborting due to 5 errors diff --git a/ilex/tests/ui/unrecognized/unrecognized.txt b/ilex/tests/ui/unrecognized/unrecognized.txt new file mode 100644 index 0000000..449b03a --- /dev/null +++ b/ilex/tests/ui/unrecognized/unrecognized.txt @@ -0,0 +1 @@ +multiple, null, [unrecognized], chunks!~ From 0849d5a9fc217a50ea8649c31fc81eecb93f87b2 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Tue, 14 Jan 2025 14:24:31 -0800 Subject: [PATCH 05/11] ilex: Delete ilex::testing in favor of golden tests --- ilex/src/lib.rs | 1 - ilex/src/report/mod.rs | 15 - ilex/src/testing/mod.rs | 532 ---------------------------------- ilex/src/testing/recognize.rs | 354 ---------------------- 4 files changed, 902 deletions(-) delete 
mode 100644 ilex/src/testing/mod.rs delete mode 100644 ilex/src/testing/recognize.rs diff --git a/ilex/src/lib.rs b/ilex/src/lib.rs index 2a0c4dc..60f7a17 100644 --- a/ilex/src/lib.rs +++ b/ilex/src/lib.rs @@ -263,7 +263,6 @@ pub mod fp; pub mod ice; pub mod report; pub mod rule; -pub mod testing; pub mod token; pub use { diff --git a/ilex/src/report/mod.rs b/ilex/src/report/mod.rs index 2c19d0e..50463fb 100644 --- a/ilex/src/report/mod.rs +++ b/ilex/src/report/mod.rs @@ -141,21 +141,6 @@ impl Report { render::finish(self, sink) } - pub(crate) fn write_out_for_test(&self) -> String { - eprintln!("{}", self.fatal::<()>().unwrap_err()); - let mut sink = String::new(); - render::render_fmt( - self, - &Options { - color: false, - show_report_locations: false, - }, - &mut sink, - ) - .unwrap(); - sink - } - pub(crate) fn new(ctx: &Context, opts: Options) -> Self { Self { ctx: ctx.copy(), diff --git a/ilex/src/testing/mod.rs b/ilex/src/testing/mod.rs deleted file mode 100644 index 8528d9b..0000000 --- a/ilex/src/testing/mod.rs +++ /dev/null @@ -1,532 +0,0 @@ -//! Lexer testing helpers. -//! -//! This type provides testing-oriented matchers for matching on a -//! [`TokenStream`][`crate::token::Stream`]. -//! -//! These matchers are intended for writing *tests*. To write a parser, you\ -//! should use [`Cursor`][crate::token::Cursor] instead. - -use byteyarn::Yarn; -use std::env; -use std::fmt; -use std::fs; -use std::ops::Range; -use std::path::Path; - -use crate::file::Span; -use crate::file::Spanned; -use crate::report::Report; -use crate::rule; -use crate::spec::Lexeme; -use crate::token; -use crate::token::Content; -use crate::token::Sign; - -mod recognize; -use recognize::Kind; - -/// Checks that `report` contains the expected diagnostics in `path`, verbatim. -/// -/// If the contents do not match, it will print a diff to stderr and panic. 
-/// -/// If the `ILEX_REGENERATE` env var is set, instead of reading the file and -/// performing the check, it will write the expected contents to the file, -/// allowing for easy generation of test data. -#[track_caller] -pub fn check_report(report: &Report, path: &(impl AsRef + ?Sized)) { - let path = path.as_ref(); - let got = report.write_out_for_test(); - let want = if env::var("ILEX_REGENERATE").is_ok() { - if let Some(parent) = path.parent() { - fs::create_dir_all(parent).unwrap(); - } - fs::write(path, got).unwrap(); - return; - } else { - fs::read_to_string(path).unwrap() - }; - - eprintln!("checking against {}...", path.display()); - similar_asserts::assert_eq!(got, want); -} - -/// Checks that `report` contains no diagnostics. -/// -/// If it does, it will print them to stderr and panic. -#[track_caller] -pub fn check_report_ok(report: &Report) { - if let Err(e) = report.fatal_or(()) { - e.panic(); - } -} - -/// A matcher for a token stream. -/// -/// For usage examples, see the `ilex/tests` directory. -pub struct Matcher { - stream: Vec, -} - -impl Matcher { - /// Creates a new matcher. - pub fn new() -> Self { - Self { stream: Vec::new() } - } - - /// Adds a new expected token for this matcher, from a lexeme and an argument. - /// - /// What is allowed for `arg` for a particular rule type is specified by - /// the [`Match`] trait. You can even define your own! - pub fn then1, A1>( - mut self, - lexeme: Lexeme, - a1: A1, - ) -> Self { - R::add_token(&mut self, lexeme, (a1,)); - self - } - - /// Adds a new expected token for this matcher, from a lexeme and two - /// arguments. - /// - /// What is allowed for `arg` for a particular rule type is specified by - /// the [`Match`] trait. You can even define your own! - pub fn then2, A1, A2>( - mut self, - lexeme: Lexeme, - a1: A1, - a2: A2, - ) -> Self { - R::add_token(&mut self, lexeme, (a1, a2)); - self - } - - /// Like [`Matcher::then1()`], but adds a prefix matcher too. 
- pub fn prefix1, A1>( - self, - lexeme: Lexeme, - prefix: impl Into, - a1: A1, - ) -> Self { - self.then1(lexeme, a1).prefix(prefix) - } - - /// Like [`Matcher::then2()`], but adds a prefix matcher too. - pub fn prefix2, A1, A2>( - self, - lexeme: Lexeme, - prefix: impl Into, - a1: A1, - a2: A2, - ) -> Self { - self.then2(lexeme, a1, a2).prefix(prefix) - } - - /// Like [`Matcher::then1()`], but adds a suffix matcher too. - pub fn suffix1, A1>( - self, - lexeme: Lexeme, - a1: A1, - suffix: impl Into, - ) -> Self { - self.then1(lexeme, a1).suffix(suffix) - } - - /// Like [`Matcher::then2()`], but adds a suffix matcher too. - pub fn suffix2, A1, A2>( - self, - lexeme: Lexeme, - a1: A1, - a2: A2, - suffix: impl Into, - ) -> Self { - self.then2(lexeme, a1, a2).suffix(suffix) - } - - /// Like [`Matcher::then1()`], but adds a prefix matcher and a suffix matcher too. - pub fn affix1, A1>( - self, - lexeme: Lexeme, - prefix: impl Into, - a1: A1, - suffix: impl Into, - ) -> Self { - self.then1(lexeme, a1).prefix(prefix).suffix(suffix) - } - - /// Like [`Matcher::then2()`], but adds a prefix matcher and a suffix matcher too. - pub fn affix2, A1, A2>( - self, - lexeme: Lexeme, - prefix: impl Into, - a1: A1, - a2: A2, - suffix: impl Into, - ) -> Self { - self.then2(lexeme, a1, a2).prefix(prefix).suffix(suffix) - } - - /// Adds an EOF matcher. - /// - /// Every token stream ends with an EOF token, so you always need to include - /// one. - pub fn eof(mut self) -> Self { - self.stream.push(recognize::Matcher { - which: Some(Lexeme::eof().any()), - span: Text::any(), - comments: Vec::new(), - kind: Kind::Eof, - }); - self - } - - /// Matches `cursor` against this matcher, and panics if it doesn't. - #[track_caller] - pub fn assert_matches<'lex>( - &self, - that: impl IntoIterator>, - ) { - self.matches(that).unwrap() - } - - /// Sets an expectation for the overall span of the most recently added - /// token matcher. 
- /// - /// # Panics - /// - /// Panics if none of the matcher-adding methods has been called yet. - pub fn span(mut self, text: impl Into) -> Self { - self.stream.last_mut().unwrap().span = text.into(); - self - } - - /// Adds some expected comments to the most recently added token matcher. - /// - /// # Panics - /// - /// Panics if none of the matcher-adding methods has been called yet. - pub fn comments(mut self, iter: I) -> Self - where - I: IntoIterator, - I::Item: Into, - { - self - .stream - .last_mut() - .unwrap() - .comments - .extend(iter.into_iter().map(Into::into)); - self - } - - /// Matches `cursor` against this matcher. - /// - /// If matching fails, returns an error describing why. - pub fn matches<'lex>( - &self, - that: impl IntoIterator>, - ) -> Result<(), impl fmt::Debug> { - struct DebugBy(String); - impl fmt::Debug for DebugBy { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.0) - } - } - - let mut state = recognize::MatchState::new(); - recognize::zip_eq( - "token streams", - &mut state, - &self.stream, - that, - |state, ours, theirs| ours.recognizes(state, theirs), - ); - state.finish().map_err(DebugBy) - } - - /// Sets the prefix for the most recently added token matcher. - /// - /// # Panics - /// - /// Panics if [`Matcher::then()`] has not been called yet, or if the most - /// recent matcher is not for [`rule::Ident`], [`rule::Digital`], or - /// [`rule::Quoted`], - fn prefix(mut self, text: impl Into) -> Self { - match &mut self.stream.last_mut().unwrap().kind { - Kind::Ident { prefix, .. } | Kind::Quoted { prefix, .. } => { - *prefix = Some(text.into()); - } - Kind::Digital { digits, .. } => digits[0].prefix = Some(text.into()), - _ => panic!("cannot set prefix on this matcher"), - } - - self - } - - /// Sets the prefix for the most recently added token matcher. 
- /// - /// # Panics - /// - /// Panics if [`Matcher::then()`] has not been called yet, or if the most - /// recent matcher is not for [`rule::Ident`], [`rule::Digital`], or - /// [`rule::Quoted`], - fn suffix(mut self, text: impl Into) -> Self { - match &mut self.stream.last_mut().unwrap().kind { - Kind::Ident { suffix, .. } - | Kind::Quoted { suffix, .. } - | Kind::Digital { suffix, .. } => { - *suffix = Some(text.into()); - } - _ => panic!("cannot set suffix on this matcher"), - } - - self - } -} - -impl Default for Matcher { - fn default() -> Self { - Self::new() - } -} - -/// A matcher for a chunk of text from the input source. -/// -/// This is slightly more general than a span, since it can specify the content -/// of the text and the offsets separately, and optionally. `Text` values are -/// intended to *recognize* various spans. -/// -/// `&str` and `Range` are both convertible to `Text`. -#[derive(Clone)] -pub struct Text { - text: Option, - range: Option>, -} - -impl Text { - /// Returns a matcher that recognizes all spans. - pub fn any() -> Self { - Text { text: None, range: None } - } - - /// Returns a matcher that recognizes spans with the given text. - pub fn new(text: impl Into) -> Self { - Text { text: Some(text.into()), range: None } - } - - /// Returns a matcher that recognizes spans with the given byte range. - pub fn range(range: Range) -> Self { - Text { text: None, range: Some(range) } - } - - /// Returns a matcher that recognizes spans with the given byte range and - /// text. - pub fn text_and_range(text: impl Into, range: Range) -> Self { - Text { - text: Some(text.into()), - range: Some(range), - } - } - - /// Returns whether this span recognizes a particular span. 
- fn recognizes(&self, span: Span) -> bool { - self.text.as_ref().is_none_or(|text| text == span.text()) - && !self.range.as_ref().is_some_and(|range| { - let r = span.span(); - range != &(r.start()..r.end()) - }) - } -} - -impl> From for Text { - fn from(value: Y) -> Self { - Text::new(value) - } -} - -impl fmt::Debug for Text { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match (&self.text, &self.range) { - (Some(text), Some(range)) => write!(f, "{text:?} @ {range:?}"), - (Some(text), None) => fmt::Debug::fmt(text, f), - (None, Some(range)) => write!(f, " @ {range:?}"), - (None, None) => f.write_str(""), - } - } -} - -/// Records a way in which a matcher can be added for a particular token rule. -/// -/// See [`Matcher::then1()`]. -pub trait Match: rule::Rule { - /// Adds a new token to `matcher`. - fn add_token(matcher: &mut Matcher, lexeme: Lexeme, arg: Arg); -} - -impl> Match<(T,)> for rule::Keyword { - fn add_token(matcher: &mut Matcher, lexeme: Lexeme, (arg,): (T,)) { - matcher.stream.push(recognize::Matcher { - which: Some(lexeme.any()), - span: arg.into(), - comments: Vec::new(), - kind: Kind::Keyword, - }) - } -} - -impl Match<((Open, Close), Matcher)> for rule::Bracket -where - Open: Into, - Close: Into, -{ - fn add_token( - matcher: &mut Matcher, - lexeme: Lexeme, - ((open, close), contents): ((Open, Close), Matcher), - ) { - matcher.stream.push(recognize::Matcher { - which: Some(lexeme.any()), - span: Text::any(), - comments: Vec::new(), - kind: Kind::Delimited { - tokens: contents.stream, - delims: (open.into(), close.into()), - }, - }) - } -} - -impl> Match<(T,)> for rule::Ident { - fn add_token(matcher: &mut Matcher, lexeme: Lexeme, (arg,): (T,)) { - let arg = arg.into(); - matcher.stream.push(recognize::Matcher { - which: Some(lexeme.any()), - span: Text::any(), - comments: Vec::new(), - kind: Kind::Ident { name: arg, prefix: None, suffix: None }, - }) - } -} - -/// A complex digital token matcher. 
-/// -/// This type is used the matcher argument type for complex digital rules, such -/// as those that have signs and exponents. -#[derive(Default)] -pub struct DigitalMatcher { - chunks: Vec, -} - -impl DigitalMatcher { - /// Creates a new matcher, with the given radix and digit blocks for the - /// mantissa. - pub fn new>( - radix: u8, - digits: impl IntoIterator, - ) -> Self { - Self { - chunks: vec![recognize::DigitalMatcher { - radix, - sign: None, - digits: digits.into_iter().map(Into::into).collect(), - prefix: None, - }], - } - } - - /// Sets the sign for the most recently added chunk of digits. - pub fn sign(self, sign: Sign) -> Self { - self.sign_span(sign, Text::any()) - } - - /// Sets the sign (and sign span) for the most recently added chunk of digits. - pub fn sign_span(mut self, sign: Sign, span: impl Into) -> Self { - self - .chunks - .last_mut() - .unwrap() - .sign - .get_or_insert_with(|| (sign, span.into())); - self - } - - /// Adds an expected exponent. - /// - /// The exponent must be in the given radix, delimited by the given prefix, - /// and have the given digits. 
- pub fn exp>( - mut self, - radix: u8, - prefix: impl Into, - digits: impl IntoIterator, - ) -> Self { - self.chunks.push(recognize::DigitalMatcher { - radix, - sign: None, - digits: digits.into_iter().map(Into::into).collect(), - prefix: Some(prefix.into()), - }); - self - } -} - -impl Match<(u8, Digits)> for rule::Digital -where - Digits: IntoIterator, - Digits::Item: Into, -{ - fn add_token( - matcher: &mut Matcher, - lexeme: Lexeme, - (radix, digits): (u8, Digits), - ) { - Self::add_token(matcher, lexeme, (DigitalMatcher::new(radix, digits),)); - } -} - -impl Match<(DigitalMatcher,)> for rule::Digital { - fn add_token( - matcher: &mut Matcher, - lexeme: Lexeme, - digits: (DigitalMatcher,), - ) { - matcher.stream.push(recognize::Matcher { - which: Some(lexeme.any()), - span: Text::any(), - comments: Vec::new(), - kind: Kind::Digital { digits: digits.0.chunks, suffix: None }, - }) - } -} - -impl From<&'static str> for Content { - fn from(value: &'static str) -> Self { - Content::lit(value) - } -} - -impl Match<((Open, Close), Iter)> for rule::Quoted -where - Open: Into, - Close: Into, - Iter: IntoIterator, - Iter::Item: Into>, -{ - fn add_token( - matcher: &mut Matcher, - lexeme: Lexeme, - ((open, close), content): ((Open, Close), Iter), - ) { - matcher.stream.push(recognize::Matcher { - which: Some(lexeme.any()), - span: Text::any(), - comments: Vec::new(), - kind: Kind::Quoted { - content: content.into_iter().map(Into::into).collect(), - delims: (open.into(), close.into()), - prefix: None, - suffix: None, - }, - }) - } -} diff --git a/ilex/src/testing/recognize.rs b/ilex/src/testing/recognize.rs deleted file mode 100644 index dd7d495..0000000 --- a/ilex/src/testing/recognize.rs +++ /dev/null @@ -1,354 +0,0 @@ -//! Visitor code for token matching. -//! -//! This code is not very pretty or fast, since it's meant to generate -//! diagnostics in lexer/parser unit tests. 
- -use std::fmt; -use std::fmt::DebugStruct; -use std::fmt::Display; - -use crate::f; -use crate::file::Spanned; -use crate::rule; -use crate::spec::Lexeme; -use crate::testing::Text; -use crate::token; -use crate::token::Any; -use crate::token::Sign; -use crate::token::Token; - -pub struct Matcher { - pub which: Option>, - pub span: Text, - pub comments: Vec, - pub kind: Kind, -} - -pub enum Kind { - Eof, - Keyword, - Ident { - name: Text, - prefix: Option, - suffix: Option, - }, - Quoted { - content: Vec>, - delims: (Text, Text), - prefix: Option, - suffix: Option, - }, - Digital { - digits: Vec, - suffix: Option, - }, - Delimited { - delims: (Text, Text), - tokens: Vec, - }, -} - -#[derive(Debug)] -pub struct DigitalMatcher { - pub radix: u8, - pub sign: Option<(Sign, Text)>, - pub digits: Vec, - pub prefix: Option, -} - -impl Matcher { - pub fn recognizes(&self, state: &mut MatchState, tok: token::Any) { - state.match_spans("token span", &self.span, Spanned::span(&tok)); - - zip_eq("comments", state, &self.comments, tok.comments(), |state, t, s| { - state.match_spans("comment", t, s); - }); - - match (&self.kind, tok) { - (Kind::Eof, Any::Eof(..)) | (Kind::Keyword, Any::Keyword(..)) => {} - (Kind::Ident { name, prefix, suffix }, Any::Ident(tok)) => { - state.match_spans("identifier name", name, tok.name()); - state.match_options("prefix", prefix.as_ref(), tok.prefix()); - state.match_options("suffix", suffix.as_ref(), tok.suffix()); - } - (Kind::Quoted { delims, content, prefix, suffix }, Any::Quoted(tok)) => { - let [open, close] = tok.delimiters(); - state.match_spans("open quote", &delims.0, open); - state.match_spans("close quote", &delims.1, close); - state.match_options("prefix", prefix.as_ref(), tok.prefix()); - state.match_options("suffix", suffix.as_ref(), tok.suffix()); - - zip_eq( - "string contents", - state, - content, - tok.raw_content(), - |state, ours, theirs| match (ours, theirs) { - (token::Content::Lit(t), token::Content::Lit(s)) => { - 
state.match_spans("string content", t, s) - } - (token::Content::Esc(t, ours), token::Content::Esc(s, theirs)) => { - state.match_spans("string escape", t, s); - state.match_options("escape data", ours.as_ref(), theirs); - } - _ => state.error("mismatched string content types"), - }, - ); - } - (Kind::Digital { digits, suffix }, Any::Digital(tok)) => { - let recognize = |state: &mut MatchState, - mch: &DigitalMatcher, - tok: token::Digital| { - if mch.radix != tok.radix() { - state.error(f!( - "wrong radix; want {:?}, got {:?}", - mch.radix, - tok.radix() - )); - } - state.match_any_options( - "sign", - mch.sign.as_ref().map(|(s, _)| s), - tok.sign(), - |&a, b| a == b, - ); - state.match_options( - "sign span", - mch.sign.as_ref().map(|(_, sp)| sp), - tok.sign_span(), - ); - state.match_options("prefix", mch.prefix.as_ref(), tok.prefix()); - zip_eq( - "digit blocks", - state, - &mch.digits, - tok.digit_blocks(), - |state, t, s| { - state.match_spans("digit block", t, s); - }, - ); - }; - - recognize(state, &digits[0], tok); - zip_eq( - "exponent list", - state, - &digits[1..], - tok.exponents(), - |state, t, s| { - recognize(state, t, s); - }, - ); - - state.match_options("suffix", suffix.as_ref(), tok.suffix()); - } - (Kind::Delimited { delims, tokens }, Any::Bracket(tok)) => { - state.match_spans("open delimiter", &delims.0, tok.open()); - state.match_spans("close delimiter", &delims.1, tok.close()); - - zip_eq( - "bracket contents", - state, - tokens, - tok.contents(), - |state, ours, theirs| ours.recognizes(state, theirs), - ); - } - _ => state.error("mismatched token types"), - } - } -} - -impl fmt::Debug for Matcher { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let print_spans = - matches!(std::env::var("ILEX_SPANS").as_deref(), Ok("ranges" | "text")); - - let req_field = |d: &mut DebugStruct, name, span| { - if print_spans { - d.field(name, span); - } - }; - - let opt_field = |d: &mut DebugStruct, name, span: &Option| { - if print_spans && 
span.is_some() { - d.field(name, span.as_ref().unwrap()); - } - }; - - let vec_field = |d: &mut DebugStruct, name, spans: &Vec| { - if !spans.is_empty() { - d.field(name, spans); - } - }; - - let name = match &self.kind { - Kind::Eof => "Eof", - Kind::Keyword => "Keyword", - Kind::Ident { .. } => "Ident", - Kind::Quoted { .. } => "Quoted", - Kind::Digital { .. } => "Digital", - Kind::Delimited { .. } => "Delimited", - }; - - let name = match self.which { - Some(l) => format!("{name}({})", l.index()), - None => name.into(), - }; - - let mut d = f.debug_struct(&name); - - match &self.kind { - Kind::Ident { name, prefix, suffix } => { - req_field(&mut d, "name", name); - opt_field(&mut d, "prefix", prefix); - opt_field(&mut d, "suffix", suffix); - } - Kind::Quoted { content, delims, prefix, suffix } => { - d.field("content", content); - req_field(&mut d, "open", &delims.0); - req_field(&mut d, "close", &delims.1); - opt_field(&mut d, "prefix", prefix); - opt_field(&mut d, "suffix", suffix); - } - Kind::Digital { digits, suffix } => { - d.field("digits", digits); - opt_field(&mut d, "suffix", suffix); - } - Kind::Delimited { delims, tokens } => { - req_field(&mut d, "open", &delims.0); - req_field(&mut d, "close", &delims.1); - d.field("tokens", tokens); - } - _ => {} - }; - - req_field(&mut d, "span", &self.span); - vec_field(&mut d, "comments", &self.comments); - d.finish() - } -} - -pub struct MatchState { - errors: String, - stack: Vec, - error_count: usize, -} - -impl MatchState { - pub fn new() -> Self { - Self { - errors: String::new(), - stack: Vec::new(), - error_count: 0, - } - } - - fn error(&mut self, msg: impl Display) { - use std::fmt::Write; - - self.error_count += 1; - if self.error_count > 10 { - return; - } - - self.errors.push_str("at stream"); - for i in &self.stack { - let _ = write!(self.errors, "[{}]", i); - } - let _ = writeln!(self.errors, ": {msg}"); - } - - fn match_spans<'s>( - &mut self, - what: &str, - text: &Text, - span: impl 
Spanned<'s>, - ) { - let span = span.span(); - if !text.recognizes(span) { - self.error(f!("wrong {what}; want {:?}, got {:?}", text, span)); - } - } - - fn match_options<'s>( - &mut self, - what: &str, - text: Option<&Text>, - span: Option>, - ) { - let span = span.map(|s| s.span()); - if text.is_none() && span.is_none() { - return; - } - - if !text.zip(span).is_some_and(|(t, s)| t.recognizes(s)) { - self.error(f!("wrong {what}; want {:?}, got {:?}", text, span)); - } - } - - fn match_any_options( - &mut self, - what: &str, - text: Option, - span: Option, - eq: impl FnOnce(&T, &U) -> bool, - ) { - if text.is_none() && span.is_none() { - return; - } - - if !text - .as_ref() - .zip(span.as_ref()) - .is_some_and(|(t, s)| eq(t, s)) - { - self.error(f!("wrong {what}; want {:?}, got {:?}", text, span)); - } - } - - pub fn finish(mut self) -> Result<(), String> { - use std::fmt::Write; - - if self.error_count > 10 { - let _ = - writeln!(self.errors, "... and {} more errors", self.error_count - 1); - } - - if self.error_count > 0 { - return Err(self.errors); - } - Ok(()) - } -} - -pub fn zip_eq( - what: &str, - state: &mut MatchState, - ours: Ours, - theirs: Theirs, - mut cb: impl FnMut(&mut MatchState, Ours::Item, Theirs::Item), -) { - let mut ours = ours.into_iter(); - let mut theirs = theirs.into_iter(); - state.stack.push(0); - loop { - let ours = ours.next(); - let theirs = theirs.next(); - if ours.is_none() && theirs.is_none() { - state.stack.pop(); - break; - } - - if let (Some(ours), Some(theirs)) = (ours, theirs) { - cb(state, ours, theirs); - - *state.stack.last_mut().unwrap() += 1; - continue; - } - - let popped = state.stack.pop().unwrap(); - state.error(f!("{what} had unequal lengths (got to {popped})")); - break; - } -} From 9fc8e4295e450e850ca46a098d2994f36f2c8a41 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Fri, 24 Jan 2025 15:13:01 -0800 Subject: [PATCH 06/11] gilded: make Test::outputs() return handles for each output In the previous 
design of output(), you could get into a situation where the outputs for the test depended on which output path the test took, meaning that if an error occured, it would generate an error golden, but once the error was resolved, it would fail to delete the error golden. This change requires that all outputs be specified up-front. --- gilded/src/lib.rs | 85 ++++++++++++++++++++++++++++++-------- ilex/tests/greedy/main.rs | 13 ++---- ilex/tests/json/main.rs | 20 ++++----- ilex/tests/llvm/main.rs | 13 ++---- ilex/tests/numbers/main.rs | 24 ++++------- ilex/tests/ui/main.rs | 65 +++++++++-------------------- 6 files changed, 114 insertions(+), 106 deletions(-) diff --git a/gilded/src/lib.rs b/gilded/src/lib.rs index c5493ed..28005b4 100644 --- a/gilded/src/lib.rs +++ b/gilded/src/lib.rs @@ -31,7 +31,7 @@ //! the crate root) which matches the glob passed to the attribute. The input //! file's path and contents can be accessed through the [`Test`] accessors. //! -//! To specify a test output, use [`Test::output()`]. This specifies the +//! To specify golden outputs, use [`Test::outputs()`]. This specifies the //! file extension for the golden, and its computed contents. The extension is //! used to construct the path of the result. If the input is `foo/bar.txt`, and //! the extension for this output is `csv`, the output will be read/written to @@ -47,10 +47,21 @@ //! //! To regenerate a specific test, simply pass its name as a filter to the test. //! See `cargo test -- --help` for available flags.` +//! +//! Regenerating goldens will cause a `GILDED_CHANGED` file to be crated at the +//! crate root, which will cause all `gilded` tests in the crate to fail until +//! it is deleted. Deleting it forces the user to acknowledge that goldens have +//! been regenerated, to avoid blindly committing them. +//! +//! # Known Issues +//! +//! Golden tests can run under MIRI but have extremely large overhead. For the +//! time being, they are `#[cfg]`'d out in MIRI mode. 
+use std::cell::RefCell; +use std::cell::RefMut; use std::env; use std::fs; -use std::fs::File; use std::path::Path; use std::str; @@ -71,7 +82,7 @@ pub struct Suite { name: &'static str, crate_root: &'static Path, test_root: &'static Utf8Path, - run: fn(&mut Test), + run: fn(&Test), } impl Suite { @@ -86,7 +97,7 @@ impl Suite { pub fn new( name: &'static str, crate_root: &'static str, - run: fn(&mut Test), + run: fn(&Test), paths: &[&'static str], ) -> Suite { let crate_root = Path::new(crate_root); @@ -135,22 +146,21 @@ impl Suite { #[doc(hidden)] #[track_caller] pub fn run(&'static self, path: &'static str, text: &'static [u8]) { - let root = self.crate_root.join(self.test_root); let path = Utf8Path::new(path); let file = self.crate_root.join(path); - let lock = root.join("GILDED_CHANGED"); - let lock_name = self.test_root.join("GILDED_CHANGED"); + let lock = self.crate_root.join("GILDED_CHANGED"); + let lock_name = "GILDED_CHANGED"; // TODO: make sure this is normalized to being a Unix path on Windows. let name = path.strip_prefix(self.test_root).unwrap(); - let mut test = Test { + let test = Test { suite: self, path: name, text, - outputs: Vec::new(), + outputs: Default::default(), }; - (self.run)(&mut test); + (self.run)(&test); let regen = env::var_os(REGENERATE).is_some(); assert!( @@ -159,11 +169,17 @@ impl Suite { ); if regen { eprintln!("{}", lock.display()); - File::create(lock).unwrap(); + fs::write(lock, "delete this file to confirm changes to golden tests\n") + .unwrap() } + let outputs = test.outputs.borrow(); + let outputs = outputs + .as_ref() + .expect("test function failed to call Test::outputs()"); + let mut failed = false; - for (extn, text) in &test.outputs { + for (extn, text) in outputs { let file = file.with_extension(extn); let name = name.with_extension(extn); @@ -206,11 +222,11 @@ impl Suite { /// A handle for a single golden test case. 
pub struct Test<'t> { suite: &'t Suite, - path: &'t Utf8Path, text: &'t [u8], - outputs: Vec<(String, String)>, + #[allow(clippy::type_complexity)] + outputs: RefCell>>, } impl<'t> Test<'t> { @@ -233,16 +249,50 @@ impl<'t> Test<'t> { self.text } - /// Outputs a result for this test. + /// Declares the outputs for this test. /// /// A test may have many results, each of which has the same path as the input /// with an extra extension. For example, for a `foo.txt` input, the output /// might be `foo.txt.stderr`, in which case `extension` would be `stderr`. - pub fn output(&mut self, extension: &str, result: String) { - self.outputs.push((extension.into(), result)); + /// + /// Returns output functions for test, one for each output. They should be + /// called with the result of the test. + /// + /// # Panics + /// + /// The test must call this function exactly; calling it more than once or not + /// at all will cause the test to panic. + pub fn outputs<'a, const N: usize>( + &'a self, + extensions: [&str; N], + ) -> [impl FnOnce(String) + 'a; N] { + let outputs: RefMut> = self + .outputs + .try_borrow_mut() + .expect("called Test::outputs() more than once"); + assert!(outputs.is_none(), "called Test::outputs() more than once"); + + let outputs: RefMut<[_; N]> = RefMut::map(outputs, |o| { + o.insert(extensions.map(|extn| (extn.into(), String::new())).into()) + .as_mut() + .try_into() + .unwrap() + }); + + split(outputs).map(|mut slot| move |value| slot.1 = value) } } +fn split(orig: RefMut<[T; N]>) -> [RefMut; N] { + let mut orig: Option> = Some(orig); + [(); N].map(|_| { + let (elem, rest) = + RefMut::map_split(orig.take().unwrap(), |s| s.split_first_mut().unwrap()); + orig = Some(rest); + elem + }) +} + /// Implementation macro for `#[gilded::test]`. #[doc(hidden)] #[macro_export] @@ -289,6 +339,7 @@ macro_rules! __test__ { ) => { $(#[$attr])* #[::std::prelude::rust_2021::test] + #[cfg_attr(miri, ignore)] fn $test() { __SUITE__.run($path, $text) } $crate::__test__! 
{ @tests $(#[$attr])* $($tt)* } }; diff --git a/ilex/tests/greedy/main.rs b/ilex/tests/greedy/main.rs index 044cb6e..c80989c 100644 --- a/ilex/tests/greedy/main.rs +++ b/ilex/tests/greedy/main.rs @@ -3,7 +3,7 @@ use ilex::Context; use ilex::Lexeme; #[gilded::test("tests/greedy/*.txt")] -fn greedy(test: &mut gilded::Test) { +fn greedy(test: &gilded::Test) { // This test verifies that lexing is greedy in *most* cases. #[ilex::spec] @@ -37,14 +37,9 @@ fn greedy(test: &mut gilded::Test) { .new_file_from_bytes(test.path(), test.text(), &report) .unwrap(); + let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); match file.lex(Greedy::get().spec(), &report) { - Ok(stream) => { - test.output("tokens.yaml", stream.summary()); - test.output("stderr", "".into()); - } - Err(fatal) => { - test.output("tokens.yaml", "".into()); - test.output("stderr", format!("{fatal:?}")); - } + Ok(stream) => tokens(stream.summary()), + Err(fatal) => stderr(fatal.to_string()), } } diff --git a/ilex/tests/json/main.rs b/ilex/tests/json/main.rs index 56140a5..3c23146 100644 --- a/ilex/tests/json/main.rs +++ b/ilex/tests/json/main.rs @@ -51,34 +51,32 @@ struct JsonSpec { } #[gilded::test("tests/json/*.json")] -fn check_tokens(test: &mut gilded::Test) { +fn check_tokens(test: &gilded::Test) { let ctx = Context::new(); let report = ctx.new_report(); let file = ctx .new_file_from_bytes(test.path(), test.text(), &report) .unwrap(); + let [tokens, ast, stderr] = + test.outputs(["tokens.yaml", "ast.txt", "stderr"]); + let stream = match file.lex(JsonSpec::get().spec(), &report) { Ok(stream) => stream, Err(fatal) => { - test.output("tokens.yaml", "".into()); - test.output("ast.txt", "".into()); - test.output("stderr", format!("{fatal:?}")); + stderr(fatal.to_string()); return; } }; - test.output("tokens.yaml", stream.summary()); + tokens(stream.summary()); let json = parse(&report, JsonSpec::get(), &mut stream.cursor()); + ast(format!("{json:#?}")); + if let Err(fatal) = report.fatal_or(()) { - 
test.output("ast.txt", "".into()); - test.output("stderr", format!("{fatal:?}")); - return; + stderr(fatal.to_string()); } - - test.output("ast.txt", format!("{json:#?}")); - test.output("stderr", "".into()); } #[derive(Clone, Debug, PartialEq)] diff --git a/ilex/tests/llvm/main.rs b/ilex/tests/llvm/main.rs index 478a3c9..18a1764 100644 --- a/ilex/tests/llvm/main.rs +++ b/ilex/tests/llvm/main.rs @@ -93,21 +93,16 @@ struct Llvm { } #[gilded::test("tests/llvm/*.ll")] -fn llvm(test: &mut gilded::Test) { +fn llvm(test: &gilded::Test) { let ctx = Context::new(); let report = ctx.new_report(); let file = ctx .new_file_from_bytes(test.path(), test.text(), &report) .unwrap(); + let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); match file.lex(Llvm::get().spec(), &report) { - Ok(stream) => { - test.output("tokens.yaml", stream.summary()); - test.output("stderr", "".into()); - } - Err(fatal) => { - test.output("tokens.yaml", "".into()); - test.output("stderr", format!("{fatal:?}")); - } + Ok(stream) => tokens(stream.summary()), + Err(fatal) => stderr(fatal.to_string()), } } diff --git a/ilex/tests/numbers/main.rs b/ilex/tests/numbers/main.rs index 715ac31..4272c3e 100644 --- a/ilex/tests/numbers/main.rs +++ b/ilex/tests/numbers/main.rs @@ -57,32 +57,26 @@ struct Numbers { } #[gilded::test("tests/numbers/*.txt")] -fn numbers(test: &mut gilded::Test) { +fn numbers(test: &gilded::Test) { let ctx = Context::new(); let report = ctx.new_report(); let file = ctx .new_file_from_bytes(test.path(), test.text(), &report) .unwrap(); + let [tokens, fp64, stderr] = + test.outputs(["tokens.yaml", "fp64.txt", "stderr"]); + match file.lex(Numbers::get().spec(), &report) { Ok(stream) => { - test.output("tokens.yaml", stream.summary()); + tokens(stream.summary()); match parse(Numbers::get(), stream.cursor(), &report) { - Ok(v) => { - test.output("fp64.txt", format!("{v:#?}")); - test.output("stderr", "".into()) - } - Err(fatal) => { - test.output("fp64.txt", "".into()); - 
test.output("stderr", format!("{fatal:?}")); - } + Ok(v) => fp64(format!("{v:#?}")), + Err(fatal) => stderr(fatal.to_string()), } } - Err(fatal) => { - test.output("tokens.yaml", "".into()); - test.output("stderr", format!("{fatal:?}")); - test.output("fp64.txt", "".into()); - } + + Err(fatal) => stderr(fatal.to_string()), } } diff --git a/ilex/tests/ui/main.rs b/ilex/tests/ui/main.rs index 922356b..735e4c2 100644 --- a/ilex/tests/ui/main.rs +++ b/ilex/tests/ui/main.rs @@ -4,7 +4,7 @@ use ilex::Context; use ilex::Lexeme; #[gilded::test("tests/ui/ambiguous/*.txt")] -fn ambiguous(test: &mut gilded::Test) { +fn ambiguous(test: &gilded::Test) { #[ilex::spec] struct Spec { #[rule("null")] @@ -59,20 +59,15 @@ fn ambiguous(test: &mut gilded::Test) { .new_file_from_bytes(test.path(), test.text(), &report) .unwrap(); + let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); match file.lex(Spec::get().spec(), &report) { - Ok(stream) => { - test.output("tokens.yaml", stream.summary()); - test.output("stderr", "".into()); - } - Err(fatal) => { - test.output("tokens.yaml", "".into()); - test.output("stderr", format!("{fatal:?}")); - } + Ok(stream) => tokens(stream.summary()), + Err(fatal) => stderr(fatal.to_string()), } } #[gilded::test("tests/ui/digital/*.txt")] -fn digital(test: &mut gilded::Test) { +fn digital(test: &gilded::Test) { #[ilex::spec] struct Spec { #[rule(Digital::new(16).prefix("0x"))] @@ -161,20 +156,15 @@ fn digital(test: &mut gilded::Test) { .new_file_from_bytes(test.path(), test.text(), &report) .unwrap(); + let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); match file.lex(Spec::get().spec(), &report) { - Ok(stream) => { - test.output("tokens.yaml", stream.summary()); - test.output("stderr", "".into()); - } - Err(fatal) => { - test.output("tokens.yaml", "".into()); - test.output("stderr", format!("{fatal:?}")); - } + Ok(stream) => tokens(stream.summary()), + Err(fatal) => stderr(fatal.to_string()), } } 
#[gilded::test("tests/ui/eof/*.txt")] -fn eof(test: &mut gilded::Test) { +fn eof(test: &gilded::Test) { #[ilex::spec] struct Spec { #[rule("/*", "*/")] @@ -197,20 +187,15 @@ fn eof(test: &mut gilded::Test) { .new_file_from_bytes(test.path(), test.text(), &report) .unwrap(); + let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); match file.lex(Spec::get().spec(), &report) { - Ok(stream) => { - test.output("tokens.yaml", stream.summary()); - test.output("stderr", "".into()); - } - Err(fatal) => { - test.output("tokens.yaml", "".into()); - test.output("stderr", format!("{fatal:?}")); - } + Ok(stream) => tokens(stream.summary()), + Err(fatal) => stderr(fatal.to_string()), } } #[gilded::test("tests/ui/too_small/*.txt")] -fn too_small(test: &mut gilded::Test) { +fn too_small(test: &gilded::Test) { #[ilex::spec] struct Spec { #[rule(Ident::new().prefix("%"))] @@ -236,20 +221,15 @@ fn too_small(test: &mut gilded::Test) { .new_file_from_bytes(test.path(), test.text(), &report) .unwrap(); + let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); match file.lex(Spec::get().spec(), &report) { - Ok(stream) => { - test.output("tokens.yaml", stream.summary()); - test.output("stderr", "".into()); - } - Err(fatal) => { - test.output("tokens.yaml", "".into()); - test.output("stderr", format!("{fatal:?}")); - } + Ok(stream) => tokens(stream.summary()), + Err(fatal) => stderr(fatal.to_string()), } } #[gilded::test("tests/ui/unrecognized/*.txt")] -fn unrecognized(test: &mut gilded::Test) { +fn unrecognized(test: &gilded::Test) { #[ilex::spec] struct Spec { null: Lexeme, @@ -265,14 +245,9 @@ fn unrecognized(test: &mut gilded::Test) { .new_file_from_bytes(test.path(), test.text(), &report) .unwrap(); + let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); match file.lex(Spec::get().spec(), &report) { - Ok(stream) => { - test.output("tokens.yaml", stream.summary()); - test.output("stderr", "".into()); - } - Err(fatal) => { - test.output("tokens.yaml", 
"".into()); - test.output("stderr", format!("{fatal:?}")); - } + Ok(stream) => tokens(stream.summary()), + Err(fatal) => stderr(fatal.to_string()), } } From 538263389bb0d4ac93a09a189c9413f315b17ab1 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Sat, 25 Jan 2025 14:57:20 -0800 Subject: [PATCH 07/11] gilded: Ensure that changes in test inputs are picked up Rust does not provide a way to specify that there are non-Rust files relevant to the build that should cause the build to stale if they change. We implement this ourselves through some gross mtime hacks. The best we can do is inform the user that inputs have changed and they must re-run cargo test to cause the tests to be rebuilt against the new inputs. --- gilded/Cargo.toml | 1 - gilded/src/lib.rs | 113 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 86 insertions(+), 28 deletions(-) diff --git a/gilded/Cargo.toml b/gilded/Cargo.toml index 19e2cd1..6faa6b8 100644 --- a/gilded/Cargo.toml +++ b/gilded/Cargo.toml @@ -20,4 +20,3 @@ camino = "1.1.9" diffy = "0.4.0" nu-glob = "0.101.0" unicode-width = "0.2.0" - diff --git a/gilded/src/lib.rs b/gilded/src/lib.rs index 28005b4..368d250 100644 --- a/gilded/src/lib.rs +++ b/gilded/src/lib.rs @@ -62,8 +62,13 @@ use std::cell::RefCell; use std::cell::RefMut; use std::env; use std::fs; +use std::fs::File; +use std::io; +use std::io::Write; +use std::panic::Location; use std::path::Path; use std::str; +use std::time::SystemTime; use camino::Utf8Path; @@ -80,9 +85,12 @@ pub const REGENERATE: &str = "GILDED_REGENERATE"; /// [`#[gilded::test]`][test] macro. 
pub struct Suite { name: &'static str, + glob: &'static str, crate_root: &'static Path, test_root: &'static Utf8Path, + location: &'static Location<'static>, run: fn(&Test), + poisoned: bool, } impl Suite { @@ -96,47 +104,77 @@ impl Suite { #[doc(hidden)] pub fn new( name: &'static str, + glob: &'static str, crate_root: &'static str, + location: &'static Location<'static>, run: fn(&Test), paths: &[&'static str], ) -> Suite { let crate_root = Path::new(crate_root); - let Some(mut common_prefix) = paths.first().copied() else { - return Suite { - name, - crate_root, - run, - test_root: Utf8Path::new(""), - }; - }; + let common_prefix = paths.first().copied().map(|mut common_prefix| { + common_prefix = Utf8Path::new(common_prefix) + .parent() + .map(Utf8Path::as_str) + .unwrap_or(""); - common_prefix = Utf8Path::new(common_prefix) - .parent() - .map(Utf8Path::as_str) - .unwrap_or(""); + let sep = std::path::MAIN_SEPARATOR; + for path in &paths[1..] { + let common = common_prefix.split_inclusive(sep); + let chunks = path.split_inclusive(sep); - let sep = std::path::MAIN_SEPARATOR; - for path in &paths[1..] { - let common = common_prefix.split_inclusive(sep); - let chunks = path.split_inclusive(sep); + let len = common + .zip(chunks) + .take_while(|(a, b)| a == b) + .map(|(a, _)| a.len()) + .sum(); - let len = common - .zip(chunks) - .take_while(|(a, b)| a == b) - .map(|(a, _)| a.len()) - .sum(); + common_prefix = &common_prefix[..len]; + } - common_prefix = &common_prefix[..len]; - } + common_prefix = common_prefix.trim_end_matches(sep); + common_prefix + }); - common_prefix = common_prefix.trim_end_matches(sep); - Suite { + let mut suite = Suite { name, + glob, crate_root, + location, run, - test_root: Utf8Path::new(common_prefix), + test_root: Utf8Path::new(common_prefix.unwrap_or_default()), + poisoned: false, + }; + + if suite.inputs_have_changed() { + // Poke the mtime of the file that contains the #[gilded::test], and then + // fail it. 
+ // + // Alas, finding this file may be non-trivial, since the Location path + // will be rooted at the Cargo workspace, but the cwd will be the manifest + // dir of the crate we are testing. + let mut cwd = env::current_dir().unwrap(); + let mut test_file = cwd.join(suite.location.file()); + while !test_file.exists() { + cwd.pop(); + test_file = cwd.join(suite.location.file()); + } + + // Bump the mtime. + File::open(test_file) + .unwrap() + .set_modified(SystemTime::now()) + .unwrap(); + + let _ = write!( + io::stderr(), // Dodge stderr capture. + "\nerror: #[gilded::test] inputs for `{}` are out of date; rerun the test to pick up updated inputs\n\n", + suite.name, + ); + suite.poisoned = true; } + + suite } /// Executes a test in this test suite with the given data. Panics to signal @@ -146,6 +184,10 @@ impl Suite { #[doc(hidden)] #[track_caller] pub fn run(&'static self, path: &'static str, text: &'static [u8]) { + if self.poisoned { + std::process::exit(128); + } + let path = Utf8Path::new(path); let file = self.crate_root.join(path); let lock = self.crate_root.join("GILDED_CHANGED"); @@ -217,6 +259,21 @@ impl Suite { "golden files have changed: verify changes and then delete {lock_name}", ) } + + /// Checks for files that ostensibly belong to this suite which have changed, + /// by comparing the mtime of the files with the mtime of the executable. + fn inputs_have_changed(&self) -> bool { + let this = env::current_exe().expect("argv[0] missing for test binary"); + let built_at = this.metadata().unwrap().modified().unwrap(); + + nu_glob::glob(self.glob) + .unwrap() + .filter_map(Result::ok) + .any(|path| { + let mtime = path.metadata().unwrap().modified().unwrap(); + mtime > built_at + }) + } } /// A handle for a single golden test case. @@ -298,7 +355,7 @@ fn split(orig: RefMut<[T; N]>) -> [RefMut; N] { #[macro_export] macro_rules! 
__test__ { ( - #[test($($_:tt)*)] + #[test($glob:literal)] $(#[$attr:meta])* fn $name:ident($($args:tt)*) { $($body:tt)* } $($tt:tt)* @@ -309,7 +366,9 @@ macro_rules! __test__ { pub static __SUITE__: ::std::sync::LazyLock<$crate::Suite> = ::std::sync::LazyLock::new(|| $crate::Suite::new( stringify!($name), + $glob, env!("CARGO_MANIFEST_DIR"), + ::std::panic::Location::caller(), |$($args)*| -> () { $($body)* }, &$crate::__test__!(@paths[] $($tt)*), )); From c9f654bc24bd210718edb47fa6e81f68cab67aca Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Sat, 25 Jan 2025 15:06:23 -0800 Subject: [PATCH 08/11] chore: Missing documentation --- gilded/attr/lib.rs | 5 +++++ ilex/src/lib.rs | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/gilded/attr/lib.rs b/gilded/attr/lib.rs index 5d69268..3ac56f6 100644 --- a/gilded/attr/lib.rs +++ b/gilded/attr/lib.rs @@ -1,6 +1,11 @@ //! Implementation detail of `gilded`. proc2decl::fs_bridge! { + /// Turns a function into a golden test suite. /// + /// See the [crate documentation][crate] for more information on how to use + /// this attribute. + /// + /// [crate]: https://docs.rs/gilded macro #[test] => gilded::__test__; } diff --git a/ilex/src/lib.rs b/ilex/src/lib.rs index 60f7a17..493fb38 100644 --- a/ilex/src/lib.rs +++ b/ilex/src/lib.rs @@ -59,9 +59,9 @@ //! can. This will make it easier for you to just pin a version and avoid //! thinking about this problem. //! -//! Diagnostics are completely unstable. Don't try to parse them, don't write -//! golden tests against them. If you must, use [`testing::check_report()`] so -//! that you can regenerate them. +//! Diagnostics are completely unstable. Don't try to parse them, and if you +//! need to test them, using something like [`gilded`](https://docs.rs/gilded) +//! to make it easy to regenerate when the output changes. //! //! # Quick Start //! 
From d1516623f9246361b9e186ab69591f2c1ca153cf Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Sat, 25 Jan 2025 15:10:22 -0800 Subject: [PATCH 09/11] gilded: Move Doc into its own module --- gilded/src/doc/emit.rs | 139 -------------------------------------- gilded/src/doc/json.rs | 24 +++---- gilded/src/doc/mod.rs | 57 ++++++++-------- gilded/src/doc/yaml.rs | 22 +++--- gilded/src/lib.rs | 6 +- ilex/src/token/summary.rs | 2 +- 6 files changed, 55 insertions(+), 195 deletions(-) delete mode 100644 gilded/src/doc/emit.rs diff --git a/gilded/src/doc/emit.rs b/gilded/src/doc/emit.rs deleted file mode 100644 index 1ec3c4a..0000000 --- a/gilded/src/doc/emit.rs +++ /dev/null @@ -1,139 +0,0 @@ -use std::fmt; -use std::io; -use std::mem; - -use byteyarn::YarnRef; -use unicode_width::UnicodeWidthStr; - -use crate::doc::DocFormat; -use crate::doc::DocOptions; - -/// An indentation-aware pretty-printer. -pub struct Printer<'a> { - out: &'a mut dyn io::Write, - options: &'a DocOptions, - indent: usize, - at_newline: bool, -} - -impl<'a> Printer<'a> { - /// Returns a new printer with the given output and options. - pub fn new(out: &'a mut dyn io::Write, options: &'a DocOptions) -> Self { - Self { - out, - options, - indent: 0, - at_newline: true, - } - } - - /// Updates the indentation level with the given diff. - pub fn indent(&mut self, diff: isize) { - self.indent = self.indent.checked_add_signed(diff).unwrap(); - } - - /// Writes indentation, if necessary. - pub fn write_indent(&mut self) -> io::Result<()> { - if !mem::take(&mut self.at_newline) { - return Ok(()); - } - self.at_newline = false; - self.write_spaces(self.indent * self.options.tab_width) - } - - /// Writes len ASCII spaces to the output. 
- pub fn write_spaces(&mut self, mut len: usize) -> io::Result<()> { - const SPACES: &[u8; 32] = b" "; - - while len > SPACES.len() { - self.out.write_all(SPACES)?; - len -= SPACES.len(); - } - self.out.write_all(&SPACES[..len])?; - Ok(()) - } - - pub fn yaml_list_item(&mut self) -> io::Result<()> { - writeln!(self)?; - self - .write_indent((self.indent * self.options.tab_width).saturating_sub(2))?; - write!(self, "- ") - } - - pub fn escaped_string(&mut self, data: YarnRef<[u8]>) -> io::Result<()> { - let yaml = self.options.format == DocFormat::Yaml; - - if yaml { - if let Some(ident) = is_ident(data) { - return write!(self, "{ident}"); - } - } - - write!(self, "\"")?; - for chunk in data.utf8_chunks() { - let chunk = match chunk { - Ok(s) => s, - Err(e) => { - for b in e { - write!(self, "\\x{b:02x}")?; - } - continue; - } - }; - for c in chunk.chars() { - match c { - '\0' if yaml => write!(self, "\\0")?, - '\n' => write!(self, "\\n")?, - '\r' => write!(self, "\\r")?, - '\t' => write!(self, "\\t")?, - '\\' => write!(self, "\\\\")?, - '\"' => write!(self, "\\\"")?, - c if !c.is_control() => write!(self, "{c}")?, - c if yaml && c.is_ascii() => write!(self, "\\x{:02x}", c as u32)?, - c => { - for u in c.encode_utf16(&mut [0, 0]) { - write!(self, "\\u{u:04x}")?; - } - } - } - } - } - write!(self, "\"") - } - -} - -impl io::Write for Printer<'_> { - fn write(&mut self, buf: &[u8]) -> io::Result { - for line in buf.split_inclusive(|&b| b == b'\n') { - self.write_indent()?; - self.out.write_all(line)?; - if line.ends_with(b"\n") { - self.at_newline = true; - } - } - Ok(buf.len()) - } - - fn flush(&mut self) -> io::Result<()> { - self.out.flush() - } -} - -/// Returns the number of terminal columns that the printed output of `d` takes -/// up. 
-pub fn width(d: &dyn fmt::Display) -> usize { - use fmt::Write; - - struct Counter(usize); - impl Write for Counter { - fn write_str(&mut self, s: &str) -> fmt::Result { - self.0 += s.width(); - Ok(()) - } - } - - let mut counter = Counter(0); - let _ = write!(&mut counter, "{}", d); - counter.0 -} \ No newline at end of file diff --git a/gilded/src/doc/json.rs b/gilded/src/doc/json.rs index fd2324c..214ce0b 100644 --- a/gilded/src/doc/json.rs +++ b/gilded/src/doc/json.rs @@ -7,14 +7,10 @@ use allman::Tag; use byteyarn::YarnRef; use crate::doc::Doc; -use crate::doc::DocOptions; -use crate::doc::Value; +use crate::doc::Elem; +use crate::doc::Options; -pub fn build<'t>( - options: &DocOptions, - doc: &Doc<'t>, - out: &mut allman::Doc<'t>, -) { +pub fn build<'t>(options: &Options, doc: &Doc<'t>, out: &mut allman::Doc<'t>) { let is_array = doc.entries.iter().all(|(k, _)| k.is_none()); if is_array { out.tag_with(Tag::Group(options.max_array_width), |out| { @@ -60,24 +56,24 @@ pub fn build<'t>( } } -fn value<'t>(options: &DocOptions, v: &Value<'t>, out: &mut allman::Doc<'t>) { +fn value<'t>(options: &Options, v: &Elem<'t>, out: &mut allman::Doc<'t>) { match v { - Value::Bool(v) => { + Elem::Bool(v) => { out.tag(v.to_string()); } - Value::Int(v) => { + Elem::Int(v) => { out.tag(v.to_string()); } - Value::UInt(v) => { + Elem::UInt(v) => { out.tag(v.to_string()); } - Value::Fp(v) => { + Elem::Fp(v) => { out.tag(v.to_string()); } - Value::String(v) => { + Elem::String(v) => { out.tag(Escape(v).to_string()); } - Value::Doc(v) => build(options, v, out), + Elem::Doc(v) => build(options, v, out), } } diff --git a/gilded/src/doc/mod.rs b/gilded/src/doc/mod.rs index 6189383..00762d8 100644 --- a/gilded/src/doc/mod.rs +++ b/gilded/src/doc/mod.rs @@ -1,3 +1,5 @@ +//! Readable test output generating from tree-structured data. 
+ use std::io; use std::io::Write; @@ -11,26 +13,28 @@ mod yaml; /// Golden tests that output tree-shaped data can use `Doc` to generate /// diff-friendly, readable output. pub struct Doc<'a> { - entries: Vec<(Option>, Value<'a>)>, + entries: Vec<(Option>, Elem<'a>)>, } -// The format output to use when rendering a document. +/// The format output to use when rendering a document. #[derive(Clone, Copy, PartialEq, Eq, Debug)] -pub enum DocFormat { +pub enum Format { + /// Output as YAML. Yaml, + /// Output as JSON. Json, } -impl Default for DocFormat { +impl Default for Format { fn default() -> Self { Self::Yaml } } /// Options for rendering a [`Doc`] as a string. -pub struct DocOptions { +pub struct Options { // The format to output in; defaults to YAML. - pub format: DocFormat, + pub format: Format, // The number of spaces to use for indentation. pub tab_width: usize, @@ -42,10 +46,10 @@ pub struct DocOptions { pub max_object_width: usize, } -impl Default for DocOptions { +impl Default for Options { fn default() -> Self { Self { - format: DocFormat::default(), + format: Format::default(), tab_width: 2, max_columns: 80, max_array_width: 50, @@ -59,7 +63,7 @@ impl Default for DocOptions { /// All of the primitive number types and types which convert to `YarnBox<[u8]>` /// can be used as `Doc` values. `Option` for `T: DocValue` can also be /// used, and will only be inserted if it is `Some`. -pub trait DocValue<'a> { +pub trait Value<'a> { fn append_to(self, doc: &mut Doc<'a>); } @@ -72,16 +76,13 @@ impl<'a> Doc<'a> { /// Returns a new `Doc` with a single entry. pub fn single( name: impl Into>, - value: impl DocValue<'a>, + value: impl Value<'a>, ) -> Self { Self::new().entry(name, value) } /// Appends a sequence of values to this document. 
- pub fn push( - mut self, - elements: impl IntoIterator>, - ) -> Self { + pub fn push(mut self, elements: impl IntoIterator>) -> Self { for e in elements { e.append_to(&mut self); } @@ -92,7 +93,7 @@ impl<'a> Doc<'a> { pub fn entry( mut self, name: impl Into>, - value: impl DocValue<'a>, + value: impl Value<'a>, ) -> Self { let prev = self.entries.len(); value.append_to(&mut self); @@ -106,13 +107,13 @@ impl<'a> Doc<'a> { pub fn array( self, name: impl Into>, - elements: impl IntoIterator>, + elements: impl IntoIterator>, ) -> Self { self.entry(name, Self::new().push(elements)) } // Converts this document into a string, using the given options. - pub fn to_string(&self, options: &DocOptions) -> String { + pub fn to_string(&self, options: &Options) -> String { let mut out = Vec::new(); let _ = self.render(&mut out, options); String::from_utf8(out).unwrap() @@ -123,17 +124,17 @@ impl<'a> Doc<'a> { pub fn render( &self, out: &mut dyn Write, - options: &DocOptions, + options: &Options, ) -> io::Result<()> { let mut doc = allman::Doc::new(); match options.format { - DocFormat::Yaml => yaml::build( + Format::Yaml => yaml::build( yaml::Args { options, root: true, in_list: false }, self, &mut doc, ), - DocFormat::Json => json::build(options, self, &mut doc), + Format::Json => json::build(options, self, &mut doc), } doc.render(out, &allman::Options { max_columns: options.max_columns }) @@ -146,7 +147,7 @@ impl Default for Doc<'_> { } } -enum Value<'a> { +enum Elem<'a> { Bool(bool), Int(i128), UInt(u128), @@ -155,24 +156,24 @@ enum Value<'a> { Doc(Doc<'a>), } -impl<'a, T: DocValue<'a>> DocValue<'a> for Option { +impl<'a, T: Value<'a>> Value<'a> for Option { fn append_to(self, doc: &mut Doc<'a>) { if let Some(v) = self { v.append_to(doc) } } } -impl<'a> DocValue<'a> for Doc<'a> { +impl<'a> Value<'a> for Doc<'a> { fn append_to(self, doc: &mut Doc<'a>) { - doc.entries.push((None, Value::Doc(self))) + doc.entries.push((None, Elem::Doc(self))) } } macro_rules! 
impl_from { ($({$($T:ty),*} => $V:ident,)*) => {$($( - impl<'a> DocValue<'a> for $T { + impl<'a> Value<'a> for $T { fn append_to(self, doc: &mut Doc<'a>) { - doc.entries.push((None, Value::$V(self as _))) + doc.entries.push((None, Elem::$V(self as _))) } } )*)*} @@ -187,9 +188,9 @@ impl_from! { macro_rules! impl_from_yarn { ($(for<$lt:lifetime> $($T:ty),* => $U:ty,)*) => {$($( - impl<$lt> DocValue<$lt> for $T { + impl<$lt> Value<$lt> for $T { fn append_to(self, doc: &mut Doc<$lt>) { - doc.entries.push((None, Value::String(<$U>::from(self).into_bytes()))) + doc.entries.push((None, Elem::String(<$U>::from(self).into_bytes()))) } } )*)*} diff --git a/gilded/src/doc/yaml.rs b/gilded/src/doc/yaml.rs index 6c85af0..68e5105 100644 --- a/gilded/src/doc/yaml.rs +++ b/gilded/src/doc/yaml.rs @@ -7,13 +7,13 @@ use allman::Tag; use byteyarn::YarnRef; use crate::doc::Doc; -use crate::doc::DocOptions; -use crate::doc::Value; +use crate::doc::Elem; +use crate::doc::Options; pub struct Args<'a> { pub root: bool, pub in_list: bool, - pub options: &'a DocOptions, + pub options: &'a Options, } pub fn build<'t>(args: Args, doc: &'t Doc<'t>, out: &mut allman::Doc<'t>) { @@ -62,7 +62,7 @@ pub fn build<'t>(args: Args, doc: &'t Doc<'t>, out: &mut allman::Doc<'t>) { out.tag(ident.to_box()); let mut entry = entry; - while let Value::Doc(d) = entry { + while let Elem::Doc(d) = entry { let [(Some(k), v)] = d.entries.as_slice() else { break }; let Some(ident) = is_ident(k.as_bytes()) else { break }; @@ -83,21 +83,21 @@ pub fn build<'t>(args: Args, doc: &'t Doc<'t>, out: &mut allman::Doc<'t>) { } } -fn value<'t>(args: Args, v: &'t Value<'t>, out: &mut allman::Doc<'t>) { +fn value<'t>(args: Args, v: &'t Elem<'t>, out: &mut allman::Doc<'t>) { match v { - Value::Bool(v) => { + Elem::Bool(v) => { out.tag(v.to_string()); } - Value::Int(v) => { + Elem::Int(v) => { out.tag(v.to_string()); } - Value::UInt(v) => { + Elem::UInt(v) => { out.tag(v.to_string()); } - Value::Fp(v) => { + Elem::Fp(v) => { 
out.tag(v.to_string()); } - Value::String(v) => { + Elem::String(v) => { if is_raw_string(v.as_ref()) { out.tag("|").tag(Tag::Break(1)).tag_with( Tag::Indent(args.options.tab_width as isize), @@ -109,7 +109,7 @@ fn value<'t>(args: Args, v: &'t Value<'t>, out: &mut allman::Doc<'t>) { } out.tag(Escape(v).to_string()); } - Value::Doc(v) => build(args, v, out), + Elem::Doc(v) => build(args, v, out), } } diff --git a/gilded/src/lib.rs b/gilded/src/lib.rs index 368d250..a62a632 100644 --- a/gilded/src/lib.rs +++ b/gilded/src/lib.rs @@ -16,6 +16,9 @@ //! changes can quickly regenerate the test output by using the output of the //! test itself. Diffs can be examined in code review directly. //! +//! This crate also provides the [`doc::Doc`] type, enabling quick-and-dirty +//! construction of highly readable structured tree data for golden outputs. +//! //! # Defining a Test //! //! A `gilded` test is defined like so: @@ -74,8 +77,7 @@ use camino::Utf8Path; pub use gilded_attr::test; -mod doc; -pub use doc::*; +pub mod doc; /// The environment variable that is checked to decide whether or not to /// regenerate goldens. diff --git a/ilex/src/token/summary.rs b/ilex/src/token/summary.rs index 459a9ea..5a2fed1 100644 --- a/ilex/src/token/summary.rs +++ b/ilex/src/token/summary.rs @@ -1,6 +1,6 @@ //! Implementation of `Stream::summary()`. -use gilded::Doc; +use gilded::doc::Doc; use crate::file::Span; use crate::file::Spanned; From c456960255df1abd39798bef618b041f104dad2b Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Sat, 25 Jan 2025 15:11:39 -0800 Subject: [PATCH 10/11] gilded: Derive Clone for Doc --- gilded/src/doc/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gilded/src/doc/mod.rs b/gilded/src/doc/mod.rs index 00762d8..7d3874d 100644 --- a/gilded/src/doc/mod.rs +++ b/gilded/src/doc/mod.rs @@ -12,6 +12,7 @@ mod yaml; /// /// Golden tests that output tree-shaped data can use `Doc` to generate /// diff-friendly, readable output. 
+#[derive(Clone)] pub struct Doc<'a> { entries: Vec<(Option>, Elem<'a>)>, } @@ -147,6 +148,7 @@ impl Default for Doc<'_> { } } +#[derive(Clone)] enum Elem<'a> { Bool(bool), Int(i128), From 6a207ceb0009e5fdd3649114d7e54e1b87055421 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Sat, 25 Jan 2025 15:24:19 -0800 Subject: [PATCH 11/11] docs: Update the READMEs --- README.md | 9 +++++++ allman/README.md | 42 +++++++++++++++++++++++++++++ allman/src/lib.rs | 2 +- gilded/README.md | 64 ++++++++++++++++++++++++++++++++++++++++++++ proc2decl/Cargo.toml | 2 ++ proc2decl/README.md | 28 +++++++++++++++++++ 6 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 allman/README.md create mode 100644 gilded/README.md create mode 100644 proc2decl/README.md diff --git a/README.md b/README.md index cb95104..3c61296 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,15 @@ depend on each other. - â›Šī¸ [`ilex`](https://github.com/mcy/strings/tree/main/ilex) - The last lexer I ever want to write. +- đŸ—’ī¸ [`allman`](https://github.com/mcy/strings/tree/main/allman) - A DOM for + code formatters. + +- 👑 [`gilded`](https://github.com/mcy/strings/tree/main/gilded) - How I learned + to stop worrying and love golden testing. + +- đŸ’ĸ [`proc2decl`](https://github.com/mcy/strings/tree/main/proc2decl) - Proc + macros suck! + --- All libraries are Apache-2.0 licensed. diff --git a/allman/README.md b/allman/README.md new file mode 100644 index 0000000..aa9e33c --- /dev/null +++ b/allman/README.md @@ -0,0 +1,42 @@ +# allman + +`allman` - A code formatting and line reflowing toolkit. đŸ—’ī¸đŸ–‹ī¸ + +`allman::Doc` is a DOM-like structure that specifies how indentation, +like breaking, and reflowing should be handled. It is a tree of `Tag`s +that dictate layout information for the source code to format. + +For example, the Allman brace style (for which this crate is named) can +be implemented as follows: + +```rust +// flat: fn foo() { ... 
} +// +// broken: +// fn foo() +// { +// // ... +// } +Doc::new() + .tag("fn") + .tag(Tag::Space) + .tag("foo") + .tag("(").tag(")") + .tag_with(Tag::Group(40), |doc| { + doc + .tag_if(Tag::Space, If::Flat) + .tag_if(Tag::Break(1), If::Broken) + .tag("{") + .tag_if(Tag::Space, If::Flat) + .tag_if(Tag::Break(1), If::Broken) + .tag_with(Tag::Indent(2), |doc| { + // Brace contents here... + }) + .tag_if(Tag::Space, If::Flat) + .tag_if(Tag::Break(1), If::Broken) + .tag("}"); + }); +``` + +When calling `Doc::render()`, the layout algorithm will determine whether +`Tag::Group`s should be "broken", i.e., laid out with newlines inside. diff --git a/allman/src/lib.rs b/allman/src/lib.rs index f3bb0bf..439457c 100644 --- a/allman/src/lib.rs +++ b/allman/src/lib.rs @@ -1,4 +1,4 @@ -//! `allman` đŸ—’ī¸đŸ–‹ī¸ - A code formatting and line reflowing toolkit. +//! `allman` - A code formatting and line reflowing toolkit. đŸ—’ī¸đŸ–‹ī¸ //! //! [`allman::Doc`][Doc] is a DOM-like structure that specifies how indentation, //! like breaking, and reflowing should be handled. It is a tree of [`Tag`]s diff --git a/gilded/README.md b/gilded/README.md new file mode 100644 index 0000000..97a10e2 --- /dev/null +++ b/gilded/README.md @@ -0,0 +1,64 @@ +# gilded + +`gilded` - Easy-peesy golden testing. 👑 + +## Why Golden Testing? + +A "golden test" is a test that transforms data in some way, and validates it +by diffing it against an expected result: the "golden". + +This is especially useful for testing scenarios that consume an input file +(say, a source code file, for testing a compiler) and generate structured, +diffable textual output (such as JSON or CSV data, or even a `Debug`). + +Golden tests are best for cases where the output must be deterministic, and +where capturing fine-grained detail is valuable. + +Because they simply compare the result to an expected value byte-for-byte, +changes can quickly regenerate the test output by using the output of the +test itself. 
Diffs can be examined in code review directly.
+
+This crate also provides the `doc::Doc` type, enabling quick-and-dirty
+construction of highly readable structured tree data for golden outputs.
+
+## Defining a Test
+
+A `gilded` test is defined like so:
+
+```rust
+#[gilded::test("testdata/**/*.txt")]
+fn my_test(test: &gilded::Test) {
+  // ...
+}
+```
+
+`my_test` will be run as a separate unit test for every file (relative to
+the crate root) which matches the glob passed to the attribute. The input
+file's path and contents can be accessed through the `Test` accessors.
+
+To specify golden outputs, use `Test::outputs()`. This specifies the
+file extension for the golden, and its computed contents. The extension is
+used to construct the path of the result. If the input is `foo/bar.txt`, and
+the extension for this output is `csv`, the output will be read/written to
+`foo/bar.csv`.
+
+Panicking within the test body will fail the test as normal; tests should
+not contain output assertions; those are handled by the framework.
+
+## Generating Goldens
+
+Once the test is created, simply set the `GILDED_REGENERATE` environment
+variable: `GILDED_REGENERATE=1 cargo test`.
+
+To regenerate a specific test, simply pass its name as a filter to the test.
+See `cargo test -- --help` for available flags.
+
+Regenerating goldens will cause a `GILDED_CHANGED` file to be created at the
+crate root, which will cause all `gilded` tests in the crate to fail until
+it is deleted. Deleting it forces the user to acknowledge that goldens have
+been regenerated, to avoid blindly committing them.
+
+## Known Issues
+
+Golden tests can run under MIRI but have extremely large overhead. For the
+time being, they are `#[cfg]`'d out in MIRI mode.
diff --git a/proc2decl/Cargo.toml b/proc2decl/Cargo.toml
index 3a14d56..30ddd97 100644
--- a/proc2decl/Cargo.toml
+++ b/proc2decl/Cargo.toml
@@ -1,6 +1,8 @@
 [package]
 name = "proc2decl"
 version = "0.1.0"
+description = "Write less frickin' proc macro code"
+
 edition.workspace = true
 authors.workspace = true
 homepage.workspace = true
diff --git a/proc2decl/README.md b/proc2decl/README.md
new file mode 100644
index 0000000..5177891
--- /dev/null
+++ b/proc2decl/README.md
@@ -0,0 +1,28 @@
+# proc2decl
+
+`proc2decl` exists for one reason only: because proc macros are a toxic
+ecosystem.
+
+Sometimes, you want to use an attribute to define a macro. Unfortunately,
+Rust does not support declarative macros (also called macros-by-example)
+for attributes, for reasons that essentially boil down to cookie-licking.
+
+This crate exists for one purpose only, and that is to facilitate writing
+declarative macros that an attribute converts into.
+
+## How To Use
+
+1. Define the macro-by-example you wish to use as the main implementation of
+   your attribute or derive.
+
+2. Create a proc-macro crate. This is where the documentation for your
+   attribute will need to live. Your actual crate should depend on this
+   crate.
+
+3. Use `bridge!()` to define your bridge proc macros. These
+   macro calls should be documented, since their doc comments are the ones
+   that will appear in rustdoc for your macros.
+
+4. `pub use` the macros in your actual crate.
+
+Proc macros suck!