diff --git a/Makefile b/Makefile index d3173910..be90c40c 100644 --- a/Makefile +++ b/Makefile @@ -74,6 +74,7 @@ afl_quick: cargo afl build --no-default-features --release --config profile.release.debug-assertions=true && \ AFL_NO_UI=1 AFL_BENCH_UNTIL_CRASH=1 \ cargo afl fuzz -i in -o out -V 60 target/release/${AFL_TARGET}) + [ -z "$$(find tests/afl/out/default/crashes -type f -name 'id:*')" ] afl_crash: set +e; \ diff --git a/README.md b/README.md index 1809a7d6..b1176366 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Jotdown supports Rust edition 2021, i.e. Rust 1.56 and above. ## Usage Jotdown is primarily a parsing library but also has a minimal CLI -implementation and a simple online demo version. +implementation and a simple web demo version. ### Library @@ -62,9 +62,12 @@ It will be placed in `~/.cargo/bin/jotdown`. ### Web demo -A version of Jotdown compiled to WebAssembly and runnable in a web browser is -available at . It can also be run -locally: +The web demo is a version of Jotdown compiled to WebAssembly and runnable in a +web browser. It is useful for experimenting with the djot syntax and exploring +what events are emitted or what output is rendered. + +An online version is available at . 
It +can also be run locally: ``` $ cd examples/jotdown_wasm diff --git a/bench/criterion/main.rs b/bench/criterion/main.rs index 5ff477f0..42835a53 100644 --- a/bench/criterion/main.rs +++ b/bench/criterion/main.rs @@ -51,7 +51,9 @@ fn gen_html(c: &mut criterion::Criterion) { || jotdown::Parser::new(input).collect::>(), |p| { let mut s = String::new(); - jotdown::html::Renderer.push(p.into_iter(), &mut s).unwrap(); + jotdown::html::Renderer::default() + .push(p.into_iter(), &mut s) + .unwrap(); s }, criterion::BatchSize::SmallInput, @@ -62,6 +64,60 @@ fn gen_html(c: &mut criterion::Criterion) { } criterion_group!(html, gen_html); +fn gen_html_borrow(c: &mut criterion::Criterion) { + let mut group = c.benchmark_group("html_borrow"); + for (name, input) in bench_input::INPUTS { + group.throughput(criterion::Throughput::Elements( + jotdown::Parser::new(input).count() as u64, + )); + group.bench_with_input( + criterion::BenchmarkId::from_parameter(name), + input, + |b, &input| { + b.iter_batched( + || jotdown::Parser::new(input).collect::>(), + |p| { + let mut s = String::new(); + jotdown::html::Renderer::default() + .push_borrowed(p.as_slice().iter(), &mut s) + .unwrap(); + s + }, + criterion::BatchSize::SmallInput, + ); + }, + ); + } +} +criterion_group!(html_borrow, gen_html_borrow); + +fn gen_html_clone(c: &mut criterion::Criterion) { + let mut group = c.benchmark_group("html_clone"); + for (name, input) in bench_input::INPUTS { + group.throughput(criterion::Throughput::Elements( + jotdown::Parser::new(input).count() as u64, + )); + group.bench_with_input( + criterion::BenchmarkId::from_parameter(name), + input, + |b, &input| { + b.iter_batched( + || jotdown::Parser::new(input).collect::>(), + |p| { + let mut s = String::new(); + jotdown::html::Renderer::default() + .push(p.iter().cloned(), &mut s) + .unwrap(); + s + }, + criterion::BatchSize::SmallInput, + ); + }, + ); + } +} +criterion_group!(html_clone, gen_html_clone); + fn gen_full(c: &mut 
criterion::Criterion) { let mut group = c.benchmark_group("full"); for (name, input) in bench_input::INPUTS { @@ -72,7 +128,7 @@ fn gen_full(c: &mut criterion::Criterion) { |b, &input| { b.iter_with_large_drop(|| { let mut s = String::new(); - jotdown::html::Renderer + jotdown::html::Renderer::default() .push(jotdown::Parser::new(input), &mut s) .unwrap(); s @@ -83,4 +139,4 @@ fn gen_full(c: &mut criterion::Criterion) { } criterion_group!(full, gen_full); -criterion_main!(block, inline, html, full); +criterion_main!(block, inline, html, html_borrow, html_clone, full); diff --git a/bench/iai/main.rs b/bench/iai/main.rs index e606d5f3..d948bb68 100644 --- a/bench/iai/main.rs +++ b/bench/iai/main.rs @@ -12,7 +12,7 @@ fn block_inline() -> Option> { fn full() -> String { let mut s = String::new(); - jotdown::html::Renderer + jotdown::html::Renderer::default() .push(jotdown::Parser::new(bench_input::ALL), &mut s) .unwrap(); s diff --git a/examples/jotdown_wasm/Makefile b/examples/jotdown_wasm/Makefile index 5c0c34c1..d2a2b92e 100644 --- a/examples/jotdown_wasm/Makefile +++ b/examples/jotdown_wasm/Makefile @@ -7,9 +7,19 @@ ${WASM}: ${SRC} wasm: ${WASM} -run: ${WASM} +index.html: Makefile demo.html + echo '' > $@ + echo '' >> $@ + echo 'Jotdown Demo' >> $@ + echo '' >> $@ + echo '' >> $@ + cat demo.html >> $@ + echo '' >> $@ + echo '' >> $@ + +run: ${WASM} index.html python -m http.server clean: - rm -rf pkg + rm -rf pkg index.html cargo clean diff --git a/examples/jotdown_wasm/demo.html b/examples/jotdown_wasm/demo.html new file mode 100644 index 00000000..1cd918ec --- /dev/null +++ b/examples/jotdown_wasm/demo.html @@ -0,0 +1,50 @@ +
+ +
+ +
+
+
*Hello world!*
+

+  
+
diff --git a/examples/jotdown_wasm/index.html b/examples/jotdown_wasm/index.html deleted file mode 100644 index 91550963..00000000 --- a/examples/jotdown_wasm/index.html +++ /dev/null @@ -1,33 +0,0 @@ - -
- -
*Hello world!*
-
- diff --git a/examples/jotdown_wasm/src/lib.rs b/examples/jotdown_wasm/src/lib.rs index 3ab7fb01..ba72c80f 100644 --- a/examples/jotdown_wasm/src/lib.rs +++ b/examples/jotdown_wasm/src/lib.rs @@ -1,12 +1,64 @@ use wasm_bindgen::prelude::*; use jotdown::Render; +use std::fmt::Write; #[must_use] #[wasm_bindgen] pub fn jotdown_render(djot: &str) -> String { let events = jotdown::Parser::new(djot); let mut html = String::new(); - jotdown::html::Renderer.push(events, &mut html).unwrap(); + jotdown::html::Renderer::default() + .push(events, &mut html) + .unwrap(); html } + +#[must_use] +#[wasm_bindgen] +pub fn jotdown_parse(djot: &str) -> String { + jotdown::Parser::new(djot) + .map(|e| format!("{:?}\n", e)) + .collect() +} + +#[must_use] +#[wasm_bindgen] +pub fn jotdown_parse_indent(djot: &str) -> String { + let mut level = 0; + let mut out = String::new(); + for e in jotdown::Parser::new(djot) { + if !matches!(e, jotdown::Event::End(..)) { + // use non-breaking space for indent because normal spaces get squeezed by the browser + let nbsp = '\u{00a0}'; + (0..4 * level).for_each(|_| out.push(nbsp)); + } + match e { + jotdown::Event::Start(c, attrs) => { + level += 1; + if c.is_block() { + out.push('['); + } else { + out.push('('); + } + out.write_fmt(format_args!("{:?}", c)).unwrap(); + if c.is_block() { + out.push(']'); + } else { + out.push(')'); + } + if !attrs.is_empty() { + out.write_fmt(format_args!(" {:?}", attrs)).unwrap(); + } + out.push('\n'); + } + jotdown::Event::End(..) 
=> { + level -= 1; + } + e => { + out.write_fmt(format_args!("{:?}\n", e)).unwrap(); + } + }; + } + out +} diff --git a/src/attr.rs b/src/attr.rs index cd6eaaa7..1220d3ae 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -1,29 +1,33 @@ use crate::CowStr; -use crate::DiscontinuousString; -use crate::Span; -use std::borrow::Cow; use std::fmt; -use State::*; - -pub(crate) fn parse<'s, S: DiscontinuousString<'s>>(chars: S) -> Attributes<'s> { +pub(crate) fn parse(src: &str) -> Attributes { let mut a = Attributes::new(); - a.parse(chars); + a.parse(src); a } pub fn valid>(chars: I) -> (usize, bool) { + use State::*; + let mut has_attr = false; - let mut p = Parser::new(chars); - for e in &mut p { - match e { - Element::Class(..) | Element::Identifier(..) | Element::Attribute(..) => { - has_attr = true; - } - Element::Invalid => return (0, false), + let mut n = 0; + let mut state = Start; + for c in chars { + n += 1; + state = state.step(c); + match state { + Class | Identifier | Value | ValueQuoted => has_attr = true, + Done | Invalid => break, + _ => {} } } - (p.pos, has_attr) + + if matches!(state, Done) { + (n, has_attr) + } else { + (0, false) + } } /// Stores an attribute value that supports backslash escapes of ASCII punctuation upon displaying, @@ -39,6 +43,15 @@ impl<'s> AttributeValue<'s> { pub fn parts(&'s self) -> AttributeValueParts<'s> { AttributeValueParts { ahead: &self.raw } } + + fn extend(&mut self, s: &'s str) { + if self.raw.is_empty() { + self.raw = s.into(); + } else { + self.raw.push(' '); + self.raw.push_str(s); + } + } } impl<'s> From<&'s str> for AttributeValue<'s> { @@ -98,7 +111,7 @@ impl<'s> Iterator for AttributeValueParts<'s> { // Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra // indirection instead of always 24 bytes. 
#[allow(clippy::box_vec)] -#[derive(Debug, Clone, PartialEq, Eq, Default)] +#[derive(Clone, PartialEq, Eq, Default)] pub struct Attributes<'s>(Option)>>>); impl<'s> Attributes<'s> { @@ -113,24 +126,15 @@ impl<'s> Attributes<'s> { Self(self.0.take()) } - pub(crate) fn parse>(&mut self, input: S) -> bool { - #[inline] - fn borrow(cow: CowStr) -> &str { - match cow { - Cow::Owned(_) => panic!(), - Cow::Borrowed(s) => s, - } - } - - for elem in Parser::new(input.chars()) { - match elem { - Element::Class(c) => self.insert("class", input.src(c).into()), - Element::Identifier(i) => self.insert("id", input.src(i).into()), - Element::Attribute(a, v) => self.insert(borrow(input.src(a)), input.src(v).into()), - Element::Invalid => return false, + pub(crate) fn parse(&mut self, input: &'s str) -> bool { + let mut parser = Parser::new(self, input); + loop { + match parser.step() { + StepResult::Done => return true, + StepResult::Invalid | StepResult::More => return false, + StepResult::Valid => {} } } - true } /// Combine all attributes from both objects, prioritizing self on conflicts. 
@@ -160,7 +164,8 @@ impl<'s> Attributes<'s> { if let Some(i) = attrs.iter().position(|(k, _)| *k == key) { let prev = &mut attrs[i].1; if key == "class" { - *prev = format!("{} {}", prev, val).into(); + prev.raw.push(' '); + prev.raw.push_str(&val.raw); } else { *prev = val; } @@ -169,6 +174,31 @@ impl<'s> Attributes<'s> { } } + fn insert_str(&mut self, key: &'s str, val: &'s str) -> usize { + if self.0.is_none() { + self.0 = Some(Vec::new().into()); + }; + + let attrs = self.0.as_mut().unwrap(); + if let Some(i) = attrs.iter().position(|(k, _)| *k == key) { + let prev = &mut attrs[i].1; + if key == "class" { + prev.extend(val); + } else { + *prev = val.into(); + } + i + } else { + let i = attrs.len(); + attrs.push((key, val.into())); + i + } + } + + fn append_str(&mut self, i: usize, val: &'s str) { + self.0.as_mut().unwrap()[i].1.extend(val) + } + /// Returns true if the collection contains no attributes. #[must_use] pub fn is_empty(&self) -> bool { @@ -202,199 +232,176 @@ impl<'s> FromIterator<(&'s str, &'s str)> for Attributes<'s> { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum State { - Start, - Whitespace, - Comment, - ClassFirst, - Class, - IdentifierFirst, - Identifier, - Attribute, - ValueFirst, - Value, - ValueQuoted, +impl<'s> std::fmt::Debug for Attributes<'s> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{{")?; + let mut first = true; + for (k, v) in self.iter() { + if !first { + write!(f, ", ")?; + } + first = false; + write!(f, "{}=\"{}\"", k, v.raw)?; + } + write!(f, "}}") + } +} + +pub enum StepResult { + /// Attributes are valid and completed. Done, + /// Attributes are invalid. Invalid, + /// Attributes are valid so far. + Valid, + /// Attributes are valid so far, more input is needed. 
+ More, } -struct Parser { - chars: I, +pub struct Parser<'a, 's> { + input: &'s str, + chars: std::str::Chars<'s>, + attrs: &'a mut Attributes<'s>, + i_last: usize, + pos: usize, pos_prev: usize, state: State, } -impl> Parser { - fn new(chars: I) -> Self { - Parser { - chars, +impl<'a, 's> Parser<'a, 's> { + pub fn new(attrs: &'a mut Attributes<'s>, input: &'s str) -> Self { + Self { + input, + chars: input.chars(), + attrs, + i_last: usize::MAX, pos: 0, pos_prev: 0, - state: Start, + state: State::Start, } } - fn step_char(&mut self) -> Option { - self.chars.next().map(|c| { - self.pos_prev = self.pos; - self.pos += c.len_utf8(); - match self.state { - Start => match c { - '{' => Whitespace, - _ => Invalid, - }, - Whitespace => match c { - '}' => Done, - '.' => ClassFirst, - '#' => IdentifierFirst, - '%' => Comment, - c if c.is_ascii_alphanumeric() || matches!(c, '_' | ':' | '-') => Attribute, - c if c.is_whitespace() => Whitespace, - _ => Invalid, - }, - Comment => { - if c == '%' { - Whitespace - } else { - Comment - } - } - s @ (ClassFirst | IdentifierFirst) => { - if is_name(c) { - match s { - ClassFirst => Class, - IdentifierFirst => Identifier, - _ => panic!(), - } - } else { - Invalid - } - } - s @ (Class | Identifier | Value) => { - if is_name(c) { - s - } else if c.is_whitespace() { - Whitespace - } else if c == '}' { - Done - } else { - Invalid - } - } - Attribute => { - if is_name(c) { - Attribute - } else if c == '=' { - ValueFirst - } else { - Invalid - } - } - ValueFirst => { - if is_name(c) { - Value - } else if c == '"' { - ValueQuoted - } else { - Invalid - } - } - ValueQuoted => match c { - '\\' => { - if let Some(c) = self.chars.next() { - self.pos_prev = self.pos; - self.pos += c.len_utf8(); - } - ValueQuoted - } - '"' => Whitespace, - _ => ValueQuoted, - }, - Invalid | Done => panic!("{:?}", self.state), - } - }) + pub fn restart(&mut self) { + self.state = State::Start; } - fn step(&mut self) -> (State, Span) { - let start = self.pos_prev; + 
pub fn set_input(&mut self, input: &'s str) { + debug_assert_eq!(self.chars.next(), None); + self.input = input; + self.chars = input.chars(); + self.pos = 0; + self.pos_prev = 0; + } - if self.state == Done { - return (Done, Span::empty_at(start)); - } + pub fn step(&mut self) -> StepResult { + self.chars.next().map_or(StepResult::More, |c| { + use State::*; - if self.state == Invalid { - return (Invalid, Span::empty_at(start)); - } + let state_next = self.state.step(c); + let st = std::mem::replace(&mut self.state, state_next); - while let Some(state_next) = self.step_char() { - if self.state != state_next { - return ( - std::mem::replace(&mut self.state, state_next), - Span::new(start, self.pos_prev), - ); - } - } + if st != self.state && !matches!((st, self.state), (ValueEscape, _) | (_, ValueEscape)) + { + let content = &self.input[self.pos_prev..self.pos]; + self.pos_prev = self.pos; + match st { + Class => { + self.attrs.insert_str("class", content); + } + Identifier => { + self.attrs.insert_str("id", content); + } + Key => self.i_last = self.attrs.insert_str(content, ""), + Value | ValueQuoted | ValueContinued => self.attrs.append_str( + self.i_last, + &content[usize::from(matches!(st, ValueQuoted))..], + ), + _ => {} + } + }; - ( - if self.state == Done { Done } else { Invalid }, - Span::new(start, self.pos_prev), - ) + self.pos += c.len_utf8(); + + match self.state { + State::Done => StepResult::Done, + State::Invalid => StepResult::Invalid, + _ => StepResult::Valid, + } + }) } -} -pub fn is_name(c: char) -> bool { - c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-') + pub fn len(&self) -> usize { + self.input.len() - self.chars.as_str().len() + } } -enum Element { - Class(Span), - Identifier(Span), - Attribute(Span, Span), +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum State { + Start, + Whitespace, + Comment, + ClassFirst, + Class, + IdentifierFirst, + Identifier, + Key, + ValueFirst, + Value, + ValueQuoted, + ValueEscape, + ValueNewline, + 
ValueContinued, + Done, Invalid, } -impl> Iterator for Parser { - type Item = Element; - - fn next(&mut self) -> Option { - loop { - let (st, span0) = self.step(); - return match st { - ClassFirst | IdentifierFirst => { - let (st, span1) = self.step(); - Some(match st { - Class => Element::Class(span1), - Identifier => Element::Identifier(span1), - _ => return Some(Element::Invalid), - }) - } - Attribute => { - let (st, _span1) = self.step(); - match st { - ValueFirst => { - let (st, span2) = self.step(); - match st { - Value => Some(Element::Attribute(span0, span2)), - ValueQuoted => Some(Element::Attribute(span0, span2.skip(1))), - Invalid => Some(Element::Invalid), - _ => panic!("{:?}", st), - } - } - Invalid => Some(Element::Invalid), - _ => panic!("{:?}", st), - } - } - Comment | Start | Whitespace => continue, - Done => None, - Invalid => Some(Element::Invalid), - _ => panic!("{:?}", st), - }; +impl State { + fn step(self, c: char) -> State { + use State::*; + + match self { + Start if c == '{' => Whitespace, + Start => Invalid, + Whitespace => match c { + '}' => Done, + '.' 
=> ClassFirst, + '#' => IdentifierFirst, + '%' => Comment, + c if is_name(c) => Key, + c if c.is_whitespace() => Whitespace, + _ => Invalid, + }, + Comment if c == '%' => Whitespace, + Comment => Comment, + ClassFirst if is_name(c) => Class, + ClassFirst => Invalid, + IdentifierFirst if is_name(c) => Identifier, + IdentifierFirst => Invalid, + s @ (Class | Identifier | Value) if is_name(c) => s, + Class | Identifier | Value if c.is_whitespace() => Whitespace, + Class | Identifier | Value if c == '}' => Done, + Class | Identifier | Value => Invalid, + Key if is_name(c) => Key, + Key if c == '=' => ValueFirst, + Key => Invalid, + ValueFirst if is_name(c) => Value, + ValueFirst if c == '"' => ValueQuoted, + ValueFirst => Invalid, + ValueQuoted | ValueNewline | ValueContinued if c == '"' => Whitespace, + ValueQuoted | ValueNewline | ValueContinued | ValueEscape if c == '\n' => ValueNewline, + ValueQuoted if c == '\\' => ValueEscape, + ValueQuoted | ValueEscape => ValueQuoted, + ValueNewline | ValueContinued => ValueContinued, + Invalid | Done => panic!("{:?}", self), } } } +pub fn is_name(c: char) -> bool { + c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-') +} + #[cfg(test)] mod test { macro_rules! test_attr { @@ -456,6 +463,11 @@ mod test { ); } + #[test] + fn value_newline() { + test_attr!("{attr0=\"abc\ndef\"}", ("attr0", "abc def")); + } + #[test] fn comment() { test_attr!("{%%}"); diff --git a/src/block.rs b/src/block.rs index d108c62a..80247822 100644 --- a/src/block.rs +++ b/src/block.rs @@ -215,7 +215,7 @@ impl<'s> TreeParser<'s> { // close list if a non list item or a list item of new type appeared if let Some(OpenList { ty, depth, .. }) = self.open_lists.last() { - assert!(usize::from(*depth) <= self.tree.depth()); + debug_assert!(usize::from(*depth) <= self.tree.depth()); if self.tree.depth() == (*depth).into() && !matches!(kind, Kind::ListItem { ty: ty_new, .. 
} if *ty == ty_new) { @@ -405,7 +405,7 @@ impl<'s> TreeParser<'s> { } if let Some(OpenList { depth, .. }) = self.open_lists.last() { - assert!(usize::from(*depth) <= self.tree.depth()); + debug_assert!(usize::from(*depth) <= self.tree.depth()); if self.tree.depth() == (*depth).into() { self.prev_blankline = false; self.prev_loose = false; @@ -447,7 +447,7 @@ impl<'s> TreeParser<'s> { .tree .enter(Node::Container(TableRow { head: false }), row.with_len(1)); let rem = row.skip(1); // | - let lex = lex::Lexer::new(rem.of(self.src).chars()); + let lex = lex::Lexer::new(rem.of(self.src)); let mut pos = rem.start(); let mut cell_start = pos; let mut separator_row = true; diff --git a/src/html.rs b/src/html.rs index 1a1d9e08..2dfd22ac 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,45 +1,13 @@ //! An HTML renderer that takes an iterator of [`Event`]s and emits HTML. -//! -//! The HTML can be written to either a [`std::fmt::Write`] or a [`std::io::Write`] object. -//! -//! # Examples -//! -//! Push to a [`String`] (implements [`std::fmt::Write`]): -//! -//! ``` -//! # use jotdown::Render; -//! # let events = std::iter::empty(); -//! let mut html = String::new(); -//! jotdown::html::Renderer.push(events, &mut html); -//! ``` -//! -//! Write to standard output with buffering ([`std::io::Stdout`] implements [`std::io::Write`]): -//! -//! ``` -//! # use jotdown::Render; -//! # let events = std::iter::empty(); -//! let mut out = std::io::BufWriter::new(std::io::stdout()); -//! jotdown::html::Renderer.write(events, &mut out).unwrap(); -//! 
``` use crate::Alignment; use crate::Container; use crate::Event; +use crate::LinkType; use crate::ListKind; use crate::OrderedListNumbering::*; use crate::Render; - -pub struct Renderer; - -impl Render for Renderer { - fn push<'s, I: Iterator>, W: std::fmt::Write>( - &self, - events: I, - out: W, - ) -> std::fmt::Result { - Writer::new(events, out).write() - } -} +use crate::SpanLinkType; enum Raw { None, @@ -47,427 +15,433 @@ enum Raw { Other, } -struct FilteredEvents { - events: I, -} - -impl<'s, I: Iterator>> Iterator for FilteredEvents { - type Item = Event<'s>; - - fn next(&mut self) -> Option { - let mut ev = self.events.next(); - while matches!(ev, Some(Event::Blankline | Event::Escape)) { - ev = self.events.next(); - } - ev - } -} - -struct Writer<'s, I: Iterator>, W> { - events: std::iter::Peekable>, - out: W, +pub struct Renderer { raw: Raw, img_alt_text: usize, list_tightness: Vec, encountered_footnote: bool, footnote_number: Option, - footnote_backlink_written: bool, first_line: bool, + close_para: bool, } -impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { - fn new(events: I, out: W) -> Self { +impl Default for Renderer { + fn default() -> Self { Self { - events: FilteredEvents { events }.peekable(), - out, raw: Raw::None, img_alt_text: 0, list_tightness: Vec::new(), encountered_footnote: false, footnote_number: None, - footnote_backlink_written: false, first_line: true, + close_para: false, } } +} - fn write(&mut self) -> std::fmt::Result { - while let Some(e) = self.events.next() { - match e { - Event::Start(c, attrs) => { - if c.is_block() && !self.first_line { - self.out.write_char('\n')?; - } - if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) { - continue; - } - match &c { - Container::Blockquote => self.out.write_str(" { - self.list_tightness.push(*tight); - match kind { - ListKind::Unordered | ListKind::Task => { - self.out.write_str("(&mut self, e: &Event<'s>, mut out: W) -> std::fmt::Result + where + W: 
std::fmt::Write, + { + if matches!(&e, Event::Blankline | Event::Escape) { + return Ok(()); + } + + let close_para = self.close_para; + if close_para { + self.close_para = false; + if !matches!(&e, Event::End(Container::Footnote { .. })) { + // no need to add href before para close + out.write_str("

")?; + } + } + + match e { + Event::Start(c, attrs) => { + if c.is_block() && !self.first_line { + out.write_char('\n')?; + } + if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) { + return Ok(()); + } + match &c { + Container::Blockquote => out.write_str(" { + self.list_tightness.push(*tight); + match kind { + ListKind::Unordered | ListKind::Task => out.write_str(" { + out.write_str(" 1 { + write!(out, r#" start="{}""#, start)?; } - ListKind::Ordered { - numbering, start, .. - } => { - self.out.write_str(" 1 { - write!(self.out, r#" start="{}""#, start)?; - } - if let Some(ty) = match numbering { - Decimal => None, - AlphaLower => Some('a'), - AlphaUpper => Some('A'), - RomanLower => Some('i'), - RomanUpper => Some('I'), - } { - write!(self.out, r#" type="{}""#, ty)?; - } + if let Some(ty) = match numbering { + Decimal => None, + AlphaLower => Some('a'), + AlphaUpper => Some('A'), + RomanLower => Some('i'), + RomanUpper => Some('I'), + } { + write!(out, r#" type="{}""#, ty)?; } } } - Container::ListItem | Container::TaskListItem { .. } => { - self.out.write_str(" self.out.write_str(" self.out.write_str(" { - assert!(self.footnote_number.is_none()); - self.footnote_number = Some((*number).try_into().unwrap()); - if !self.encountered_footnote { - self.encountered_footnote = true; - self.out - .write_str("
\n
\n
    \n")?; - } - write!(self.out, "
  1. ", number)?; - self.footnote_backlink_written = false; - continue; - } - Container::Table => self.out.write_str(" self.out.write_str(" self.out.write_str(" self.out.write_str(" { - if matches!(self.list_tightness.last(), Some(true)) { - continue; - } - self.out.write_str(" { + out.write_str(" out.write_str(" out.write_str(" { + debug_assert!(self.footnote_number.is_none()); + self.footnote_number = Some((*number).try_into().unwrap()); + if !self.encountered_footnote { + self.encountered_footnote = true; + out.write_str("
    \n
    \n
      \n")?; } - Container::Heading { level, .. } => write!(self.out, " self.out.write_str(" self.out.write_str(" self.out.write_str(" self.out.write_str(" self.out.write_str(" self.out.write_str(" { - if dst.is_empty() { - self.out.write_str("", number)?; + return Ok(()); + } + Container::Table => out.write_str(" out.write_str(" out.write_str(" out.write_str(" { + if matches!(self.list_tightness.last(), Some(true)) { + return Ok(()); } - Container::Image(..) => { - self.img_alt_text += 1; - if self.img_alt_text == 1 { - self.out.write_str(" write!(out, " out.write_str(" out.write_str(" out.write_str(" out.write_str(" out.write_str(" out.write_str(" { + if matches!(ty, LinkType::Span(SpanLinkType::Unresolved)) { + out.write_str(" self.out.write_str(" { - self.raw = if format == &"html" { - Raw::Html - } else { - Raw::Other - }; - continue; + } + Container::Image(..) => { + self.img_alt_text += 1; + if self.img_alt_text == 1 { + out.write_str(" self.out.write_str(" self.out.write_str(" self.out.write_str(" self.out.write_str(" self.out.write_str(" self.out.write_str(" self.out.write_str(" out.write_str(" { + self.raw = if format == &"html" { + Raw::Html + } else { + Raw::Other + }; + return Ok(()); } + Container::Subscript => out.write_str(" out.write_str(" out.write_str(" out.write_str(" out.write_str(" out.write_str(" out.write_str(" Some("task-list"), - Container::TaskListItem { checked: false } => Some("unchecked"), - Container::TaskListItem { checked: true } => Some("checked"), - Container::Math { display: false } => Some("math inline"), - Container::Math { display: true } => Some("math display"), - _ => None, - } { - first_written = true; - self.out.write_str(cls)?; - } - for cls in attrs - .iter() - .filter(|(a, _)| a == &"class") - .map(|(_, cls)| cls) - { - if first_written { - self.out.write_char(' ')?; } - first_written = true; - cls.parts().try_for_each(|part| self.write_attr(part))?; + | Container::TaskListItem { .. 
} + ) + { + out.write_str(r#" class=""#)?; + let mut first_written = false; + if let Some(cls) = match c { + Container::List { + kind: ListKind::Task, + .. + } => Some("task-list"), + Container::TaskListItem { checked: false } => Some("unchecked"), + Container::TaskListItem { checked: true } => Some("checked"), + Container::Math { display: false } => Some("math inline"), + Container::Math { display: true } => Some("math display"), + _ => None, + } { + first_written = true; + out.write_str(cls)?; + } + for cls in attrs + .iter() + .filter(|(a, _)| a == &"class") + .map(|(_, cls)| cls) + { + if first_written { + out.write_char(' ')?; } - // div class goes after classes from attrs - if let Container::Div { class: Some(cls) } = c { - if first_written { - self.out.write_char(' ')?; - } - self.out.write_str(cls)?; + first_written = true; + cls.parts() + .try_for_each(|part| write_attr(part, &mut out))?; + } + // div class goes after classes from attrs + if let Container::Div { class: Some(cls) } = c { + if first_written { + out.write_char(' ')?; } - self.out.write_char('"')?; + out.write_str(cls)?; } + out.write_char('"')?; + } - match c { - Container::TableCell { alignment, .. } - if !matches!(alignment, Alignment::Unspecified) => - { - let a = match alignment { - Alignment::Unspecified => unreachable!(), - Alignment::Left => "left", - Alignment::Center => "center", - Alignment::Right => "right", - }; - write!(self.out, r#" style="text-align: {};">"#, a)?; - } - Container::CodeBlock { lang } => { - if let Some(l) = lang { - self.out.write_str(r#">"#)?; - } else { - self.out.write_str(">")?; - } - } - Container::Image(..) => { - if self.img_alt_text == 1 { - self.out.write_str(r#" alt=""#)?; - } + match c { + Container::TableCell { alignment, .. 
} + if !matches!(alignment, Alignment::Unspecified) => + { + let a = match alignment { + Alignment::Unspecified => unreachable!(), + Alignment::Left => "left", + Alignment::Center => "center", + Alignment::Right => "right", + }; + write!(out, r#" style="text-align: {};">"#, a)?; + } + Container::CodeBlock { lang } => { + if let Some(l) = lang { + out.write_str(r#">"#)?; + } else { + out.write_str(">")?; } - Container::Math { display } => { - self.out - .write_str(if display { r#">\["# } else { r#">\("# })?; + } + Container::Image(..) => { + if self.img_alt_text == 1 { + out.write_str(r#" alt=""#)?; } - _ => self.out.write_char('>')?, } + Container::Math { display } => { + out.write_str(if *display { r#">\["# } else { r#">\("# })?; + } + _ => out.write_char('>')?, + } + } + Event::End(c) => { + if c.is_block_container() && !matches!(c, Container::Footnote { .. }) { + out.write_char('\n')?; + } + if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) { + return Ok(()); } - Event::End(c) => { - if c.is_block_container() && !matches!(c, Container::Footnote { .. }) { - self.out.write_char('\n')?; + match c { + Container::Blockquote => out.write_str("")?, + Container::List { + kind: ListKind::Unordered | ListKind::Task, + .. + } => { + self.list_tightness.pop(); + out.write_str("")?; } - if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) { - continue; + Container::List { + kind: ListKind::Ordered { .. }, + .. + } => out.write_str("
    ")?, + Container::ListItem | Container::TaskListItem { .. } => { + out.write_str("
  2. ")?; } - match c { - Container::Blockquote => self.out.write_str("")?, - Container::List { - kind: ListKind::Unordered | ListKind::Task, - .. - } => { - self.list_tightness.pop(); - self.out.write_str("")?; + Container::DescriptionList => out.write_str("")?, + Container::DescriptionDetails => out.write_str("")?, + Container::Footnote { number, .. } => { + if !close_para { + // create a new paragraph + out.write_str("\n

    ")?; } - Container::List { - kind: ListKind::Ordered { .. }, - .. - } => self.out.write_str("

")?, - Container::ListItem | Container::TaskListItem { .. } => { - self.out.write_str("")?; - } - Container::DescriptionList => self.out.write_str("")?, - Container::DescriptionDetails => self.out.write_str("")?, - Container::Footnote { number, .. } => { - if !self.footnote_backlink_written { - write!( - self.out, - "\n

↩︎︎

", - number, - )?; - } - self.out.write_str("\n")?; - self.footnote_number = None; + write!( + out, + r##"↩︎︎

"##, + number, + )?; + out.write_str("\n")?; + self.footnote_number = None; + } + Container::Table => out.write_str("")?, + Container::TableRow { .. } => out.write_str("")?, + Container::Section { .. } => out.write_str("
")?, + Container::Div { .. } => out.write_str("")?, + Container::Paragraph => { + if matches!(self.list_tightness.last(), Some(true)) { + return Ok(()); } - Container::Table => self.out.write_str("")?, - Container::TableRow { .. } => self.out.write_str("")?, - Container::Section { .. } => self.out.write_str("")?, - Container::Div { .. } => self.out.write_str("")?, - Container::Paragraph => { - if matches!(self.list_tightness.last(), Some(true)) { - continue; - } - if let Some(num) = self.footnote_number { - if matches!( - self.events.peek(), - Some(Event::End(Container::Footnote { .. })) - ) { - write!( - self.out, - r##"↩︎︎"##, - num - )?; - self.footnote_backlink_written = true; - } - } - self.out.write_str("

")?; + if self.footnote_number.is_none() { + out.write_str("

")?; + } else { + self.close_para = true; } - Container::Heading { level, .. } => write!(self.out, "", level)?, - Container::TableCell { head: false, .. } => self.out.write_str("")?, - Container::TableCell { head: true, .. } => self.out.write_str("")?, - Container::Caption => self.out.write_str("")?, - Container::DescriptionTerm => self.out.write_str("")?, - Container::CodeBlock { .. } => self.out.write_str("
")?, - Container::Span => self.out.write_str("")?, - Container::Link(..) => self.out.write_str("")?, - Container::Image(src, ..) => { - if self.img_alt_text == 1 { - if !src.is_empty() { - self.out.write_str(r#"" src=""#)?; - self.write_attr(&src)?; - } - self.out.write_str(r#"">"#)?; + } + Container::Heading { level, .. } => write!(out, "", level)?, + Container::TableCell { head: false, .. } => out.write_str("")?, + Container::TableCell { head: true, .. } => out.write_str("")?, + Container::Caption => out.write_str("")?, + Container::DescriptionTerm => out.write_str("")?, + Container::CodeBlock { .. } => out.write_str("
")?, + Container::Span => out.write_str("")?, + Container::Link(..) => out.write_str("")?, + Container::Image(src, ..) => { + if self.img_alt_text == 1 { + if !src.is_empty() { + out.write_str(r#"" src=""#)?; + write_attr(src, &mut out)?; } - self.img_alt_text -= 1; - } - Container::Verbatim => self.out.write_str("
")?, - Container::Math { display } => { - self.out.write_str(if display { - r#"\]"# - } else { - r#"\)"# - })?; - } - Container::RawBlock { .. } | Container::RawInline { .. } => { - self.raw = Raw::None; + out.write_str(r#"">"#)?; } - Container::Subscript => self.out.write_str("")?, - Container::Superscript => self.out.write_str("")?, - Container::Insert => self.out.write_str("")?, - Container::Delete => self.out.write_str("")?, - Container::Strong => self.out.write_str("")?, - Container::Emphasis => self.out.write_str("")?, - Container::Mark => self.out.write_str("")?, + self.img_alt_text -= 1; } - } - Event::Str(s) => match self.raw { - Raw::None if self.img_alt_text > 0 => self.write_attr(&s)?, - Raw::None => self.write_text(&s)?, - Raw::Html => self.out.write_str(&s)?, - Raw::Other => {} - }, - Event::FootnoteReference(_tag, number) => { - if self.img_alt_text == 0 { - write!( - self.out, - r##"{}"##, - number, number, number - )?; + Container::Verbatim => out.write_str("
")?, + Container::Math { display } => { + out.write_str(if *display { + r#"\]"# + } else { + r#"\)"# + })?; } - } - Event::Symbol(sym) => write!(self.out, ":{}:", sym)?, - Event::LeftSingleQuote => self.out.write_str("‘")?, - Event::RightSingleQuote => self.out.write_str("’")?, - Event::LeftDoubleQuote => self.out.write_str("“")?, - Event::RightDoubleQuote => self.out.write_str("”")?, - Event::Ellipsis => self.out.write_str("…")?, - Event::EnDash => self.out.write_str("–")?, - Event::EmDash => self.out.write_str("—")?, - Event::NonBreakingSpace => self.out.write_str(" ")?, - Event::Hardbreak => self.out.write_str("
\n")?, - Event::Softbreak => self.out.write_char('\n')?, - Event::Escape | Event::Blankline => unreachable!("filtered out"), - Event::ThematicBreak(attrs) => { - self.out.write_str("\n { + self.raw = Raw::None; } - self.out.write_str(">")?; + Container::Subscript => out.write_str("")?, + Container::Superscript => out.write_str("")?, + Container::Insert => out.write_str("")?, + Container::Delete => out.write_str("")?, + Container::Strong => out.write_str("")?, + Container::Emphasis => out.write_str("")?, + Container::Mark => out.write_str("")?, } } - self.first_line = false; - } - if self.encountered_footnote { - self.out.write_str("\n\n")?; + Event::Str(s) => match self.raw { + Raw::None if self.img_alt_text > 0 => write_attr(s, &mut out)?, + Raw::None => write_text(s, &mut out)?, + Raw::Html => out.write_str(s)?, + Raw::Other => {} + }, + Event::FootnoteReference(_tag, number) => { + if self.img_alt_text == 0 { + write!( + out, + r##"{}"##, + number, number, number + )?; + } + } + Event::Symbol(sym) => write!(out, ":{}:", sym)?, + Event::LeftSingleQuote => out.write_str("‘")?, + Event::RightSingleQuote => out.write_str("’")?, + Event::LeftDoubleQuote => out.write_str("“")?, + Event::RightDoubleQuote => out.write_str("”")?, + Event::Ellipsis => out.write_str("…")?, + Event::EnDash => out.write_str("–")?, + Event::EmDash => out.write_str("—")?, + Event::NonBreakingSpace => out.write_str(" ")?, + Event::Hardbreak => out.write_str("
\n")?, + Event::Softbreak => out.write_char('\n')?, + Event::Escape | Event::Blankline => unreachable!("filtered out"), + Event::ThematicBreak(attrs) => { + out.write_str("\n")?; + } } - self.out.write_char('\n')?; + self.first_line = false; + Ok(()) } - fn write_escape(&mut self, mut s: &str, escape_quotes: bool) -> std::fmt::Result { - let mut ent = ""; - while let Some(i) = s.find(|c| { - match c { - '<' => Some("<"), - '>' => Some(">"), - '&' => Some("&"), - '"' if escape_quotes => Some("""), - _ => None, - } - .map_or(false, |s| { - ent = s; - true - }) - }) { - self.out.write_str(&s[..i])?; - self.out.write_str(ent)?; - s = &s[i + 1..]; + fn render_epilogue(&mut self, mut out: W) -> std::fmt::Result + where + W: std::fmt::Write, + { + if self.encountered_footnote { + out.write_str("\n\n")?; } - self.out.write_str(s) - } + out.write_char('\n')?; - fn write_text(&mut self, s: &str) -> std::fmt::Result { - self.write_escape(s, false) + Ok(()) } +} + +fn write_text(s: &str, out: W) -> std::fmt::Result +where + W: std::fmt::Write, +{ + write_escape(s, false, out) +} - fn write_attr(&mut self, s: &str) -> std::fmt::Result { - self.write_escape(s, true) +fn write_attr(s: &str, out: W) -> std::fmt::Result +where + W: std::fmt::Write, +{ + write_escape(s, true, out) +} + +fn write_escape(mut s: &str, escape_quotes: bool, mut out: W) -> std::fmt::Result +where + W: std::fmt::Write, +{ + let mut ent = ""; + while let Some(i) = s.find(|c| { + match c { + '<' => Some("<"), + '>' => Some(">"), + '&' => Some("&"), + '"' if escape_quotes => Some("""), + _ => None, + } + .map_or(false, |s| { + ent = s; + true + }) + }) { + out.write_str(&s[..i])?; + out.write_str(ent)?; + s = &s[i + 1..]; } + out.write_str(s) } diff --git a/src/inline.rs b/src/inline.rs index 6e8f5851..3741ff37 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -1,12 +1,15 @@ use crate::attr; use crate::lex; +use crate::CowStr; use crate::Span; use lex::Delimiter; +use lex::Sequence; use lex::Symbol; use 
Atom::*; use Container::*; +use ControlFlow::*; #[derive(Debug, Clone, PartialEq, Eq)] pub enum Atom { @@ -37,18 +40,16 @@ pub enum Container { RawFormat, InlineMath, DisplayMath, - /// Span is the reference link tag. - ReferenceLink, - /// Span is the reference link tag. - ReferenceImage, - /// Span is the URL. - InlineLink, - /// Span is the URL. - InlineImage, + ReferenceLink(CowStrIndex), + ReferenceImage(CowStrIndex), + InlineLink(CowStrIndex), + InlineImage(CowStrIndex), /// Open delimiter span is URL, closing is '>'. Autolink, } +type CowStrIndex = u32; + #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum QuoteType { Single, @@ -61,39 +62,79 @@ pub enum EventKind { Exit(Container), Atom(Atom), Str, - Whitespace, - Attributes, + Attributes { + container: bool, + attrs: AttributesIndex, + }, Placeholder, } +type AttributesIndex = u32; + #[derive(Clone, Debug, PartialEq, Eq)] pub struct Event { pub kind: EventKind, pub span: Span, } -pub struct Parser { - /// Lexer, hosting upcoming source. - lexer: lex::Lexer, +#[derive(Clone)] +pub struct Input<'s> { + src: &'s str, + /// Lexer. + lexer: lex::Lexer<'s>, + /// The block is complete, the final line has been provided. + complete: bool, + /// Span of current line. + span_line: Span, + /// Upcoming lines within the current block. + ahead: std::collections::VecDeque, /// Span of current event. span: Span, - /// Stack with kind and index of _potential_ openers for containers. - openers: Vec<(Delim, usize)>, - /// Buffer queue for next events. Events are buffered until no modifications due to future - /// characters are needed. 
- events: std::collections::VecDeque, } -impl + Clone> Parser { - pub fn new(chars: I) -> Self { +impl<'s> Input<'s> { + fn new(src: &'s str) -> Self { Self { - lexer: lex::Lexer::new(chars), - span: Span::new(0, 0), - openers: Vec::new(), - events: std::collections::VecDeque::new(), + src, + lexer: lex::Lexer::new(""), + complete: false, + span_line: Span::new(0, 0), + ahead: std::collections::VecDeque::new(), + span: Span::empty_at(0), } } + fn feed_line(&mut self, line: Span, last: bool) { + self.complete = last; + if self.lexer.ahead().is_empty() { + if let Some(next) = self.ahead.pop_front() { + self.set_current_line(next); + self.ahead.push_back(line); + } else { + self.set_current_line(line); + } + } else { + self.ahead.push_back(line); + } + } + + fn set_current_line(&mut self, line: Span) { + self.lexer = lex::Lexer::new(line.of(self.src)); + self.span = line.empty_before(); + self.span_line = line; + } + + fn reset(&mut self) { + self.lexer = lex::Lexer::new(""); + self.complete = false; + self.ahead.clear(); + self.span = Span::empty_at(0); + } + + fn last(&self) -> bool { + self.complete && self.ahead.is_empty() + } + fn eat(&mut self) -> Option { let tok = self.lexer.next(); if let Some(t) = &tok { @@ -110,227 +151,344 @@ impl + Clone> Parser { self.span = self.span.empty_after(); } - fn parse_event(&mut self) -> Option { - self.reset_span(); - self.eat().map(|first| { - self.parse_verbatim(&first) - .or_else(|| self.parse_attributes(&first)) + fn ahead_raw_format(&mut self) -> Option { + if matches!( + self.lexer.peek().map(|t| &t.kind), + Some(lex::Kind::Open(Delimiter::BraceEqual)) + ) { + let mut ahead = self.lexer.ahead().chars(); + let mut end = false; + let len = (&mut ahead) + .skip(2) // {= + .take_while(|c| { + if *c == '{' { + return false; + } + if *c == '}' { + end = true; + }; + !end && !c.is_whitespace() + }) + .map(char::len_utf8) + .sum(); + (len > 0 && end).then(|| { + let tok = self.eat(); + debug_assert_eq!( + tok, + Some(lex::Token 
{ + kind: lex::Kind::Open(Delimiter::BraceEqual), + len: 2, + }) + ); + self.lexer = lex::Lexer::new(ahead.as_str()); + self.span.after(len) + }) + } else { + None + } + } +} + +#[derive(Clone)] +pub struct VerbatimState { + event_opener: usize, + len_opener: u8, + non_whitespace_encountered: bool, + non_whitespace_last: Option<(lex::Kind, usize)>, +} + +#[derive(Clone)] +pub enum AttributesElementType { + Container { e_placeholder: usize }, + Word, +} + +#[derive(Clone)] +pub struct Parser<'s> { + input: Input<'s>, + /// Stack with kind and index of _potential_ openers for containers. + openers: Vec<(Opener, usize)>, + /// Buffer queue for next events. Events are buffered until no modifications due to future + /// characters are needed. + events: std::collections::VecDeque, + /// State if inside a verbatim container. + verbatim: Option, + /// State if currently parsing potential attributes. + attributes: Option, + /// Storage of cow strs, used to reduce size of [`Container`]. + pub(crate) cow_strs: Vec>, + /// Storage of attributes, used to reduce size of [`Container`]. + pub(crate) attributes_store: Vec>, +} + +pub enum ControlFlow { + /// At least one event has been emitted, continue parsing the line. + Continue, + /// Next line is needed to emit an event. + Next, + /// More lines are needed to emit an event. Unlike for the `Next` variant, the internal ahead + /// buffer has already been examined, and more lines need to retrieved from the block parser. + More, + /// Parsing of the line is completed. 
+ Done, +} + +impl<'s> Parser<'s> { + pub fn new(src: &'s str) -> Self { + Self { + input: Input::new(src), + openers: Vec::new(), + events: std::collections::VecDeque::new(), + verbatim: None, + attributes: None, + cow_strs: Vec::new(), + attributes_store: Vec::new(), + } + } + + pub fn feed_line(&mut self, line: Span, last: bool) { + self.input.feed_line(line, last); + } + + pub fn reset(&mut self) { + debug_assert!(self.events.is_empty()); + self.input.reset(); + self.openers.clear(); + debug_assert!(self.attributes.is_none()); + debug_assert!(self.verbatim.is_none()); + } + + fn push_sp(&mut self, kind: EventKind, span: Span) -> Option { + self.events.push_back(Event { kind, span }); + Some(Continue) + } + + fn push(&mut self, kind: EventKind) -> Option { + self.push_sp(kind, self.input.span) + } + + fn parse_event(&mut self) -> ControlFlow { + self.input.reset_span(); + + if let Some(first) = self.input.eat() { + self.parse_attributes(&first) + .or_else(|| self.parse_verbatim(&first)) .or_else(|| self.parse_autolink(&first)) .or_else(|| self.parse_symbol(&first)) .or_else(|| self.parse_footnote_reference(&first)) .or_else(|| self.parse_container(&first)) .or_else(|| self.parse_atom(&first)) - .unwrap_or(Event { - kind: if matches!(first.kind, lex::Kind::Whitespace) { - EventKind::Whitespace - } else { - EventKind::Str - }, - span: self.span, - }) - }) + .unwrap_or_else(|| self.push(EventKind::Str).unwrap()) + } else if self.input.last() { + Done + } else { + Next + } } - fn parse_verbatim(&mut self, first: &lex::Token) -> Option { - match first.kind { - lex::Kind::Seq(lex::Sequence::Dollar) => { - let math_opt = (first.len <= 2) - .then(|| { - if let Some(lex::Token { - kind: lex::Kind::Seq(lex::Sequence::Backtick), - len, - }) = self.peek() - { - Some(( - if first.len == 2 { - DisplayMath - } else { - InlineMath - }, - *len, - )) - } else { - None - } + fn parse_verbatim(&mut self, first: &lex::Token) -> Option { + if let Some(VerbatimState { + event_opener, 
+ len_opener, + non_whitespace_encountered, + non_whitespace_last, + }) = &mut self.verbatim + { + let event_opener = *event_opener; + let len_opener = *len_opener; + if usize::from(len_opener) == first.len + && matches!(first.kind, lex::Kind::Seq(Sequence::Backtick)) + { + let raw_format = self.input.ahead_raw_format(); + let mut span_closer = self.input.span; + if let Some(span_format) = raw_format { + self.events[event_opener].kind = EventKind::Enter(RawFormat); + self.events[event_opener].span = span_format; + self.input.span = span_format.translate(1); + span_closer = span_format; + }; + let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind { + debug_assert!(matches!( + ty, + Verbatim | RawFormat | InlineMath | DisplayMath + )); + ty + } else { + panic!() + }; + if let Some((lex::Kind::Seq(Sequence::Backtick), event_skip)) = non_whitespace_last + { + self.events.drain(*event_skip..); + } + self.push_sp(EventKind::Exit(ty_opener), span_closer); + if raw_format.is_none() + && self.input.peek().map_or(false, |t| { + matches!(t.kind, lex::Kind::Open(Delimiter::Brace)) }) - .flatten(); - if math_opt.is_some() { - self.eat(); // backticks + { + self.ahead_attributes( + AttributesElementType::Container { + e_placeholder: event_opener - 1, + }, + false, + ); } - math_opt - } - lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, first.len)), - _ => None, - } - .map(|(mut kind, opener_len)| { - let e_attr = self.events.len(); - self.events.push_back(Event { - kind: EventKind::Placeholder, - span: Span::empty_at(self.span.start()), - }); - let opener_event = self.events.len(); - self.events.push_back(Event { - kind: EventKind::Enter(kind), - span: self.span, + self.verbatim = None; + } else { + // continue verbatim + let is_whitespace = self + .input + .span + .of(self.input.src) + .chars() + .all(char::is_whitespace); + if is_whitespace { + if !*non_whitespace_encountered + && self.input.peek().map_or(false, |t| { + matches!( + t.kind, + 
lex::Kind::Seq(Sequence::Backtick) if t.len != len_opener.into(), + ) + }) + { + return Some(Continue); // skip whitespace + } + } else { + *non_whitespace_encountered = true; + *non_whitespace_last = Some((first.kind, self.events.len() + 1)); + } + self.push(EventKind::Str); + }; + Some(Continue) + } else { + let (ty, len_opener) = match first.kind { + lex::Kind::DollarBacktick(l) if first.len - l as usize == 1 => { + Some((InlineMath, l)) + } + lex::Kind::DollarBacktick(l) if first.len - l as usize == 2 => { + Some((DisplayMath, l)) + } + lex::Kind::Seq(Sequence::Backtick) if first.len < 256 => { + Some((Verbatim, first.len as u8)) + } + _ => None, + }?; + self.push_sp(EventKind::Placeholder, self.input.span.empty_before()); + self.verbatim = Some(VerbatimState { + event_opener: self.events.len(), + len_opener, + non_whitespace_encountered: false, + non_whitespace_last: None, }); + self.attributes = None; + self.push(EventKind::Enter(ty)) + } + } - let mut span_inner = self.span.empty_after(); - let mut span_outer = None; - - let mut non_whitespace_first = None; - let mut non_whitespace_last = None; + fn parse_attributes(&mut self, first: &lex::Token) -> Option { + if first.kind == lex::Kind::Open(Delimiter::Brace) { + let elem_ty = self + .attributes + .take() + .unwrap_or(AttributesElementType::Word); + self.ahead_attributes(elem_ty, true) + } else { + None + } + } - while let Some(t) = self.eat() { - if matches!(t.kind, lex::Kind::Seq(lex::Sequence::Backtick)) && t.len == opener_len - { - if matches!(kind, Verbatim) - && matches!( - self.lexer.peek().map(|t| &t.kind), - Some(lex::Kind::Open(Delimiter::BraceEqual)) - ) - { - let mut ahead = self.lexer.chars(); - let mut end = false; - let len = (&mut ahead) - .skip(2) // {= - .take_while(|c| { - if *c == '{' { - return false; - } - if *c == '}' { - end = true; - }; - !end && !c.is_whitespace() - }) - .map(char::len_utf8) - .sum(); - if len > 0 && end { - let tok = self.eat(); - debug_assert_eq!( - tok, - 
Some(lex::Token { - kind: lex::Kind::Open(Delimiter::BraceEqual), - len: 2, - }) - ); - self.lexer = lex::Lexer::new(ahead); - let span_format = self.span.after(len); - kind = RawFormat; - self.events[opener_event].kind = EventKind::Enter(kind); - self.events[opener_event].span = span_format; - self.span = span_format.translate(1); // } - span_outer = Some(span_format); - } + fn ahead_attributes( + &mut self, + elem_ty: AttributesElementType, + opener_eaten: bool, + ) -> Option { + let start_attr = self.input.span.end() - usize::from(opener_eaten); + debug_assert!(self.input.src[start_attr..].starts_with('{')); + + let mut line_start = start_attr; + let mut line_end = self.input.span_line.end(); + let mut line = 0; + let mut end_attr = start_attr; + let mut attrs = attr::Attributes::new(); + let mut parser = attr::Parser::new(&mut attrs, &self.input.src[start_attr..line_end]); + + loop { + match parser.step() { + attr::StepResult::Done => { + end_attr = line_start + parser.len(); + if self.input.src[end_attr..].starts_with('{') { + parser.restart(); + } else { + break; } - break; } - if !matches!(t.kind, lex::Kind::Whitespace) { - if non_whitespace_first.is_none() { - non_whitespace_first = Some((t.kind, span_inner.end())); + attr::StepResult::Invalid => break, + attr::StepResult::Valid => {} + attr::StepResult::More => { + if let Some(l) = self.input.ahead.get(line) { + line += 1; + line_start = l.start(); + line_end = l.end(); + parser.set_input(l.of(self.input.src)); + } else if self.input.complete { + // no need to ask for more input + break; + } else { + self.attributes = Some(elem_ty); + if opener_eaten { + self.input.span = Span::empty_at(start_attr); + self.input.lexer = lex::Lexer::new( + &self.input.src[start_attr..self.input.span_line.end()], + ); + } + return Some(More); } - non_whitespace_last = Some((t.kind, span_inner.end() + t.len)); } - span_inner = span_inner.extend(t.len); - self.reset_span(); - } - - if let 
Some((lex::Kind::Seq(lex::Sequence::Backtick), pos)) = non_whitespace_first { - span_inner = span_inner.with_start(pos); - } - if let Some((lex::Kind::Seq(lex::Sequence::Backtick), pos)) = non_whitespace_last { - span_inner = span_inner.with_end(pos); } + } - self.events.push_back(Event { - kind: EventKind::Str, - span: span_inner, - }); + if start_attr == end_attr { + return None; + } - let ev = Event { - kind: EventKind::Exit(kind), - span: span_outer.unwrap_or(self.span), + for _ in 0..line { + let l = self.input.ahead.pop_front().unwrap(); + self.input.set_current_line(l); + } + self.input.span = Span::new(start_attr, end_attr); + self.input.lexer = lex::Lexer::new(&self.input.src[end_attr..line_end]); + + if !attrs.is_empty() { + let attr_index = self.attributes_store.len() as AttributesIndex; + self.attributes_store.push(attrs); + let attr_event = Event { + kind: EventKind::Attributes { + container: matches!(elem_ty, AttributesElementType::Container { .. }), + attrs: attr_index, + }, + span: self.input.span, }; - - if let Some((non_empty, span)) = self.ahead_attributes() { - self.span = span; - if non_empty { - self.events[e_attr] = Event { - kind: EventKind::Attributes, - span, - }; + match elem_ty { + AttributesElementType::Container { e_placeholder } => { + self.events[e_placeholder] = attr_event; + if matches!(self.events[e_placeholder + 1].kind, EventKind::Str) { + self.events[e_placeholder + 1].kind = EventKind::Enter(Span); + let last = self.events.len() - 1; + self.events[last].kind = EventKind::Exit(Span); + } } - } - - ev - }) - } - - fn parse_attributes(&mut self, first: &lex::Token) -> Option { - if first.kind == lex::Kind::Open(Delimiter::Brace) { - let mut ahead = self.lexer.chars(); - let (mut attr_len, mut has_attr) = attr::valid(std::iter::once('{').chain(&mut ahead)); - attr_len = attr_len.saturating_sub(1); // rm { - if attr_len > 0 { - while attr_len > 0 { - self.span = self.span.extend(attr_len); - self.lexer = 
lex::Lexer::new(ahead.clone()); - - let (l, non_empty) = attr::valid(&mut ahead); - attr_len = l; - has_attr |= non_empty; + AttributesElementType::Word => { + self.events.push_back(attr_event); } - - let set_attr = has_attr - && self - .events - .back() - .map_or(false, |e| e.kind == EventKind::Str); - - Some(if set_attr { - let i = self - .events - .iter() - .rposition(|e| e.kind != EventKind::Str) - .map_or(0, |i| i + 1); - let span_str = self.events[i] - .span - .union(self.events[self.events.len() - 1].span); - self.events.drain(i..); - - self.events.push_back(Event { - kind: EventKind::Attributes, - span: self.span, - }); - self.events.push_back(Event { - kind: EventKind::Enter(Container::Span), - span: span_str.empty_before(), - }); - self.events.push_back(Event { - kind: EventKind::Str, - span: span_str, - }); - - Event { - kind: EventKind::Exit(Container::Span), - span: span_str.empty_after(), - } - } else { - Event { - kind: EventKind::Placeholder, - span: self.span.empty_before(), - } - }) - } else { - None } - } else { - None } + + Some(Continue) } - fn parse_autolink(&mut self, first: &lex::Token) -> Option { + fn parse_autolink(&mut self, first: &lex::Token) -> Option { if first.kind == lex::Kind::Sym(Symbol::Lt) { - let mut ahead = self.lexer.chars(); + let mut ahead = self.input.lexer.ahead().chars(); let mut end = false; let mut is_url = false; let len = (&mut ahead) @@ -348,31 +506,21 @@ impl + Clone> Parser { }) .map(char::len_utf8) .sum(); - (end && is_url).then(|| { - self.lexer = lex::Lexer::new(ahead); - self.span = self.span.after(len); - self.events.push_back(Event { - kind: EventKind::Enter(Autolink), - span: self.span, - }); - self.events.push_back(Event { - kind: EventKind::Str, - span: self.span, - }); - self.span = self.span.after(1); - Event { - kind: EventKind::Exit(Autolink), - span: self.span, - } - }) - } else { - None + if end && is_url { + self.input.lexer = lex::Lexer::new(ahead.as_str()); + self.input.span = 
self.input.span.after(len); + self.push(EventKind::Enter(Autolink)); + self.push(EventKind::Str); + self.input.span = self.input.span.after(1); + return self.push(EventKind::Exit(Autolink)); + } } + None } - fn parse_symbol(&mut self, first: &lex::Token) -> Option { + fn parse_symbol(&mut self, first: &lex::Token) -> Option { if first.kind == lex::Kind::Sym(Symbol::Colon) { - let mut ahead = self.lexer.chars(); + let mut ahead = self.input.lexer.ahead().chars(); let mut end = false; let mut valid = true; let len = (&mut ahead) @@ -386,32 +534,28 @@ impl + Clone> Parser { }) .map(char::len_utf8) .sum(); - (end && valid).then(|| { - self.lexer = lex::Lexer::new(ahead); - self.span = self.span.after(len); - let span = self.span; - self.span = self.span.after(1); - Event { - kind: EventKind::Atom(Symbol), - span, - } - }) - } else { - None + if end && valid { + self.input.lexer = lex::Lexer::new(ahead.as_str()); + self.input.span = self.input.span.after(len); + self.push(EventKind::Atom(Symbol)); + self.input.span = self.input.span.after(1); + return Some(Continue); + } } + None } - fn parse_footnote_reference(&mut self, first: &lex::Token) -> Option { + fn parse_footnote_reference(&mut self, first: &lex::Token) -> Option { if first.kind == lex::Kind::Open(Delimiter::Bracket) && matches!( - self.peek(), + self.input.peek(), Some(lex::Token { kind: lex::Kind::Sym(Symbol::Caret), .. 
}) ) { - let tok = self.eat(); + let tok = self.input.eat(); debug_assert_eq!( tok, Some(lex::Token { @@ -419,7 +563,7 @@ impl + Clone> Parser { len: 1, }) ); - let mut ahead = self.lexer.chars(); + let mut ahead = self.input.lexer.ahead().chars(); let mut end = false; let len = (&mut ahead) .take_while(|c| { @@ -433,280 +577,343 @@ impl + Clone> Parser { }) .map(char::len_utf8) .sum(); - end.then(|| { - self.lexer = lex::Lexer::new(ahead); - self.span = self.span.after(len); - let ev = Event { - kind: EventKind::Atom(FootnoteReference), - span: self.span, - }; - self.span = self.span.after(1); - ev - }) - } else { - None + if end { + self.input.lexer = lex::Lexer::new(ahead.as_str()); + self.input.span = self.input.span.after(len); + self.push(EventKind::Atom(FootnoteReference)); + self.input.span = self.input.span.after(1); + return Some(Continue); + } } + None } - fn parse_container(&mut self, first: &lex::Token) -> Option { - Delim::from_token(first.kind).and_then(|(delim, dir)| { - self.openers - .iter() - .rposition(|(d, _)| { - *d == delim || matches!((d, delim), (Delim::Span(..), Delim::Span(..))) - }) - .and_then(|o| { - if matches!(dir, Dir::Open) { - return None; - } - let whitespace_after = self.events.back().map_or(false, |ev| { - matches!(ev.kind, EventKind::Whitespace | EventKind::Atom(Softbreak)) - }); - if matches!(dir, Dir::Both) && whitespace_after { - return None; - } + fn parse_container(&mut self, first: &lex::Token) -> Option { + self.openers + .iter() + .rposition(|(o, _)| o.closed_by(first.kind)) + .and_then(|o| { + let (opener, e) = self.openers[o]; + let (e_attr, e_opener) = if let Opener::Link { event_span, .. } = opener { + (event_span - 1, e) + } else { + (e, e + 1) + }; - let (d, e) = self.openers[o]; - let e_attr = e; - let e_opener = e + 1; - if e_opener == self.events.len() - 1 { - // empty container - return None; - } + if e_opener == self.events.len() - 1 && !matches!(opener, Opener::Link { .. 
}) { + // empty container + return None; + } + let whitespace_before = self.events.back().map_or(false, |ev| { + ev.span + .of(self.input.src) + .chars() + .last() + .map_or(false, char::is_whitespace) + }); + if opener.bidirectional() && whitespace_before { + return None; + } - let inner_span = self.events[e_opener].span.between(self.span); - let mut event_closer = match DelimEventKind::from(d) { - DelimEventKind::Container(cont) => { - self.events[e_opener].kind = EventKind::Enter(cont); - Some(Event { - kind: EventKind::Exit(cont), - span: self.span, - }) - } - DelimEventKind::Quote(ty) => { - self.events[e_opener].kind = - EventKind::Atom(Atom::Quote { ty, left: true }); - Some(Event { - kind: EventKind::Atom(Atom::Quote { ty, left: false }), - span: self.span, - }) - } - DelimEventKind::Span(ty) => self.post_span(ty, e_opener), - }; - self.openers.drain(o..); - - if let Some(event_closer) = &mut event_closer { - if event_closer.span.is_empty() - && matches!( - event_closer.kind, - EventKind::Exit( - Container::ReferenceLink | Container::ReferenceImage - ) - ) + self.openers.drain(o..); + let closed = match DelimEventKind::from(opener) { + DelimEventKind::Container(cont) => { + self.events[e_opener].kind = EventKind::Enter(cont); + self.push(EventKind::Exit(cont)) + } + DelimEventKind::Quote(ty) => { + self.events[e_opener].kind = EventKind::Atom(Quote { ty, left: true }); + self.push(EventKind::Atom(Quote { ty, left: false })) + } + DelimEventKind::Span(ty) => { + if let Some(lex::Kind::Open(d @ (Delimiter::Bracket | Delimiter::Paren))) = + self.input.peek().map(|t| t.kind) { - assert_eq!(self.events[e_opener].span, event_closer.span); - event_closer.span = inner_span; - self.events[e_opener].span = inner_span; + self.push(EventKind::Str); // ] + self.openers.push(( + Opener::Link { + event_span: e_opener, + image: matches!(ty, SpanType::Image), + inline: matches!(d, Delimiter::Paren), + }, + self.events.len(), + )); + self.input.reset_span(); + 
self.input.eat(); // [ or ( + return self.push(EventKind::Str); + } else { + self.push(EventKind::Str) // ] } } - - if let Some((non_empty, span)) = self.ahead_attributes() { - if non_empty { - self.events[e_attr] = Event { - kind: EventKind::Attributes, - span, + DelimEventKind::Link { + event_span, + inline, + image, + } => { + let span_spec = self.events[e_opener].span.between(self.input.span); + let multiline = + self.events[e_opener].span.start() < self.input.span_line.start(); + + let spec: CowStr = if span_spec.is_empty() && !inline { + let span_spec = self.events[event_span] + .span + .between(self.events[e_opener - 1].span); + let events_text = self + .events + .iter() + .skip(event_span + 1) + .take(e_opener - event_span - 2); + + if multiline + || events_text.clone().any(|ev| { + !matches!(ev.kind, EventKind::Str | EventKind::Atom(..)) + }) + { + let mut spec = CowStr::Borrowed(""); + for text in events_text + .filter(|ev| { + matches!(ev.kind, EventKind::Str | EventKind::Atom(..)) + }) + .map(|ev| ev.span.of(self.input.src)) + { + spec.push_str(text); + } + spec + } else { + span_spec.of(self.input.src).into() + } + } else if multiline { + let mut spec = CowStr::Borrowed(""); + let mut first_part = true; + let mut span = self.events[e_opener].span.empty_after(); + + let mut append = |span: Span| { + span.of(self.input.src).split('\n').for_each(|s| { + if !s.is_empty() { + if !inline && !first_part { + spec.push(' '); + } + spec.push_str(s); + first_part = false; + } + }) }; - } - if event_closer.is_none() { - self.events[e_opener].kind = EventKind::Enter(Container::Span); - event_closer = Some(Event { - kind: EventKind::Exit(Container::Span), - span: self.span, - }); - } + for ev in self.events.iter().skip(e_opener + 1) { + if span.end() == ev.span.start() { + span = Span::new(span.start(), ev.span.end()); + } else { + append(span); + span = ev.span; + } + } + append(span); - self.span = span; - } + spec + } else { + span_spec.of(self.input.src).into() 
+ }; - event_closer - }) - .or_else(|| { - if matches!(dir, Dir::Close) { - return None; - } - if matches!(dir, Dir::Both) - && self - .peek() - .map_or(true, |t| matches!(t.kind, lex::Kind::Whitespace)) - { - return None; - } - if matches!(delim, Delim::SingleQuoted | Delim::DoubleQuoted) - && self - .events - .back() - .map_or(false, |ev| matches!(ev.kind, EventKind::Str)) - { - return None; + let idx = self.cow_strs.len() as CowStrIndex; + self.cow_strs.push(spec); + let container = match (image, inline) { + (false, false) => ReferenceLink(idx), + (false, true) => InlineLink(idx), + (true, false) => ReferenceImage(idx), + (true, true) => InlineImage(idx), + }; + self.events[event_span].kind = EventKind::Enter(container); + self.events[e_opener - 1] = Event { + kind: EventKind::Exit(container), + span: Span::new( + self.events[e_opener - 1].span.start(), + span_spec.end() + 1, + ), + }; + self.events.drain(e_opener..); + Some(Continue) } - self.openers.push((delim, self.events.len())); - // push dummy event in case attributes are encountered after closing delimiter - self.events.push_back(Event { - kind: EventKind::Placeholder, - span: Span::empty_at(self.span.start()), - }); - // use non-opener for now, replace if closed later - Some(Event { - kind: match delim { - Delim::SingleQuoted => EventKind::Atom(Quote { - ty: QuoteType::Single, - left: false, - }), - Delim::DoubleQuoted => EventKind::Atom(Quote { - ty: QuoteType::Double, - left: true, - }), - _ => EventKind::Str, + }; + + if self.input.peek().map_or(false, |t| { + matches!(t.kind, lex::Kind::Open(Delimiter::Brace)) + }) { + self.ahead_attributes( + AttributesElementType::Container { + e_placeholder: e_attr, }, - span: self.span, - }) + false, + ) + } else { + closed + } + }) + .or_else(|| { + let opener = Opener::from_token(first.kind)?; + let whitespace_after = self + .input + .lexer + .ahead() + .chars() + .next() + .map_or(true, char::is_whitespace); + if opener.bidirectional() && whitespace_after { + 
return None; + } + let whitespace_before = self.events.back().map_or(false, |ev| { + ev.span + .of(self.input.src) + .chars() + .last() + .map_or(false, char::is_whitespace) + }); + if matches!(opener, Opener::SingleQuoted | Opener::DoubleQuoted) + && self + .events + .back() + .map_or(false, |ev| matches!(ev.kind, EventKind::Str)) + && !whitespace_before + { + return None; + } + self.openers.push((opener, self.events.len())); + // push dummy event in case attributes are encountered after closing delimiter + self.push_sp( + EventKind::Placeholder, + Span::empty_at(self.input.span.start()), + ); + // use non-opener for now, replace if closed later + self.push(match opener { + Opener::SingleQuoted => EventKind::Atom(Quote { + ty: QuoteType::Single, + left: false, + }), + Opener::DoubleQuoted => EventKind::Atom(Quote { + ty: QuoteType::Double, + left: true, + }), + _ => EventKind::Str, }) - }) + }) } - fn ahead_attributes(&mut self) -> Option<(bool, Span)> { - let mut span = self.span.empty_after(); - let mut ahead = self.lexer.chars(); - let (mut attr_len, mut has_attr) = attr::valid(&mut ahead); - if attr_len > 0 { - while attr_len > 0 { - span = span.extend(attr_len); - self.lexer = lex::Lexer::new(ahead.clone()); - - let (l, non_empty) = attr::valid(&mut ahead); - has_attr |= non_empty; - attr_len = l; + fn parse_atom(&mut self, first: &lex::Token) -> Option { + let atom = match first.kind { + lex::Kind::Newline => Softbreak, + lex::Kind::Hardbreak => Hardbreak, + lex::Kind::Escape => Escape, + lex::Kind::Nbsp => Nbsp, + lex::Kind::Seq(Sequence::Period) if first.len >= 3 => { + while self.input.span.len() > 3 { + self.push_sp(EventKind::Atom(Ellipsis), self.input.span.with_len(3)); + self.input.span = self.input.span.skip(3); + } + if self.input.span.len() == 3 { + Ellipsis + } else { + return self.push(EventKind::Str); + } } - Some((has_attr, span)) - } else { - None - } - } - - fn post_span(&mut self, ty: SpanType, opener_event: usize) -> Option { - let mut 
ahead = self.lexer.chars(); - match ahead.next() { - Some(opener @ ('[' | '(')) => { - let img = ty == SpanType::Image; - let (closer, kind) = match opener { - '[' => (']', if img { ReferenceImage } else { ReferenceLink }), - '(' => (')', if img { InlineImage } else { InlineLink }), - _ => unreachable!(), + lex::Kind::Seq(Sequence::Hyphen) if first.len >= 2 => { + let (m, n) = if first.len % 3 == 0 { + (first.len / 3, 0) + } else if first.len % 2 == 0 { + (0, first.len / 2) + } else { + let n = (1..).find(|n| (first.len - 2 * n) % 3 == 0).unwrap(); + ((first.len - 2 * n) / 3, n) }; - let mut end = false; - let len = (&mut ahead) - .take_while(|c| { - if *c == opener { - return false; - } - if *c == closer { - end = true; - }; - !end - }) - .map(char::len_utf8) - .sum(); - end.then(|| { - let span = self.span.after(len).translate(1); - (kind, span) - }) + std::iter::repeat(EmDash) + .take(m) + .chain(std::iter::repeat(EnDash).take(n)) + .for_each(|atom| { + let l = if matches!(atom, EnDash) { 2 } else { 3 }; + self.push_sp(EventKind::Atom(atom), self.input.span.with_len(l)); + self.input.span = self.input.span.skip(l); + }); + return Some(Continue); } - _ => None, + lex::Kind::Open(Delimiter::BraceQuote1) => Quote { + ty: QuoteType::Single, + left: true, + }, + lex::Kind::Sym(Symbol::Quote1) | lex::Kind::Close(Delimiter::BraceQuote1) => Quote { + ty: QuoteType::Single, + left: false, + }, + lex::Kind::Open(Delimiter::BraceQuote2) => Quote { + ty: QuoteType::Double, + left: true, + }, + lex::Kind::Sym(Symbol::Quote2) | lex::Kind::Close(Delimiter::BraceQuote2) => Quote { + ty: QuoteType::Double, + left: false, + }, + _ => return None, + }; + + self.push(EventKind::Atom(atom)) + } + + fn merge_str_events(&mut self, span_str: Span) -> Event { + let mut span = span_str; + let should_merge = |e: &Event, span: Span| { + matches!(e.kind, EventKind::Str | EventKind::Placeholder) + && span.end() == e.span.start() + }; + while self.events.front().map_or(false, |e| 
should_merge(e, span)) { + let ev = self.events.pop_front().unwrap(); + span = span.union(ev.span); } - .map(|(kind, span)| { - self.lexer = lex::Lexer::new(ahead); - self.events[opener_event].kind = EventKind::Enter(kind); - self.events[opener_event].span = span; - self.span = span.translate(1); + + if matches!( + self.events.front().map(|ev| &ev.kind), + Some(EventKind::Attributes { + container: false, + .. + }) + ) { + self.apply_word_attributes(span) + } else { Event { - kind: EventKind::Exit(kind), + kind: EventKind::Str, span, } - }) + } } - fn parse_atom(&mut self, first: &lex::Token) -> Option { - let atom = - match first.kind { - lex::Kind::Newline => Softbreak, - lex::Kind::Hardbreak => Hardbreak, - lex::Kind::Escape => Escape, - lex::Kind::Nbsp => Nbsp, - lex::Kind::Seq(lex::Sequence::Period) if first.len >= 3 => { - while self.span.len() > 3 { - self.events.push_back(Event { - kind: EventKind::Atom(Ellipsis), - span: self.span.with_len(3), - }); - self.span = self.span.skip(3); - } - if self.span.len() == 3 { - Ellipsis - } else { - return Some(Event { - kind: EventKind::Str, - span: self.span, - }); - } - } - lex::Kind::Seq(lex::Sequence::Hyphen) if first.len >= 2 => { - let (m, n) = if first.len % 3 == 0 { - (first.len / 3, 0) - } else if first.len % 2 == 0 { - (0, first.len / 2) - } else { - let n = (1..).find(|n| (first.len - 2 * n) % 3 == 0).unwrap(); - ((first.len - 2 * n) / 3, n) - }; - std::iter::repeat(EmDash) - .take(m) - .chain(std::iter::repeat(EnDash).take(n)) - .for_each(|atom| { - let l = if matches!(atom, EnDash) { 2 } else { 3 }; - self.events.push_back(Event { - kind: EventKind::Atom(atom), - span: self.span.with_len(l), - }); - self.span = self.span.skip(l); - }); - return self.events.pop_back(); - } - lex::Kind::Open(lex::Delimiter::BraceQuote1) => Quote { - ty: QuoteType::Single, - left: true, - }, - lex::Kind::Sym(lex::Symbol::Quote1) - | lex::Kind::Close(lex::Delimiter::BraceQuote1) => Quote { - ty: QuoteType::Single, - left: 
false, - }, - lex::Kind::Open(lex::Delimiter::BraceQuote2) => Quote { - ty: QuoteType::Double, - left: true, - }, - lex::Kind::Sym(lex::Symbol::Quote2) - | lex::Kind::Close(lex::Delimiter::BraceQuote2) => Quote { - ty: QuoteType::Double, - left: false, - }, - _ => return None, - }; - - Some(Event { - kind: EventKind::Atom(atom), - span: self.span, - }) + fn apply_word_attributes(&mut self, span_str: Span) -> Event { + if let Some(i) = span_str + .of(self.input.src) + .bytes() + .rposition(|c| c.is_ascii_whitespace()) + { + let before = span_str.with_len(i + 1); + let word = span_str.skip(i + 1); + self.events.push_front(Event { + kind: EventKind::Str, + span: word, + }); + Event { + kind: EventKind::Str, + span: before, + } + } else { + let attr = self.events.pop_front().unwrap(); + self.events.push_front(Event { + kind: EventKind::Exit(Span), + span: span_str.empty_after(), + }); + self.events.push_front(Event { + kind: EventKind::Str, + span: span_str, + }); + self.events.push_front(Event { + kind: EventKind::Enter(Span), + span: span_str.empty_before(), + }); + attr + } } } @@ -723,7 +930,7 @@ enum SpanType { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum Delim { +enum Opener { Span(SpanType), Strong(Directionality), Emphasis(Directionality), @@ -734,123 +941,170 @@ enum Delim { Insert, SingleQuoted, DoubleQuoted, + Link { + event_span: usize, + image: bool, + inline: bool, + }, } -#[derive(Debug, Clone, Copy)] -enum Dir { - Open, - Close, - Both, -} - -impl Delim { - fn from_token(kind: lex::Kind) -> Option<(Self, Dir)> { - use Delim::*; - use Dir::{Both, Close, Open}; +impl Opener { + fn from_token(kind: lex::Kind) -> Option { use Directionality::{Bi, Uni}; + use Opener::*; use SpanType::{General, Image}; match kind { - lex::Kind::Sym(Symbol::Asterisk) => Some((Strong(Bi), Both)), - lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis(Bi), Both)), - lex::Kind::Sym(Symbol::Caret) => Some((Superscript(Bi), Both)), - lex::Kind::Sym(Symbol::Tilde) => 
Some((Subscript(Bi), Both)), - lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Both)), - lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Both)), - lex::Kind::Sym(Symbol::ExclaimBracket) => Some((Span(Image), Open)), - lex::Kind::Open(Delimiter::Bracket) => Some((Span(General), Open)), - lex::Kind::Close(Delimiter::Bracket) => Some((Span(General), Close)), - lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong(Uni), Open)), - lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong(Uni), Close)), - lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Emphasis(Uni), Open)), - lex::Kind::Close(Delimiter::BraceUnderscore) => Some((Emphasis(Uni), Close)), - lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript(Uni), Open)), - lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript(Uni), Close)), - lex::Kind::Open(Delimiter::BraceTilde) => Some((Subscript(Uni), Open)), - lex::Kind::Close(Delimiter::BraceTilde) => Some((Subscript(Uni), Close)), - lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Open)), - lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Close)), - lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Open)), - lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Close)), - lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Open)), - lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Close)), - lex::Kind::Open(Delimiter::BraceQuote1) => Some((SingleQuoted, Open)), - lex::Kind::Close(Delimiter::BraceQuote1) => Some((SingleQuoted, Close)), - lex::Kind::Open(Delimiter::BraceQuote2) => Some((DoubleQuoted, Open)), - lex::Kind::Close(Delimiter::BraceQuote2) => Some((DoubleQuoted, Close)), + lex::Kind::Sym(Symbol::Asterisk) => Some(Strong(Bi)), + lex::Kind::Sym(Symbol::Underscore) => Some(Emphasis(Bi)), + lex::Kind::Sym(Symbol::Caret) => Some(Superscript(Bi)), + lex::Kind::Sym(Symbol::Tilde) => Some(Subscript(Bi)), + lex::Kind::Sym(Symbol::Quote1) => Some(SingleQuoted), + lex::Kind::Sym(Symbol::Quote2) => 
Some(DoubleQuoted), + lex::Kind::Sym(Symbol::ExclaimBracket) => Some(Span(Image)), + lex::Kind::Open(Delimiter::Bracket) => Some(Span(General)), + lex::Kind::Open(Delimiter::BraceAsterisk) => Some(Strong(Uni)), + lex::Kind::Open(Delimiter::BraceUnderscore) => Some(Emphasis(Uni)), + lex::Kind::Open(Delimiter::BraceCaret) => Some(Superscript(Uni)), + lex::Kind::Open(Delimiter::BraceTilde) => Some(Subscript(Uni)), + lex::Kind::Open(Delimiter::BraceEqual) => Some(Mark), + lex::Kind::Open(Delimiter::BraceHyphen) => Some(Delete), + lex::Kind::Open(Delimiter::BracePlus) => Some(Insert), + lex::Kind::Open(Delimiter::BraceQuote1) => Some(SingleQuoted), + lex::Kind::Open(Delimiter::BraceQuote2) => Some(DoubleQuoted), _ => None, } } + + fn closed_by(&self, kind: lex::Kind) -> bool { + use Directionality::{Bi, Uni}; + use Opener::*; + + match self { + Span(..) => matches!(kind, lex::Kind::Close(Delimiter::Bracket)), + Strong(Bi) => matches!(kind, lex::Kind::Sym(Symbol::Asterisk)), + Strong(Uni) => matches!(kind, lex::Kind::Close(Delimiter::BraceAsterisk)), + Emphasis(Bi) => matches!(kind, lex::Kind::Sym(Symbol::Underscore)), + Emphasis(Uni) => matches!(kind, lex::Kind::Close(Delimiter::BraceUnderscore)), + Superscript(Bi) => matches!(kind, lex::Kind::Sym(Symbol::Caret)), + Superscript(Uni) => matches!(kind, lex::Kind::Close(Delimiter::BraceCaret)), + Subscript(Bi) => matches!(kind, lex::Kind::Sym(Symbol::Tilde)), + Subscript(Uni) => matches!(kind, lex::Kind::Close(Delimiter::BraceTilde)), + Mark => matches!(kind, lex::Kind::Close(Delimiter::BraceEqual)), + Delete => matches!(kind, lex::Kind::Close(Delimiter::BraceHyphen)), + Insert => matches!(kind, lex::Kind::Close(Delimiter::BracePlus)), + SingleQuoted => matches!( + kind, + lex::Kind::Sym(Symbol::Quote1) | lex::Kind::Close(Delimiter::BraceQuote1) + ), + DoubleQuoted => matches!( + kind, + lex::Kind::Sym(Symbol::Quote2) | lex::Kind::Close(Delimiter::BraceQuote2) + ), + Link { inline: false, .. 
} => matches!(kind, lex::Kind::Close(Delimiter::Bracket)), + Link { inline: true, .. } => matches!(kind, lex::Kind::Close(Delimiter::Paren)), + } + } + + fn bidirectional(&self) -> bool { + matches!( + self, + Opener::Strong(Directionality::Bi) + | Opener::Emphasis(Directionality::Bi) + | Opener::Superscript(Directionality::Bi) + | Opener::Subscript(Directionality::Bi) + | Opener::SingleQuoted + | Opener::DoubleQuoted + ) + } } enum DelimEventKind { Container(Container), Span(SpanType), Quote(QuoteType), + Link { + event_span: usize, + image: bool, + inline: bool, + }, } -impl From for DelimEventKind { - fn from(d: Delim) -> Self { +impl From for DelimEventKind { + fn from(d: Opener) -> Self { match d { - Delim::Span(ty) => Self::Span(ty), - Delim::Strong(..) => Self::Container(Strong), - Delim::Emphasis(..) => Self::Container(Emphasis), - Delim::Superscript(..) => Self::Container(Superscript), - Delim::Subscript(..) => Self::Container(Subscript), - Delim::Mark => Self::Container(Mark), - Delim::Delete => Self::Container(Delete), - Delim::Insert => Self::Container(Insert), - Delim::SingleQuoted => Self::Quote(QuoteType::Single), - Delim::DoubleQuoted => Self::Quote(QuoteType::Double), + Opener::Span(ty) => Self::Span(ty), + Opener::Strong(..) => Self::Container(Strong), + Opener::Emphasis(..) => Self::Container(Emphasis), + Opener::Superscript(..) => Self::Container(Superscript), + Opener::Subscript(..) 
=> Self::Container(Subscript), + Opener::Mark => Self::Container(Mark), + Opener::Delete => Self::Container(Delete), + Opener::Insert => Self::Container(Insert), + Opener::SingleQuoted => Self::Quote(QuoteType::Single), + Opener::DoubleQuoted => Self::Quote(QuoteType::Double), + Opener::Link { + event_span, + image, + inline, + } => Self::Link { + event_span, + image, + inline, + }, } } } -impl + Clone> Iterator for Parser { +impl<'s> Iterator for Parser<'s> { type Item = Event; fn next(&mut self) -> Option { - #[allow(clippy::blocks_in_if_conditions)] while self.events.is_empty() || !self.openers.is_empty() + || self.verbatim.is_some() + || self.attributes.is_some() || self // for merge or attributes .events .back() - .map_or(false, |ev| { - matches!(ev.kind, EventKind::Str | EventKind::Whitespace) - }) + .map_or(false, |ev| matches!(ev.kind, EventKind::Str)) { - if let Some(ev) = self.parse_event() { - self.events.push_back(ev); - } else { - break; - } - } - - self.events.pop_front().and_then(|e| { - match e.kind { - EventKind::Str if e.span.is_empty() => self.next(), - EventKind::Str | EventKind::Whitespace => { - // merge str events - let mut span = e.span; - let should_merge = |e: &Event, span: Span| { - matches!( - e.kind, - EventKind::Str | EventKind::Whitespace | EventKind::Placeholder - ) && span.end() == e.span.start() - }; - while self.events.front().map_or(false, |e| should_merge(e, span)) { - let ev = self.events.pop_front().unwrap(); - span = span.union(ev.span); + match self.parse_event() { + Continue => {} + Done => break, + Next => { + if let Some(l) = self.input.ahead.pop_front() { + self.input.set_current_line(l); + } else { + return None; } - Some(Event { - kind: EventKind::Str, - span, - }) } - EventKind::Placeholder => self.next(), - _ => Some(e), + More => return None, } + } + + // automatically close unclosed verbatim + if let Some(VerbatimState { event_opener, .. 
}) = self.verbatim.take() { + let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind { + debug_assert!(matches!( + ty, + Verbatim | RawFormat | InlineMath | DisplayMath + )); + ty + } else { + panic!() + }; + self.push(EventKind::Exit(ty_opener)); + } + + self.events.pop_front().and_then(|e| match e.kind { + EventKind::Str if e.span.is_empty() => self.next(), + EventKind::Str => Some(self.merge_str_events(e.span)), + EventKind::Placeholder + | EventKind::Attributes { + container: false, .. + } => self.next(), + _ => Some(e), }) } } @@ -860,12 +1114,14 @@ mod test { use super::Atom::*; use super::Container::*; use super::EventKind::*; + use super::QuoteType; use super::Verbatim; macro_rules! test_parse { ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { #[allow(unused)] - let mut p = super::Parser::new($src.chars()); + let mut p = super::Parser::new($src); + p.feed_line(super::Span::by_len(0, $src.len()), true); let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::>(); let expected = &[$($($token),*,)?]; assert_eq!(actual, expected, "\n\n{}\n\n", $src); @@ -931,7 +1187,13 @@ mod test { test_parse!( "pre `raw`{#id} post", (Str, "pre "), - (Attributes, "{#id}"), + ( + Attributes { + container: true, + attrs: 0, + }, + "{#id}" + ), (Enter(Verbatim), "`"), (Str, "raw"), (Exit(Verbatim), "`"), @@ -1033,31 +1295,31 @@ mod test { fn span_tag() { test_parse!( "[text][tag]", - (Enter(ReferenceLink), "tag"), + (Enter(ReferenceLink(0)), "["), (Str, "text"), - (Exit(ReferenceLink), "tag"), + (Exit(ReferenceLink(0)), "][tag]"), ); test_parse!( "![text][tag]", - (Enter(ReferenceImage), "tag"), + (Enter(ReferenceImage(0)), "!["), (Str, "text"), - (Exit(ReferenceImage), "tag"), + (Exit(ReferenceImage(0)), "][tag]"), ); test_parse!( "before [text][tag] after", (Str, "before "), - (Enter(ReferenceLink), "tag"), + (Enter(ReferenceLink(0)), "["), (Str, "text"), - (Exit(ReferenceLink), "tag"), + (Exit(ReferenceLink(0)), "][tag]"), (Str, " 
after"), ); test_parse!( "[[inner][i]][o]", - (Enter(ReferenceLink), "o"), - (Enter(ReferenceLink), "i"), + (Enter(ReferenceLink(1)), "["), + (Enter(ReferenceLink(0)), "["), (Str, "inner"), - (Exit(ReferenceLink), "i"), - (Exit(ReferenceLink), "o"), + (Exit(ReferenceLink(0)), "][i]"), + (Exit(ReferenceLink(1)), "][o]"), ); } @@ -1065,15 +1327,15 @@ mod test { fn span_tag_empty() { test_parse!( "[text][]", - (Enter(ReferenceLink), "text"), + (Enter(ReferenceLink(0)), "["), (Str, "text"), - (Exit(ReferenceLink), "text"), + (Exit(ReferenceLink(0)), "][]"), ); test_parse!( "![text][]", - (Enter(ReferenceImage), "text"), + (Enter(ReferenceImage(0)), "!["), (Str, "text"), - (Exit(ReferenceImage), "text"), + (Exit(ReferenceImage(0)), "][]"), ); } @@ -1082,12 +1344,12 @@ mod test { // TODO strip non str from tag? test_parse!( "[some _text_][]", - (Enter(ReferenceLink), "some _text_"), + (Enter(ReferenceLink(0)), "["), (Str, "some "), (Enter(Emphasis), "_"), (Str, "text"), (Exit(Emphasis), "_"), - (Exit(ReferenceLink), "some _text_"), + (Exit(ReferenceLink(0)), "][]"), ); } @@ -1096,19 +1358,46 @@ mod test { test_parse!( "before [text](url) after", (Str, "before "), - (Enter(InlineLink), "url"), + (Enter(InlineLink(0)), "["), (Str, "text"), - (Exit(InlineLink), "url"), + (Exit(InlineLink(0)), "](url)"), (Str, " after"), ); test_parse!( "[outer [inner](i)](o)", - (Enter(InlineLink), "o"), + (Enter(InlineLink(1)), "["), (Str, "outer "), - (Enter(InlineLink), "i"), + (Enter(InlineLink(0)), "["), (Str, "inner"), - (Exit(InlineLink), "i"), - (Exit(InlineLink), "o"), + (Exit(InlineLink(0)), "](i)"), + (Exit(InlineLink(1)), "](o)"), + ); + } + + #[test] + fn span_url_attr_unclosed() { + test_parse!( + "[text]({.cls}", + ( + Attributes { + container: false, + attrs: 0, + }, + "{.cls}" + ), + (Enter(Span), ""), + (Str, "[text]("), + (Exit(Span), ""), + ); + } + + #[test] + fn span_url_attr_closed() { + test_parse!( + "[text]({.cls})", + (Enter(InlineLink(0)), "["), + (Str, "text"), 
+ (Exit(InlineLink(0)), "]({.cls})"), ); } @@ -1117,13 +1406,18 @@ mod test { test_parse!( "before [text]() after", (Str, "before "), - (Enter(InlineLink), ""), + (Enter(InlineLink(0)), "["), (Str, "text"), - (Exit(InlineLink), ""), + (Exit(InlineLink(0)), "]()"), (Str, " after"), ); } + #[test] + fn span_url_unclosed() { + test_parse!("[text](url", (Str, "[text](url")); + } + #[test] fn span() { test_parse!("[abc]", (Str, "[abc]")); @@ -1133,7 +1427,13 @@ mod test { fn span_attr() { test_parse!( "[abc]{.def}", - (Attributes, "{.def}"), + ( + Attributes { + container: true, + attrs: 0, + }, + "{.def}" + ), (Enter(Span), "["), (Str, "abc"), (Exit(Span), "]"), @@ -1141,6 +1441,23 @@ mod test { test_parse!("not a [span] {#id}.", (Str, "not a [span] "), (Str, ".")); } + #[test] + fn span_attr_cont() { + test_parse!( + "[x_y]{.bar_}", + ( + Attributes { + container: true, + attrs: 0, + }, + "{.bar_}" + ), + (Enter(Span), "["), + (Str, "x_y"), + (Exit(Span), "]"), + ); + } + #[test] fn autolink() { test_parse!( @@ -1238,7 +1555,13 @@ mod test { fn container_attr() { test_parse!( "_abc def_{.attr}", - (Attributes, "{.attr}"), + ( + Attributes { + container: true, + attrs: 0, + }, + "{.attr}" + ), (Enter(Emphasis), "_"), (Str, "abc def"), (Exit(Emphasis), "_"), @@ -1266,7 +1589,13 @@ mod test { fn container_attr_multiple() { test_parse!( "_abc def_{.a}{.b}{.c} {.d}", - (Attributes, "{.a}{.b}{.c}"), + ( + Attributes { + container: true, + attrs: 0, + }, + "{.a}{.b}{.c}" + ), (Enter(Emphasis), "_"), (Str, "abc def"), (Exit(Emphasis), "_"), @@ -1278,7 +1607,13 @@ mod test { fn attr() { test_parse!( "word{a=b}", - (Attributes, "{a=b}"), + ( + Attributes { + container: false, + attrs: 0, + }, + "{a=b}" + ), (Enter(Span), ""), (Str, "word"), (Exit(Span), ""), @@ -1286,7 +1621,13 @@ mod test { test_parse!( "some word{.a}{.b} with attrs", (Str, "some "), - (Attributes, "{.a}{.b}"), + ( + Attributes { + container: false, + attrs: 0, + }, + "{.a}{.b}" + ), (Enter(Span), ""), (Str, 
"word"), (Exit(Span), ""), @@ -1299,6 +1640,7 @@ mod test { test_parse!("word {%comment%}", (Str, "word ")); test_parse!("word {%comment%} word", (Str, "word "), (Str, " word")); test_parse!("word {a=b}", (Str, "word ")); + test_parse!("word {.d}", (Str, "word ")); } #[test] @@ -1306,4 +1648,46 @@ mod test { test_parse!("word{}", (Str, "word")); test_parse!("word{ % comment % } trail", (Str, "word"), (Str, " trail")); } + + #[test] + fn quote() { + test_parse!( + "'a'", + ( + Atom(Quote { + ty: QuoteType::Single, + left: true, + }), + "'", + ), + (Str, "a"), + ( + Atom(Quote { + ty: QuoteType::Single, + left: false, + }), + "'", + ), + ); + test_parse!( + " 'a' ", + (Str, " "), + ( + Atom(Quote { + ty: QuoteType::Single, + left: true, + }), + "'", + ), + (Str, "a"), + ( + Atom(Quote { + ty: QuoteType::Single, + left: false, + }), + "'", + ), + (Str, " "), + ); + } } diff --git a/src/lex.rs b/src/lex.rs index 25f11e1c..d56fe1a9 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -13,7 +13,6 @@ pub(crate) struct Token { pub enum Kind { Text, Newline, - Whitespace, Nbsp, Hardbreak, Escape, @@ -21,6 +20,7 @@ pub enum Kind { Close(Delimiter), Sym(Symbol), Seq(Sequence), + DollarBacktick(u8), } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -36,6 +36,7 @@ pub enum Delimiter { Bracket, BraceQuote1, BraceQuote2, + Paren, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -55,7 +56,6 @@ pub enum Symbol { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Sequence { Backtick, - Dollar, Hyphen, Period, } @@ -64,7 +64,6 @@ impl Sequence { fn ch(self) -> char { match self { Self::Backtick => '`', - Self::Dollar => '$', Self::Period => '.', Self::Hyphen => '-', } @@ -72,9 +71,9 @@ impl Sequence { } #[derive(Clone)] -pub(crate) struct Lexer { - chars: I, - chars_non_peeked: I, +pub(crate) struct Lexer<'s> { + src: &'s str, + chars: std::str::Chars<'s>, /// Next character should be escaped. escape: bool, /// Token to be peeked or next'ed. 
@@ -83,11 +82,11 @@ pub(crate) struct Lexer { len: usize, } -impl + Clone> Lexer { - pub fn new(chars: I) -> Lexer { +impl<'s> Lexer<'s> { + pub fn new(src: &'s str) -> Self { Lexer { - chars: chars.clone(), - chars_non_peeked: chars, + src, + chars: src.chars(), escape: false, next: None, len: 0, @@ -103,13 +102,16 @@ impl + Clone> Lexer { self.next.as_ref() } - pub fn chars(&self) -> I { - self.chars_non_peeked.clone() + pub fn ahead(&self) -> &'s str { + &self.src[self.pos()..] + } + + fn pos(&self) -> usize { + self.src.len() - self.chars.as_str().len() - self.next.as_ref().map_or(0, |t| t.len) } fn next_token(&mut self) -> Option { let mut current = self.token(); - self.chars_non_peeked = self.chars.clone(); // concatenate text tokens if let Some(Token { kind: Text, len }) = &mut current { @@ -148,7 +150,6 @@ impl + Clone> Lexer { } fn token(&mut self) -> Option { - self.chars_non_peeked = self.chars.clone(); self.len = 0; let first = self.eat_char()?; @@ -167,6 +168,8 @@ impl + Clone> Lexer { _ if escape && first == ' ' => Nbsp, _ if escape => Text, + '\n' => Newline, + '\\' => { if self .peek_char() @@ -179,14 +182,10 @@ impl + Clone> Lexer { } } - '\n' => Newline, - _ if first.is_whitespace() => { - self.eat_while(char::is_whitespace); - Whitespace - } - '[' => Open(Bracket), ']' => Close(Bracket), + '(' => Open(Paren), + ')' => Close(Paren), '{' => { let explicit = match self.peek_char() { Some('*') => Some(Open(BraceAsterisk)), @@ -207,6 +206,7 @@ impl + Clone> Lexer { Open(Brace) } } + '}' => Close(Brace), '*' => self.maybe_eat_close_brace(Sym(Asterisk), BraceAsterisk), '^' => self.maybe_eat_close_brace(Sym(Caret), BraceCaret), '=' => self.maybe_eat_close_brace(Text, BraceEqual), @@ -236,8 +236,21 @@ impl + Clone> Lexer { ':' => Sym(Colon), '`' => self.eat_seq(Backtick), - '$' => self.eat_seq(Dollar), '.' 
=> self.eat_seq(Period), + '$' => { + self.eat_while(|c| c == '$'); + let mut n_ticks: u8 = 0; + self.eat_while(|c| { + if c == '`' { + if let Some(l) = n_ticks.checked_add(1) { + n_ticks = l; + return true; + } + } + false + }); + DollarBacktick(n_ticks) + } _ => Text, }; @@ -267,17 +280,11 @@ impl + Clone> Lexer { } } -impl + Clone> Iterator for Lexer { +impl<'s> Iterator for Lexer<'s> { type Item = Token; fn next(&mut self) -> Option { - self.next - .take() - .map(|x| { - self.chars_non_peeked = self.chars.clone(); - x - }) - .or_else(|| self.next_token()) + self.next.take().or_else(|| self.next_token()) } } @@ -291,7 +298,7 @@ mod test { macro_rules! test_lex { ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { #[allow(unused)] - let actual = super::Lexer::new($src.chars()).collect::>(); + let actual = super::Lexer::new($src).collect::>(); let expected = vec![$($($token),*,)?]; assert_eq!(actual, expected, "{}", $src); }; @@ -313,18 +320,11 @@ mod test { test_lex!("abc", Text.l(3)); test_lex!( "para w/ some _emphasis_ and *strong*.", - Text.l(4), - Whitespace.l(1), - Text.l(2), - Whitespace.l(1), - Text.l(4), - Whitespace.l(1), + Text.l(13), Sym(Underscore).l(1), Text.l(8), Sym(Underscore).l(1), - Whitespace.l(1), - Text.l(3), - Whitespace.l(1), + Text.l(5), Sym(Asterisk).l(1), Text.l(6), Sym(Asterisk).l(1), @@ -383,11 +383,17 @@ mod test { test_lex!("`", Seq(Backtick).l(1)); test_lex!("```", Seq(Backtick).l(3)); test_lex!( - "`$-.", + "`-.", Seq(Backtick).l(1), - Seq(Dollar).l(1), Seq(Hyphen).l(1), Seq(Period).l(1), ); } + + #[test] + fn dollar_backtick() { + test_lex!("$`", DollarBacktick(1).l(2)); + test_lex!("$$$`", DollarBacktick(1).l(4)); + test_lex!("$$````", DollarBacktick(4).l(6)); + } } diff --git a/src/lib.rs b/src/lib.rs index a8022be6..3cc62604 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,7 @@ //! let djot_input = "hello *world*!"; //! let events = jotdown::Parser::new(djot_input); //! let mut html = String::new(); -//! 
jotdown::html::Renderer.push(events, &mut html); +//! jotdown::html::Renderer::default().push(events, &mut html); //! assert_eq!(html, "<p>hello <strong>world</strong>!</p>
\n"); //! # } //! ``` @@ -36,16 +36,18 @@ //! let events = //! jotdown::Parser::new("a [link](https://example.com)").map(|e| match e { //! Event::Start(Link(dst, ty), attrs) => { -//! Event::Start(Link(dst.replace(".com", ".net").into(), ty), attrs) +//! Event::Start(Link(dst.replace(".com", ".net"), ty), attrs) //! } //! e => e, //! }); //! let mut html = String::new(); -//! jotdown::html::Renderer.push(events, &mut html); +//! jotdown::html::Renderer::default().push(events, &mut html); //! assert_eq!(html, "<p>a <a href=\"https://example.net\">link</a></p>
\n"); //! # } //! ``` +#![allow(clippy::blocks_in_if_conditions)] + use std::fmt; use std::fmt::Write as FmtWrite; use std::io; @@ -58,61 +60,170 @@ mod block; mod inline; mod lex; mod span; +mod string; mod tree; -use span::DiscontinuousString; use span::Span; pub use attr::{AttributeValue, AttributeValueParts, Attributes}; +pub use string::CowStr; -type CowStr<'s> = std::borrow::Cow<'s, str>; - +/// A trait for rendering [`Event`]s to an output format. +/// +/// The output can be written to either a [`std::fmt::Write`] or a [`std::io::Write`] object. +/// +/// If ownership of the [`Event`]s cannot be given to the renderer, use [`Render::push_borrowed`] +/// or [`Render::write_borrowed`]. +/// +/// An implementor needs to at least implement the [`Render::render_event`] function that renders a +/// single event to the output. If anything needs to be rendered at the beginning or end of the +/// output, the [`Render::render_prologue`] and [`Render::render_epilogue`] can be implemented as +/// well. +/// +/// # Examples +/// +/// Push to a [`String`] (implements [`std::fmt::Write`]): +/// +/// ``` +/// # use jotdown::Render; +/// # let events = std::iter::empty(); +/// let mut output = String::new(); +/// let mut renderer = jotdown::html::Renderer::default(); +/// renderer.push(events, &mut output); +/// ``` +/// +/// Write to standard output with buffering ([`std::io::Stdout`] implements [`std::io::Write`]): +/// +/// ``` +/// # use jotdown::Render; +/// # let events = std::iter::empty(); +/// let mut out = std::io::BufWriter::new(std::io::stdout()); +/// let mut renderer = jotdown::html::Renderer::default(); +/// renderer.write(events, &mut out).unwrap(); +/// ``` pub trait Render { - /// Push [`Event`]s to a unicode-accepting buffer or stream. - fn push<'s, I: Iterator>, W: fmt::Write>( - &self, - events: I, - out: W, - ) -> fmt::Result; - - /// Write [`Event`]s to a byte sink, encoded as UTF-8. + /// Render a single event. 
+ fn render_event<'s, W>(&mut self, e: &Event<'s>, out: W) -> std::fmt::Result + where + W: std::fmt::Write; + + /// Render something before any events have been provided. + /// + /// This does nothing by default, but an implementation may choose to prepend data at the + /// beginning of the output if needed. + fn render_prologue(&mut self, _out: W) -> std::fmt::Result + where + W: std::fmt::Write, + { + Ok(()) + } + + /// Render something after all events have been provided. + /// + /// This does nothing by default, but an implementation may choose to append extra data at the + /// end of the output if needed. + fn render_epilogue(&mut self, _out: W) -> std::fmt::Result + where + W: std::fmt::Write, + { + Ok(()) + } + + /// Push owned [`Event`]s to a unicode-accepting buffer or stream. + fn push<'s, I, W>(&mut self, mut events: I, mut out: W) -> fmt::Result + where + I: Iterator>, + W: fmt::Write, + { + self.render_prologue(&mut out)?; + events.try_for_each(|e| self.render_event(&e, &mut out))?; + self.render_epilogue(&mut out) + } + + /// Write owned [`Event`]s to a byte sink, encoded as UTF-8. /// /// NOTE: This performs many small writes, so IO writes should be buffered with e.g. /// [`std::io::BufWriter`]. 
- fn write<'s, I: Iterator>, W: io::Write>( - &self, - events: I, - out: W, - ) -> io::Result<()> { - struct Adapter { - inner: T, - error: io::Result<()>, - } + fn write<'s, I, W>(&mut self, events: I, out: W) -> io::Result<()> + where + I: Iterator>, + W: io::Write, + { + let mut out = WriteAdapter { + inner: out, + error: Ok(()), + }; - impl fmt::Write for Adapter { - fn write_str(&mut self, s: &str) -> fmt::Result { - match self.inner.write_all(s.as_bytes()) { - Ok(()) => Ok(()), - Err(e) => { - self.error = Err(e); - Err(fmt::Error) - } - } - } - } + self.push(events, &mut out).map_err(|_| match out.error { + Err(e) => e, + _ => io::Error::new(io::ErrorKind::Other, "formatter error"), + }) + } + + /// Push borrowed [`Event`]s to a unicode-accepting buffer or stream. + /// + /// # Examples + /// + /// Render a borrowed slice of [`Event`]s. + /// ``` + /// # use jotdown::Render; + /// # let events: &[jotdown::Event] = &[]; + /// let mut output = String::new(); + /// let mut renderer = jotdown::html::Renderer::default(); + /// renderer.push_borrowed(events.iter(), &mut output); + /// ``` + fn push_borrowed<'s, E, I, W>(&mut self, mut events: I, mut out: W) -> fmt::Result + where + E: AsRef>, + I: Iterator, + W: fmt::Write, + { + self.render_prologue(&mut out)?; + events.try_for_each(|e| self.render_event(e.as_ref(), &mut out))?; + self.render_epilogue(&mut out) + } - let mut out = Adapter { + /// Write borrowed [`Event`]s to a byte sink, encoded as UTF-8. + /// + /// NOTE: This performs many small writes, so IO writes should be buffered with e.g. + /// [`std::io::BufWriter`]. 
+ fn write_borrowed<'s, E, I, W>(&mut self, events: I, out: W) -> io::Result<()> + where + E: AsRef>, + I: Iterator, + W: io::Write, + { + let mut out = WriteAdapter { inner: out, error: Ok(()), }; - match self.push(events, &mut out) { - Ok(()) => Ok(()), - Err(_) => match out.error { - Err(_) => out.error, - _ => Err(io::Error::new(io::ErrorKind::Other, "formatter error")), - }, - } + self.push_borrowed(events, &mut out) + .map_err(|_| match out.error { + Err(e) => e, + _ => io::Error::new(io::ErrorKind::Other, "formatter error"), + }) + } +} + +struct WriteAdapter { + inner: T, + error: io::Result<()>, +} + +impl fmt::Write for WriteAdapter { + fn write_str(&mut self, s: &str) -> fmt::Result { + self.inner.write_all(s.as_bytes()).map_err(|e| { + self.error = Err(e); + fmt::Error + }) + } +} + +// XXX why is this not a blanket implementation? +impl<'s> AsRef> for &Event<'s> { + fn as_ref(&self) -> &Event<'s> { + self } } @@ -213,9 +324,10 @@ pub enum Container<'s> { CodeBlock { lang: Option<&'s str> }, /// An inline divider element. Span, - /// An inline link with a destination URL. + /// An inline link, the first field is either a destination URL or an unresolved tag. Link(CowStr<'s>, LinkType), - /// An inline image with a source URL. Inner Str objects compose the alternative text. + /// An inline image, the first field is either a destination URL or an unresolved tag. Inner + /// Str objects compose the alternative text. Image(CowStr<'s>, SpanLinkType), /// An inline verbatim string. Verbatim, @@ -333,6 +445,8 @@ pub enum SpanLinkType { Inline, /// In the form `[text][tag]` or `[tag][]`. Reference, + /// Like reference, but the tag is unresolved. + Unresolved, } /// The type of an inline link. @@ -463,6 +577,9 @@ type Set = std::collections::BTreeSet; /// structure that will be kept for the duration of the parser's lifetime. 
Then, when the iterator /// is advanced, the parser will start from the beginning of the document and parse inline elements /// and emit [`Event`]s. +/// +/// It is possible to clone the parser to avoid performing the block parsing multiple times. +#[derive(Clone)] pub struct Parser<'s> { src: &'s str, @@ -478,6 +595,9 @@ pub struct Parser<'s> { /// Current table row is a head row. table_head_row: bool, + /// Currently within a verbatim code block. + verbatim: bool, + /// Footnote references in the order they were encountered, without duplicates. footnote_references: Vec<&'s str>, /// Cache of footnotes to emit at the end. @@ -487,12 +607,11 @@ pub struct Parser<'s> { /// Currently within a footnote. footnote_active: bool, - /// Spans to the inlines in the leaf block currently being parsed. - inlines: span::InlineSpans<'s>, - /// Inline parser, recreated for each new inline. - inline_parser: Option>>, + /// Inline parser. + inline_parser: inline::Parser<'s>, } +#[derive(Clone)] struct Heading { /// Location of heading in src. location: usize, @@ -503,6 +622,7 @@ struct Heading { } /// Because of potential future references, an initial pass is required to obtain all definitions. +#[derive(Clone)] struct PrePass<'s> { /// Link definitions and their attributes. 
link_definitions: Map<&'s str, (CowStr<'s>, attr::Attributes<'s>)>, @@ -514,13 +634,11 @@ struct PrePass<'s> { impl<'s> PrePass<'s> { #[must_use] - fn new(src: &'s str, mut tree: block::Tree) -> Self { + fn new(src: &'s str, mut tree: block::Tree, inline_parser: &mut inline::Parser<'s>) -> Self { let mut link_definitions = Map::new(); let mut headings: Vec = Vec::new(); let mut used_ids: Set<&str> = Set::new(); - let mut inlines = span::InlineSpans::new(src); - let mut attr_prev: Option = None; while let Some(e) = tree.next() { match e.kind { @@ -533,7 +651,13 @@ impl<'s> PrePass<'s> { let url = match tree.count_children() { 0 => "".into(), 1 => tree.take_inlines().next().unwrap().of(src).trim().into(), - _ => tree.take_inlines().map(|sp| sp.of(src).trim()).collect(), + _ => { + let mut url = CowStr::Borrowed(""); + tree.take_inlines() + .map(|sp| sp.of(src).trim()) + .for_each(|s| url.push_str(s)); + url + } }; link_definitions.insert(tag, (url, attrs)); } @@ -549,31 +673,35 @@ impl<'s> PrePass<'s> { .and_then(|attrs| attrs.get("id")) .map(ToString::to_string); - inlines.set_spans(tree.take_inlines()); let mut id_auto = String::new(); let mut last_whitespace = true; - inline::Parser::new(inlines.chars()).for_each(|ev| match ev.kind { - inline::EventKind::Str => { - let mut chars = inlines.slice(ev.span).chars().peekable(); - while let Some(c) = chars.next() { - if c.is_whitespace() { - while chars.peek().map_or(false, |c| c.is_whitespace()) { - chars.next(); - } - if !last_whitespace { - last_whitespace = true; - id_auto.push('-'); + let inlines = tree.take_inlines().collect::>(); + inline_parser.reset(); + inlines.iter().enumerate().for_each(|(i, sp)| { + inline_parser.feed_line(*sp, i == inlines.len() - 1); + inline_parser.for_each(|ev| match ev.kind { + inline::EventKind::Str => { + let mut chars = ev.span.of(src).chars().peekable(); + while let Some(c) = chars.next() { + if c.is_whitespace() { + while chars.peek().map_or(false, |c| c.is_whitespace()) { + 
chars.next(); + } + if !last_whitespace { + last_whitespace = true; + id_auto.push('-'); + } + } else if !c.is_ascii_punctuation() || matches!(c, '-' | '_') { + id_auto.push(c); + last_whitespace = false; } - } else if !c.is_ascii_punctuation() || matches!(c, '-' | '_') { - id_auto.push(c); - last_whitespace = false; } } - } - inline::EventKind::Atom(inline::Atom::Softbreak) => { - id_auto.push('-'); - } - _ => {} + inline::EventKind::Atom(inline::Atom::Softbreak) => { + id_auto.push('-'); + } + _ => {} + }) }); id_auto.drain(id_auto.trim_end_matches('-').len()..); @@ -652,7 +780,8 @@ impl<'s> Parser<'s> { #[must_use] pub fn new(src: &'s str) -> Self { let tree = block::parse(src); - let pre_pass = PrePass::new(src, tree.clone()); + let mut inline_parser = inline::Parser::new(src); + let pre_pass = PrePass::new(src, tree.clone(), &mut inline_parser); Self { src, @@ -660,34 +789,32 @@ impl<'s> Parser<'s> { pre_pass, block_attributes: Attributes::new(), table_head_row: false, + verbatim: false, footnote_references: Vec::new(), footnotes: Map::new(), footnote_index: 0, footnote_active: false, - inlines: span::InlineSpans::new(src), - inline_parser: None, + inline_parser, } } fn inline(&mut self) -> Option> { - self.inline_parser.as_mut().and_then(|parser| { - let mut inline = parser.next(); - - let mut first_is_attr = false; - let mut attributes = inline.as_ref().map_or_else(Attributes::new, |inl| { - if let inline::EventKind::Attributes = inl.kind { - first_is_attr = true; - attr::parse(self.inlines.slice(inl.span)) - } else { - Attributes::new() - } - }); - - if first_is_attr { - inline = parser.next(); - } + let next = self.inline_parser.next()?; + + let (inline, mut attributes) = match next { + inline::Event { + kind: inline::EventKind::Attributes { attrs, .. }, + .. 
+ } => ( + self.inline_parser.next(), + self.inline_parser.attributes_store[attrs as usize].clone(), + ), + inline => (Some(inline), Attributes::new()), + }; - inline.map(|inline| match inline.kind { + inline.map(|inline| { + let enter = matches!(inline.kind, inline::EventKind::Enter(_)); + match inline.kind { inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => { let t = match c { inline::Container::Span => Container::Span, @@ -695,10 +822,7 @@ impl<'s> Parser<'s> { inline::Container::InlineMath => Container::Math { display: false }, inline::Container::DisplayMath => Container::Math { display: true }, inline::Container::RawFormat => Container::RawInline { - format: match self.inlines.src(inline.span) { - CowStr::Owned(_) => panic!(), - CowStr::Borrowed(s) => s, - }, + format: inline.span.of(self.src), }, inline::Container::Subscript => Container::Subscript, inline::Container::Superscript => Container::Superscript, @@ -707,54 +831,50 @@ impl<'s> Parser<'s> { inline::Container::Emphasis => Container::Emphasis, inline::Container::Strong => Container::Strong, inline::Container::Mark => Container::Mark, - inline::Container::InlineLink => Container::Link( - match self.inlines.src(inline.span) { - CowStr::Owned(s) => s.replace('\n', "").into(), - s @ CowStr::Borrowed(_) => s, - }, + inline::Container::InlineLink(url) => Container::Link( + self.inline_parser.cow_strs[url as usize].clone(), LinkType::Span(SpanLinkType::Inline), ), - inline::Container::InlineImage => Container::Image( - match self.inlines.src(inline.span) { - CowStr::Owned(s) => s.replace('\n', "").into(), - s @ CowStr::Borrowed(_) => s, - }, + inline::Container::InlineImage(url) => Container::Image( + self.inline_parser.cow_strs[url as usize].clone(), SpanLinkType::Inline, ), - inline::Container::ReferenceLink | inline::Container::ReferenceImage => { - let tag = match self.inlines.src(inline.span) { - CowStr::Owned(s) => s.replace('\n', " ").into(), - s @ CowStr::Borrowed(_) => s, - }; - let 
link_def = - self.pre_pass.link_definitions.get(tag.as_ref()).cloned(); - - let url = if let Some((url, attrs_def)) = link_def { + inline::Container::ReferenceLink(tag) + | inline::Container::ReferenceImage(tag) => { + let tag = &self.inline_parser.cow_strs[tag as usize]; + let link_def = self + .pre_pass + .link_definitions + .get::(tag.as_ref()) + .cloned(); + + let (url_or_tag, ty) = if let Some((url, attrs_def)) = link_def { attributes.union(attrs_def); - url + (url, SpanLinkType::Reference) } else { - self.pre_pass - .heading_id_by_tag(tag.as_ref()) - .map_or_else(|| "".into(), |id| format!("#{}", id).into()) + self.pre_pass.heading_id_by_tag(tag.as_ref()).map_or_else( + || (tag.clone(), SpanLinkType::Unresolved), + |id| (format!("#{}", id).into(), SpanLinkType::Reference), + ) }; - if matches!(c, inline::Container::ReferenceLink) { - Container::Link(url, LinkType::Span(SpanLinkType::Reference)) + if matches!(c, inline::Container::ReferenceLink(..)) { + Container::Link(url_or_tag, LinkType::Span(ty)) } else { - Container::Image(url, SpanLinkType::Reference) + Container::Image(url_or_tag, ty) } } inline::Container::Autolink => { - let url = self.inlines.src(inline.span); - let (url, ty) = if url.contains('@') { - (format!("mailto:{}", url).into(), LinkType::Email) + let url: CowStr = inline.span.of(self.src).into(); + let ty = if url.contains('@') { + LinkType::Email } else { - (url, LinkType::AutoLink) + LinkType::AutoLink }; Container::Link(url, ty) } }; - if matches!(inline.kind, inline::EventKind::Enter(_)) { + if enter { Event::Start(t, attributes) } else { Event::End(t) @@ -762,10 +882,7 @@ impl<'s> Parser<'s> { } inline::EventKind::Atom(a) => match a { inline::Atom::FootnoteReference => { - let tag = match self.inlines.src(inline.span) { - CowStr::Borrowed(s) => s, - CowStr::Owned(..) 
=> panic!(), - }; + let tag = inline.span.of(self.src); let number = self .footnote_references .iter() @@ -777,15 +894,9 @@ impl<'s> Parser<'s> { }, |i| i + 1, ); - Event::FootnoteReference( - match self.inlines.src(inline.span) { - CowStr::Borrowed(s) => s, - CowStr::Owned(..) => panic!(), - }, - number, - ) + Event::FootnoteReference(inline.span.of(self.src), number) } - inline::Atom::Symbol => Event::Symbol(self.inlines.src(inline.span)), + inline::Atom::Symbol => Event::Symbol(inline.span.of(self.src).into()), inline::Atom::Quote { ty, left } => match (ty, left) { (inline::QuoteType::Single, true) => Event::LeftSingleQuote, (inline::QuoteType::Single, false) => Event::RightSingleQuote, @@ -800,13 +911,11 @@ impl<'s> Parser<'s> { inline::Atom::Hardbreak => Event::Hardbreak, inline::Atom::Escape => Event::Escape, }, - inline::EventKind::Str => Event::Str(self.inlines.src(inline.span)), - inline::EventKind::Whitespace - | inline::EventKind::Attributes - | inline::EventKind::Placeholder => { + inline::EventKind::Str => Event::Str(inline.span.of(self.src).into()), + inline::EventKind::Attributes { .. 
} | inline::EventKind::Placeholder => { panic!("{:?}", inline) } - }) + } }) } @@ -828,6 +937,7 @@ impl<'s> Parser<'s> { let enter = matches!(ev.kind, tree::EventKind::Enter(..)); let cont = match c { block::Node::Leaf(l) => { + self.inline_parser.reset(); if matches!(l, block::Leaf::LinkDefinition) { // ignore link definitions if enter { @@ -836,11 +946,6 @@ impl<'s> Parser<'s> { self.block_attributes = Attributes::new(); continue; } - if enter && !matches!(l, block::Leaf::CodeBlock) { - self.inlines.set_spans(self.tree.take_inlines()); - self.inline_parser = - Some(inline::Parser::new(self.inlines.chars())); - } match l { block::Leaf::Paragraph => Container::Paragraph, block::Leaf::Heading { has_section } => Container::Heading { @@ -855,6 +960,7 @@ impl<'s> Parser<'s> { }, block::Leaf::DescriptionTerm => Container::DescriptionTerm, block::Leaf::CodeBlock => { + self.verbatim = enter; if let Some(format) = content.strip_prefix('=') { Container::RawBlock { format } } else { @@ -877,7 +983,7 @@ impl<'s> Parser<'s> { class: (!ev.span.is_empty()).then(|| content), }, block::Container::Footnote => { - assert!(enter); + debug_assert!(enter); self.footnotes.insert(content, self.tree.take_branch()); self.block_attributes = Attributes::new(); continue; @@ -934,7 +1040,15 @@ impl<'s> Parser<'s> { Event::End(cont) } } - tree::EventKind::Inline => Event::Str(content.into()), // verbatim + tree::EventKind::Inline => { + if self.verbatim { + Event::Str(content.into()) + } else { + self.inline_parser + .feed_line(ev.span, self.tree.branch_is_empty()); + return self.next(); + } + } }; return Some(event); } @@ -1190,7 +1304,8 @@ mod test { Start(Blockquote, Attributes::new()), Start(Paragraph, Attributes::new()), Start(Verbatim, Attributes::new()), - Str("abc\ndef".into()), + Str("abc\n".into()), + Str("def".into()), End(Verbatim), End(Paragraph), End(Blockquote), @@ -1244,19 +1359,24 @@ mod test { End(Link("url".into(), LinkType::Span(SpanLinkType::Inline))), End(Paragraph), ); + 
} + + #[test] + fn link_inline_multi_line() { test_parse!( concat!( - "> [text](url\n", - "> url)\n", // + "> [text](a\n", // + "> bc\n", // + "> def)\n", // ), Start(Blockquote, Attributes::new()), Start(Paragraph, Attributes::new()), Start( - Link("urlurl".into(), LinkType::Span(SpanLinkType::Inline)), + Link("abcdef".into(), LinkType::Span(SpanLinkType::Inline)), Attributes::new() ), Str("text".into()), - End(Link("urlurl".into(), LinkType::Span(SpanLinkType::Inline))), + End(Link("abcdef".into(), LinkType::Span(SpanLinkType::Inline))), End(Paragraph), End(Blockquote), ); @@ -1298,8 +1418,74 @@ mod test { ); } + #[test] + fn link_reference_unresolved() { + test_parse!( + "[text][tag]", + Start(Paragraph, Attributes::new()), + Start( + Link("tag".into(), LinkType::Span(SpanLinkType::Unresolved)), + Attributes::new() + ), + Str("text".into()), + End(Link("tag".into(), LinkType::Span(SpanLinkType::Unresolved))), + End(Paragraph), + ); + test_parse!( + "![text][tag]", + Start(Paragraph, Attributes::new()), + Start( + Image("tag".into(), SpanLinkType::Unresolved), + Attributes::new() + ), + Str("text".into()), + End(Image("tag".into(), SpanLinkType::Unresolved)), + End(Paragraph), + ); + } + #[test] fn link_reference_multiline() { + test_parse!( + concat!( + "[text][a\n", // + "b]\n", // + "\n", // + "[a b]: url\n", // + ), + Start(Paragraph, Attributes::new()), + Start( + Link("url".into(), LinkType::Span(SpanLinkType::Reference)), + Attributes::new() + ), + Str("text".into()), + End(Link("url".into(), LinkType::Span(SpanLinkType::Reference))), + End(Paragraph), + Blankline, + ); + test_parse!( + concat!( + "> [text][a\n", // + "> b]\n", // + "\n", // + "[a b]: url\n", // + ), + Start(Blockquote, Attributes::new()), + Start(Paragraph, Attributes::new()), + Start( + Link("url".into(), LinkType::Span(SpanLinkType::Reference)), + Attributes::new() + ), + Str("text".into()), + End(Link("url".into(), LinkType::Span(SpanLinkType::Reference))), + End(Paragraph), + 
End(Blockquote), + Blankline, + ); + } + + #[test] + fn link_definition_multiline() { test_parse!( concat!( "[text][tag]\n", @@ -1522,6 +1708,97 @@ mod test { ); } + #[test] + fn attr_multiline() { + test_parse!( + concat!( + "> _abc_{a=b\n", // + "> c=d}\n", // + ), + Start(Blockquote, Attributes::new()), + Start(Paragraph, Attributes::new()), + Start(Emphasis, [("a", "b"), ("c", "d")].into_iter().collect()), + Str("abc".into()), + End(Emphasis), + End(Paragraph), + End(Blockquote), + ); + test_parse!( + concat!( + "> a{\n", // + "> %%\n", // + "> a=a}\n", // + ), + Start(Blockquote, Attributes::new()), + Start(Paragraph, Attributes::new()), + Start(Span, [("a", "a")].into_iter().collect()), + Str("a".into()), + End(Span), + End(Paragraph), + End(Blockquote), + ); + test_parse!( + concat!( + "> a{a=\"a\n", // + "> b\n", // + "> c\"}\n", // + ), + Start(Blockquote, Attributes::new()), + Start(Paragraph, Attributes::new()), + Start(Span, [("a", "a b c")].into_iter().collect()), + Str("a".into()), + End(Span), + End(Paragraph), + End(Blockquote), + ); + test_parse!( + concat!( + "> a{a=\"\n", // + "> b\"}\n", // + ), + Start(Blockquote, Attributes::new()), + Start(Paragraph, Attributes::new()), + Start(Span, [("a", "b")].into_iter().collect()), + Str("a".into()), + End(Span), + End(Paragraph), + End(Blockquote), + ); + } + + #[test] + fn attr_multiline_unclosed() { + test_parse!( + concat!( + "a{\n", // + " b\n", // + ), + Start(Paragraph, Attributes::new()), + Str("a{".into()), + Softbreak, + Str("b".into()), + End(Paragraph), + ); + } + + #[test] + fn attr_multiline_invalid() { + test_parse!( + concat!( + "a{a=b\n", // + " b\n", // + "}", // + ), + Start(Paragraph, Attributes::new()), + Str("a{a=b".into()), + Softbreak, + Str("b".into()), + Softbreak, + Str("}".into()), + End(Paragraph), + ); + } + #[test] fn list_item_unordered() { test_parse!( diff --git a/src/main.rs b/src/main.rs index b9ea08c3..e73c081c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -68,11 
+68,11 @@ fn run() -> Result<(), std::io::Error> { }; let parser = jotdown::Parser::new(&content); - let html = jotdown::html::Renderer; + let mut renderer = jotdown::html::Renderer::default(); match app.output { - Some(path) => html.write(parser, File::create(path)?)?, - None => html.write(parser, BufWriter::new(std::io::stdout()))?, + Some(path) => renderer.write(parser, File::create(path)?)?, + None => renderer.write(parser, BufWriter::new(std::io::stdout()))?, } Ok(()) diff --git a/src/span.rs b/src/span.rs index fafb43cb..6c595a7b 100644 --- a/src/span.rs +++ b/src/span.rs @@ -1,5 +1,3 @@ -use crate::CowStr; - #[derive(Clone, Copy, Default, Debug, PartialEq, Eq)] pub struct Span { start: u32, @@ -30,14 +28,6 @@ impl Span { Self::empty_at(self.end()) } - pub fn with_start(self, start: usize) -> Self { - Self::new(start, self.end()) - } - - pub fn with_end(self, end: usize) -> Self { - Self::new(self.start(), end) - } - pub fn with_len(self, len: usize) -> Self { Self::by_len(self.start(), len) } @@ -115,205 +105,6 @@ impl Span { } } -pub trait DiscontinuousString<'s> { - type Chars: Iterator; - - fn src(&self, span: Span) -> CowStr<'s>; - - fn chars(&self) -> Self::Chars; -} - -impl<'s> DiscontinuousString<'s> for &'s str { - type Chars = std::str::Chars<'s>; - - fn src(&self, span: Span) -> CowStr<'s> { - span.of(self).into() - } - - fn chars(&self) -> Self::Chars { - str::chars(self) - } -} - -/// Multiple discontinuous [`std::str::Chars`] objects concatenated. 
-#[derive(Clone)] -pub struct InlineChars<'s, I> { - src: &'s str, - inlines: I, - next: std::str::Chars<'s>, -} - -// Implement inlines.flat_map(|sp| sp.of(self.src).chars()) -impl<'s, I: Iterator> InlineChars<'s, I> { - fn new(src: &'s str, inlines: I) -> Self { - Self { - src, - inlines, - next: "".chars(), - } - } -} - -impl<'s, I: Iterator> Iterator for InlineChars<'s, I> { - type Item = char; - - fn next(&mut self) -> Option { - if self.next.as_str().is_empty() { - self.next = self - .inlines - .next() - .map_or_else(|| "".chars(), |sp| sp.of(self.src).chars()); - } - self.next.next() - } -} - -pub type InlineCharsIter<'s> = InlineChars<'s, std::iter::Copied>>; - -/// Discontinuous slices of a [`&str`]. -#[derive(Default, Debug)] -pub struct InlineSpans<'s> { - src: &'s str, - spans: Vec, -} - -impl<'s> InlineSpans<'s> { - pub fn new(src: &'s str) -> Self { - Self { - src, - spans: Vec::new(), - } - } - - pub fn set_spans(&mut self, spans: impl Iterator) { - self.spans.clear(); - self.spans.extend(spans); - } - - pub fn slice<'i>(&'i self, span: Span) -> InlineSpansSlice<'s, 'i> { - let mut first = 0; - let mut last = 0; - let mut first_skip = 0; - let mut last_len = 0; - - let mut a = 0; - for (i, sp) in self.spans.iter().enumerate() { - let b = a + sp.len(); - if span.start() < b { - if a <= span.start() { - first = i; - first_skip = span.start() - a; - if span.end() <= b { - // continuous - last = i; - last_len = span.len(); - break; - } - } else { - last = i; - last_len = sp.len().min(span.end() - a); - break; - }; - } - a = b; - } - - assert_ne!(last_len, 0); - - InlineSpansSlice { - src: self.src, - first_skip, - last_len, - spans: &self.spans[first..=last], - } - } - - /// Borrow if continuous, copy if discontiunous. 
- fn borrow_or_copy>(src: &str, spans: I, span: Span) -> CowStr { - let mut a = 0; - let mut s = String::new(); - for sp in spans { - let b = a + sp.len(); - if span.start() < b { - let r = if a <= span.start() { - if span.end() <= b { - // continuous - return CowStr::Borrowed(&sp.of(src)[span.start() - a..span.end() - a]); - } - (span.start() - a)..sp.len() - } else if a <= span.end() { - 0..sp.len().min(span.end() - a) - } else { - break; - }; - s.push_str(&sp.of(src)[r]); - } - a = b; - } - assert_eq!(span.len(), s.len()); - CowStr::Owned(s) - } -} - -impl<'s> DiscontinuousString<'s> for InlineSpans<'s> { - type Chars = InlineCharsIter<'s>; - - fn src(&self, span: Span) -> CowStr<'s> { - Self::borrow_or_copy(self.src, self.spans.iter().copied(), span) - } - - fn chars(&self) -> Self::Chars { - // SAFETY: do not call set_spans while chars is in use - unsafe { std::mem::transmute(InlineChars::new(self.src, self.spans.iter().copied())) } - } -} - -/// A read-only slice of an [`InlineSpans`] object. 
-pub struct InlineSpansSlice<'s, 'i> { - src: &'s str, - first_skip: usize, - last_len: usize, - spans: &'i [Span], -} - -impl<'s, 'i> InlineSpansSlice<'s, 'i> { - fn spans(&self) -> InlineSpansSliceIter<'i> { - let (span_start, r_middle, span_end) = if self.spans.len() == 1 { - ( - Span::by_len(self.spans[0].start() + self.first_skip, self.last_len), - 0..0, - Span::by_len(self.spans[self.spans.len() - 1].start(), 0), - ) - } else { - ( - Span::new(self.spans[0].start() + self.first_skip, self.spans[0].end()), - 1..1 + self.spans.len().saturating_sub(2), - Span::by_len(self.spans[self.spans.len() - 1].start(), self.last_len), - ) - }; - std::iter::once(span_start) - .chain(self.spans[r_middle].iter().copied()) - .chain(std::iter::once(span_end)) - } -} - -impl<'s, 'i> DiscontinuousString<'s> for InlineSpansSlice<'s, 'i> { - type Chars = InlineChars<'s, InlineSpansSliceIter<'i>>; - - fn src(&self, span: Span) -> CowStr<'s> { - InlineSpans::borrow_or_copy(self.src, self.spans(), span) - } - - fn chars(&self) -> Self::Chars { - InlineChars::new(self.src, self.spans()) - } -} - -pub type InlineSpansSliceIter<'i> = std::iter::Chain< - std::iter::Chain, std::iter::Copied>>, - std::iter::Once, ->; - #[cfg(test)] mod test { use super::Span; diff --git a/src/string.rs b/src/string.rs new file mode 100644 index 00000000..b21d4a6c --- /dev/null +++ b/src/string.rs @@ -0,0 +1,344 @@ +use std::{borrow::Borrow, fmt::Display, ops::Deref, str::from_utf8}; + +// Largest CowStr variant is Owned(String). A String uses 3 words of memory, but a fourth word is +// needed to hold the tag (the tag takes a byte, but a full word is used for alignment reasons.) +// This means that the available space we have for an inline string is 4 words - 2 bytes for the +// tag and length. +const MAX_INLINE_STR_LEN: usize = 4 * std::mem::size_of::() - 2; + +/// A clone-on-write string that is inlined if it is small enough. 
+/// +/// Minimizes the number of heap allocations when working with small strings. +#[derive(Debug, Eq)] +pub enum CowStr<'s> { + Owned(String), + Borrowed(&'s str), + Inlined([u8; MAX_INLINE_STR_LEN], u8), +} + +impl<'s> CowStr<'s> { + /// Replaces all occurrences of `from` with `to`. + /// + /// Takes ownership of self and returns a new [`CowStr`]. + pub fn replace(self, from: &str, to: &str) -> Self { + if from.is_empty() { + return self; + } + + match self { + CowStr::Inlined(mut inner, len) => { + let mut len = len as usize; + let diff = to.len() as isize - from.len() as isize; + + while let Some(start) = from_utf8(&inner[..len]).unwrap().find(from) { + if diff.is_positive() { + len += diff as usize; + if len > MAX_INLINE_STR_LEN { + return CowStr::Owned(self.deref().replace(from, to)); + } + inner[start + from.len()..].rotate_right(diff as usize); + } else if diff.is_negative() { + len -= (-diff) as usize; + inner[start..].rotate_left((-diff) as usize); + } + + inner[start..start + to.len()].copy_from_slice(to.as_bytes()); + } + + CowStr::Inlined(inner, len as u8) + } + CowStr::Borrowed(s) if s.contains(from) => { + let mut inner = [0; MAX_INLINE_STR_LEN]; + let mut len = s.len(); + let diff = to.len() as isize - from.len() as isize; + inner[..len].copy_from_slice(s.as_bytes()); + + while let Some(start) = from_utf8(&inner[..len]).unwrap().find(from) { + if diff.is_positive() { + len += diff as usize; + if len > MAX_INLINE_STR_LEN { + return CowStr::Owned(self.deref().replace(from, to)); + } + inner[start + from.len()..].rotate_right(diff as usize); + } else if diff.is_negative() { + len -= (-diff) as usize; + inner[start..].rotate_left((-diff) as usize); + } + + inner[start..start + to.len()].copy_from_slice(to.as_bytes()); + } + + CowStr::Inlined(inner, len as u8) + } + CowStr::Owned(s) if s.contains(from) => CowStr::Owned(s.replace(from, to)), + _ => self, + } + } + + /// Pushes a character to the end of the [`CowStr`]. 
+ pub fn push(&mut self, c: char) { + match self { + CowStr::Owned(this) => this.push(c), + CowStr::Inlined(inner, len) => { + let l = *len as usize + c.len_utf8(); + if l > MAX_INLINE_STR_LEN { + let mut s = self.to_string(); + s.push(c); + *self = CowStr::Owned(s); + } else { + c.encode_utf8(&mut inner[*len as usize..l]); + *len = l as u8; + } + } + CowStr::Borrowed(this) => { + let len = this.len() + c.len_utf8(); + if len > MAX_INLINE_STR_LEN { + let mut s = self.to_string(); + s.push(c); + *self = CowStr::Owned(s); + } else { + let mut inner = [0; MAX_INLINE_STR_LEN]; + inner[..this.len()].copy_from_slice(this.as_bytes()); + c.encode_utf8(&mut inner[this.len()..len]); + *self = CowStr::Inlined(inner, len as u8); + } + } + } + } + + /// Pushes a string slice to the end of the [`CowStr`]. + pub fn push_str(&mut self, s: &str) { + if s.is_empty() { + return; + } + + match self { + CowStr::Owned(this) => this.push_str(s), + CowStr::Inlined(inner, len) => { + let l = *len as usize + s.len(); + if l > MAX_INLINE_STR_LEN { + *self = CowStr::Owned(self.to_string() + s); + } else { + inner[*len as usize..l].copy_from_slice(s.as_bytes()); + *len = l as u8; + } + } + CowStr::Borrowed(this) => { + let len = this.len() + s.len(); + if len > MAX_INLINE_STR_LEN { + *self = CowStr::Owned(this.to_string() + s); + } else { + let mut inner = [0; MAX_INLINE_STR_LEN]; + inner[..this.len()].copy_from_slice(this.as_bytes()); + inner[this.len()..len].copy_from_slice(s.as_bytes()); + *self = CowStr::Inlined(inner, len as u8); + } + } + } + } +} + +impl<'s> Deref for CowStr<'s> { + type Target = str; + + fn deref(&self) -> &Self::Target { + match *self { + Self::Owned(ref s) => s.borrow(), + Self::Borrowed(s) => s, + // NOTE: Inlined strings can only be constructed from strings or chars, which means they + // are guaranteed to be valid UTF-8. We could consider unchecked conversion as well, but + // a benchmark should be done before introducing unsafes. 
+ Self::Inlined(ref inner, len) => from_utf8(&inner[..len as usize]).unwrap(), + } + } +} + +impl<'s> AsRef for CowStr<'s> { + fn as_ref(&self) -> &str { + self.deref() + } +} + +impl<'s> From for CowStr<'s> { + fn from(value: char) -> Self { + let mut inner = [0u8; MAX_INLINE_STR_LEN]; + value.encode_utf8(&mut inner); + CowStr::Inlined(inner, value.len_utf8() as u8) + } +} + +impl<'s> From<&'s str> for CowStr<'s> { + fn from(value: &'s str) -> Self { + CowStr::Borrowed(value) + } +} + +impl<'s> From for CowStr<'s> { + fn from(value: String) -> Self { + CowStr::Owned(value) + } +} + +impl<'s> Clone for CowStr<'s> { + fn clone(&self) -> Self { + match self { + CowStr::Owned(s) => { + let len = s.len(); + if len > MAX_INLINE_STR_LEN { + CowStr::Owned(s.clone()) + } else { + let mut inner = [0u8; MAX_INLINE_STR_LEN]; + inner[..len].copy_from_slice(s.as_bytes()); + CowStr::Inlined(inner, len as u8) + } + } + CowStr::Borrowed(s) => CowStr::Borrowed(s), + CowStr::Inlined(inner, len) => CowStr::Inlined(*inner, *len), + } + } +} + +impl<'s> PartialEq for CowStr<'s> { + fn eq(&self, other: &Self) -> bool { + self.deref() == other.deref() + } +} + +impl<'s> Display for CowStr<'s> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.deref()) + } +} + +impl<'s, 'a> FromIterator<&'a str> for CowStr<'s> { + fn from_iter>(iter: T) -> Self { + CowStr::Owned(FromIterator::from_iter(iter)) + } +} + +#[cfg(test)] +mod tests { + macro_rules! 
assert_matches { + ($expression:expr, $pattern:pat) => { + match $expression { + $pattern => (), + ref e => panic!( + "assertion failed: `{:?}` does not match `{}`", + e, + stringify!($pattern) + ), + } + }; + } + + use super::*; + + #[test] + fn push_to_borrowed() { + let mut s = CowStr::Borrowed("hello"); + s.push_str("a".repeat(MAX_INLINE_STR_LEN - 6).as_str()); + assert_matches!(s, CowStr::Inlined(..)); + s.push('a'); + assert_matches!(s, CowStr::Inlined(..)); + s.push('a'); + assert_matches!(s, CowStr::Owned(..)); + } + + #[test] + fn push_to_inlined() { + let mut s = CowStr::Inlined([0; MAX_INLINE_STR_LEN], 0); + s.push_str("a".repeat(MAX_INLINE_STR_LEN - 1).as_str()); + assert_matches!(s, CowStr::Inlined(..)); + s.push('a'); + assert_matches!(s, CowStr::Inlined(..)); + s.push('a'); + assert_matches!(s, CowStr::Owned(..)); + } + + #[test] + fn push_to_owned() { + let mut s = CowStr::Owned("hello".to_string()); + s.push_str("a".repeat(MAX_INLINE_STR_LEN - 6).as_str()); + assert_matches!(s, CowStr::Owned(..)); + s.push('a'); + assert_matches!(s, CowStr::Owned(..)); + s.push('a'); + assert_matches!(s, CowStr::Owned(..)); + } + + #[test] + fn push_empty() { + let max_min1 = "a".repeat(MAX_INLINE_STR_LEN - 1); + let mut s = CowStr::Borrowed(&max_min1); + s.push_str(""); + assert_matches!(s, CowStr::Borrowed(..)); + + let max = "a".repeat(MAX_INLINE_STR_LEN); + let mut s = CowStr::Borrowed(&max); + s.push_str(""); + assert_matches!(s, CowStr::Borrowed(..)); + + let max_plus1 = "a".repeat(MAX_INLINE_STR_LEN + 1); + let mut s = CowStr::Borrowed(&max_plus1); + s.push_str(""); + assert_matches!(s, CowStr::Borrowed(..)); + } + + #[test] + fn replace_borrowed() { + let string = "a".repeat(MAX_INLINE_STR_LEN - 1) + "b"; + let s = CowStr::Borrowed(&string); + assert_matches!(s.clone().replace("b", ""), CowStr::Inlined(..)); + assert_matches!(s.clone().replace("b", "c"), CowStr::Inlined(..)); + assert_matches!(s.clone().replace("b", "cd"), CowStr::Owned(..)); + } + + 
#[test] + fn replace_inlined() { + let mut arr = [65; MAX_INLINE_STR_LEN]; + arr[0] = 66; + let s = CowStr::Inlined(arr, MAX_INLINE_STR_LEN as u8); + assert_matches!(s.clone().replace("B", ""), CowStr::Inlined(..)); + assert_matches!(s.clone().replace("B", "C"), CowStr::Inlined(..)); + assert_matches!(s.clone().replace("B", "CD"), CowStr::Owned(..)); + } + + #[test] + fn replace_owned() { + let string = "a".repeat(MAX_INLINE_STR_LEN - 1) + "b"; + // We need to create a new `s` each time, because we want to make sure the CowStr is of the + // Owned variant before replacing. Cloning the CowStr would inline it given the chance. + let s = CowStr::Owned(string.clone()); + assert_matches!(s.replace("b", ""), CowStr::Owned(..)); + let s = CowStr::Owned(string.clone()); + assert_matches!(s.replace("b", "c"), CowStr::Owned(..)); + let s = CowStr::Owned(string); + assert_matches!(s.replace("b", "cd"), CowStr::Owned(..)); + } + + #[test] + fn inline_replace() { + let mut s = CowStr::Borrowed("hello hello"); + s.push_str(" hellohello"); + assert_eq!( + s.clone().replace("djot", "jotdown").as_ref(), + "hello hello hellohello" + ); + assert_eq!(s.clone().replace("hello", "hi").as_ref(), "hi hi hihi"); + + let mut s = CowStr::Borrowed("start middle"); + s.push_str(" end"); + assert_eq!( + s.clone().replace("start", "replaced").as_ref(), + "replaced middle end" + ); + assert_eq!( + s.clone().replace("middle", "replaced").as_ref(), + "start replaced end" + ); + assert_eq!( + s.clone().replace("end", "replaced").as_ref(), + "start middle replaced" + ); + } +} diff --git a/src/tree.rs b/src/tree.rs index f0648dd0..4992a754 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -79,12 +79,16 @@ impl Tree { std::iter::from_fn(move || { head.take().map(|h| { let n = &self.nodes[h.index()]; - assert!(matches!(n.kind, NodeKind::Inline)); + debug_assert!(matches!(n.kind, NodeKind::Inline)); head = n.next; n.span }) }) } + + pub fn branch_is_empty(&self) -> bool { + matches!(self.head, None) + } } 
impl Iterator for Tree { @@ -126,7 +130,7 @@ pub struct NodeIndex(std::num::NonZeroUsize); impl NodeIndex { fn new(i: usize) -> Self { - assert_ne!(i, usize::MAX); + debug_assert_ne!(i, usize::MAX); Self((i + 1).try_into().unwrap()) } @@ -246,7 +250,7 @@ impl Builder { } } else { let last = self.branch.pop(); - assert_ne!(last, None); + debug_assert_ne!(last, None); } } @@ -314,7 +318,7 @@ impl Builder { } pub(super) fn finish(self) -> Tree { - assert_eq!(self.depth, 0); + debug_assert_eq!(self.depth, 0); let head = self.nodes[NodeIndex::root().index()].next; Tree { nodes: self.nodes.into_boxed_slice().into(), @@ -331,19 +335,19 @@ impl Builder { match &mut head.kind { NodeKind::Root | NodeKind::Inline | NodeKind::Atom(_) => { // set next pointer of previous node - assert_eq!(head.next, None); + debug_assert_eq!(head.next, None); head.next = Some(ni); } NodeKind::Container(_, child) => { self.branch.push(*head_ni); // set child pointer of current container - assert_eq!(*child, None); + debug_assert_eq!(*child, None); *child = Some(ni); } } } else if let Some(block) = self.branch.pop() { let mut block = &mut self.nodes[block.index()]; - assert!(matches!(block.kind, NodeKind::Container(..))); + debug_assert!(matches!(block.kind, NodeKind::Container(..))); block.next = Some(ni); } else { panic!() diff --git a/tests/afl/src/lib.rs b/tests/afl/src/lib.rs index 530a6aec..adbca14f 100644 --- a/tests/afl/src/lib.rs +++ b/tests/afl/src/lib.rs @@ -19,7 +19,9 @@ pub fn html(data: &[u8]) { if !s.contains("=html") { let p = jotdown::Parser::new(s); let mut html = "\n".to_string(); - jotdown::html::Renderer.push(p, &mut html).unwrap(); + jotdown::html::Renderer::default() + .push(p, &mut html) + .unwrap(); validate_html(&html); } } diff --git a/tests/bench/skip b/tests/bench/skip index d9afe9e2..33a6308e 100644 --- a/tests/bench/skip +++ b/tests/bench/skip @@ -1,3 +1,3 @@ block_list_flat:large list marker number -inline_links_flat:escaped attributes, empty hrefs 
+inline_links_flat:space before img, img attrs order inline_links_nested:empty link text diff --git a/tests/lib.rs b/tests/lib.rs index 984b6102..4fd36afd 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -14,7 +14,9 @@ macro_rules! suite_test { let expected = $expected; let p = jotdown::Parser::new(src); let mut actual = String::new(); - jotdown::html::Renderer.push(p, &mut actual).unwrap(); + jotdown::html::Renderer::default() + .push(p, &mut actual) + .unwrap(); assert_eq!( actual.trim(), expected.trim(), diff --git a/tests/suite/skip b/tests/suite/skip index aea68c34..75b4c75e 100644 --- a/tests/suite/skip +++ b/tests/suite/skip @@ -1,6 +1,5 @@ 38d85f9:multi-line block attributes 6c14561:multi-line block attributes -613a9d6:attribute container precedence f4f22fc:attribute key class order ae6fc15:bugged left/right quote 168469a:bugged left/right quote @@ -9,10 +8,7 @@ ae6fc15:bugged left/right quote e1f5b5e:untrimmed whitespace before linebreak 07888f3:div close within raw block 8423412:heading id conflict with existing id -00a46ed:clear inline formatting from link tags -a8e17c3:empty href c0a3dec:escape in url -e66af00:url container precedence 61876cf:roman alpha ambiguity f31b357:roman alpha ambiguity 642d380:table end in verbatim inline