diff --git a/src/elements/element.rs b/src/elements/element.rs index 1a52d5f..5cbc01a 100644 --- a/src/elements/element.rs +++ b/src/elements/element.rs @@ -357,7 +357,7 @@ impl SvgElement { // TODO: refactor this method to handle text event gen better let phantom = matches!(self.name(), "point" | "box"); - if self.has_attr("text") { + if self.has_attr("text") || self.has_attr("md") { let (orig_elem, text_elements) = process_text_attr(self)?; if orig_elem.name != "text" && !phantom { // We only care about the original element if it wasn't a text element diff --git a/src/elements/text.rs b/src/elements/text.rs index c90b049..9ef60b2 100644 --- a/src/elements/text.rs +++ b/src/elements/text.rs @@ -11,6 +11,291 @@ fn get_text_value(element: &mut SvgElement) -> String { text_string(&text_value) } +fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { + let text_value = element + .pop_attr("md") + .expect("no md attr in process_text_attr"); + + let (parsed_string, sections) = md_parse(&text_value); + + let mut state_per_char = vec![0; parsed_string.len()]; + + for s in sections { + let bit = s.code_bold_italic; + for i in s.start_ind..s.end_ind { + state_per_char[i] |= 1 << bit; + } + } + + let mut strings = vec![]; + let mut states = vec![]; + for i in 0..parsed_string.len() { + if i == 0 || states[states.len() - 1] != state_per_char[i] { + strings.push(String::new()); + states.push(state_per_char[i]) + } + strings + .last_mut() + .expect("filled from i == 0") + .push(parsed_string[i]); + } + + return (strings, states); +} + +#[derive(Debug)] +struct SectionData { + start_ind: usize, + end_ind: usize, + code_bold_italic: u32, +} + +// based on the commonmarkdown implementation +#[derive(Debug)] +struct DelimiterData { + ind: usize, // goes just before this char + char_type: char, + num_delimiters: usize, + is_active: bool, + could_open: bool, + could_close: bool, +} + +fn md_parse_escapes_and_delimiters(text_value: &str) -> (Vec, Vec) { + let mut result = vec![]; + let mut delimiters = vec![DelimiterData { + ind: 0, + char_type: ' ', + num_delimiters: 0, + is_active: false, + could_open: false, + could_close: false, + }]; + let mut escaped = false; + + // first pass process \ and find delimiters + for c in text_value.chars() { + let mut add = true; + match (c, escaped) { + ('\\', false) => { + add = false; + escaped = true; + } + ('\\', true) => { + escaped = true; + } + // the delimiters + ('`', false) | ('_', false) | ('*', false) => { + let last = delimiters.last_mut().expect("garenteed not to be empty"); + if c == last.char_type && last.ind == result.len() { + // is a continuation + last.num_delimiters += 1; + } else { + delimiters.push(DelimiterData { + ind: result.len(), + char_type: c, + num_delimiters: 1, + is_active: true, + could_open: true, + could_close: true, + }); + } + add = false; + } + ('`', true) | ('_', true) | ('*', true) => { + escaped = false; + } + ('n', true) => { + add = false; + result.push('\n'); + escaped = false; + } + (_, true) => { + // was not an escape + result.push('\\'); + escaped = false; + } + (_, false) => {} + } + if add { + result.push(c); + } + } + + return (result, delimiters); +} + +fn md_parse_set_delimiter_open_close(result: &Vec, delimiters: &mut Vec) { + // set could open/close + for i in 0..delimiters.len() { + let prev_char; + let next_char; + if i != 0 && delimiters[i - 1].ind == delimiters[i].ind { + prev_char = delimiters[i - 1].char_type; + } else if delimiters[i].ind == 0 { + prev_char = ' '; + } else { + prev_char = result[delimiters[i].ind - 1]; + } + + if i != delimiters.len() - 1 && delimiters[i + 1].ind == delimiters[i].ind { + next_char = delimiters[i + 1].char_type; + } else if delimiters[i].ind == result.len() { + next_char = ' '; + } else { + next_char = result[delimiters[i].ind]; + } + + match (prev_char.is_whitespace(), next_char.is_whitespace()) { + (false, false) => { + if delimiters[i].char_type == '_' { + delimiters[i].could_open = false; + delimiters[i].could_close = false; + } + } + (true, false) => { + delimiters[i].could_close = false; + } + (false, true) => { + delimiters[i].could_open = false; + } + (true, true) => { + delimiters[i].could_open = false; + delimiters[i].could_close = false; + } + } + + if next_char.is_ascii_punctuation() + && (!prev_char.is_whitespace() || !prev_char.is_ascii_punctuation()) + { + delimiters[i].could_open = false; + } + if prev_char.is_ascii_punctuation() + && (!next_char.is_whitespace() || !next_char.is_ascii_punctuation()) + { + delimiters[i].could_close = false; + } + } +} + +fn md_parse_eval_sections(delimiters: &mut Vec) -> Vec { + let mut sections = vec![]; + let stack_bottom = 0; // because I have a null element in it + let mut current_position = stack_bottom + 1; + let mut opener_a = [stack_bottom; 3]; + let mut opener_d = [stack_bottom; 3]; + let mut opener_t = [stack_bottom; 3]; + + loop { + while current_position != delimiters.len() + && !delimiters[current_position].could_close + && delimiters[current_position].is_active + { + current_position += 1; + } + if current_position == delimiters.len() { + break; + } + let opener_min = match delimiters[current_position].char_type { + '*' => &mut opener_a, + '_' => &mut opener_d, + '`' => &mut opener_t, + _ => panic!("this cant happen as current_position starts at 0 and all other delimiters are of above types"), + }; + + let min = opener_min[delimiters[current_position].num_delimiters % 3].max(stack_bottom); + let mut opener_ind = current_position - 1; + while opener_ind > min { + // found opener + if delimiters[opener_ind].is_active + && delimiters[opener_ind].could_open + && delimiters[opener_ind].char_type == delimiters[current_position].char_type + { + if (delimiters[opener_ind].could_close || delimiters[current_position].could_open) + && delimiters[opener_ind].num_delimiters % 3 + != delimiters[current_position].num_delimiters % 3 + { + } else { + // found valid opener + break; + } + } + opener_ind -= 1; + } + + if opener_ind == min { + // not found a opener + opener_min[delimiters[current_position].num_delimiters % 3] = current_position - 1; + current_position += 1; + } else { + delimiters[current_position].could_open = false; + delimiters[opener_ind].could_close = false; + // did + let code = delimiters[current_position].char_type == '`'; + let strong = !code + && delimiters[opener_ind].num_delimiters >= 2 + && delimiters[current_position].num_delimiters >= 2; + sections.push(SectionData { + start_ind: delimiters[opener_ind].ind, + end_ind: delimiters[current_position].ind, + code_bold_italic: match (code, strong) { + (true, _) => 0, + (_, true) => 1, + (_, _) => 2, + }, + }); + + delimiters[opener_ind].num_delimiters -= 1 + (strong as usize); + delimiters[current_position].num_delimiters -= 1 + (strong as usize); + + if delimiters[opener_ind].num_delimiters == 0 { + delimiters[opener_ind].is_active = false; + } + if delimiters[current_position].num_delimiters == 0 { + delimiters[current_position].is_active = false; + current_position += 1; + } + + for d in &mut delimiters[(opener_ind + 1)..current_position] { + d.is_active = false; + } + } + } + return sections; +} + +fn md_parse(text_value: &str) -> (Vec, Vec) { + let (mut result, mut delimiters) = md_parse_escapes_and_delimiters(text_value); + md_parse_set_delimiter_open_close(&result, &mut delimiters); + let mut sections = md_parse_eval_sections(&mut delimiters); + + let mut final_result = vec![]; + + // work from the back to avoid index invalidation + for d in delimiters.into_iter().rev() { + while d.ind < result.len() { + if let Some(thing) = result.pop() { + final_result.push(thing); + } + } + + for s in sections.iter_mut() { + // if start needs to be after or equal + if s.start_ind >= d.ind { + s.start_ind += d.num_delimiters as usize; + } + if s.end_ind > d.ind { + // if end needs to be after + s.end_ind += d.num_delimiters as usize; + } + } + let mut temp = vec![d.char_type; d.num_delimiters]; + final_result.append(&mut temp); + } + + return (final_result.into_iter().rev().collect(), sections); +} + /// Convert unescaped r"\n" into newline characters for multi-line text fn text_string(text_value: &str) -> String { let mut result = String::new(); @@ -162,22 +447,77 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec = text_value.lines().collect(); + let mut lines = vec![vec![]]; + let mut line_types = vec![vec![]]; + for i in 0..text_values.len() { + let mut segments = text_values[i].lines(); + + if let Some(first) = segments.next() { + if first != "" { + lines + .last_mut() + .expect("added item not removed") + .push(first); + line_types + .last_mut() + .expect("added item not removed") + .push(state_values[i]); + } else if i != 0 { + lines.push(vec![]); + line_types.push(vec![]); + } + } + + for s in segments { + lines.push(vec![s]); + line_types.push(vec![state_values[i]]); + } + + if let Some(last_char) = text_values[i].chars().last() { + if last_char == '\n' && i != text_values.len() - 1 { + lines.push(vec![]); + line_types.push(vec![]); + } + } + } + + for i in 0..lines.len() { + if lines[i].len() == 0 { + lines[i].push(""); + line_types[i].push(0); + } + } let line_count = lines.len(); - let multiline = line_count > 1; + let multielement = line_count > 1 || text_values.len() > 1; let vertical = orig_elem.has_class("d-text-vertical"); // Whether text is pre-formatted (i.e. spaces are not collapsed) let text_pre = orig_elem.has_class("d-text-pre"); - // There will always be a text element; if not multiline this is the only element. + // There will always be a text element; if not multielement this is the only element. let mut text_elem = if orig_elem.name() == "text" { orig_elem.clone() } else { @@ -244,6 +584,18 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec Result<(SvgElement, Vec Result<(SvgElement, Vec String { + "SectionData { start_ind: ".to_owned() + + &s.to_string() + + ", end_ind: " + + &e.to_string() + + ", code_bold_italic: " + + &i.to_string() + + " }" + } + + // using the md + let text = r"He*ll*o, \nworld!"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" + .to_owned() + + &sd(2, 4, 2) + + "])" + ); + + // mismatched + let text = r"*Hello** , \nworld!"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['H', 'e', 'l', 'l', 'o', '*', ' ', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" + .to_owned() + + &sd(0, 5, 2) + "])" + ); + + // diff type + let text = r"He*llo_, \nworld!"; + assert_eq!(format!("{:?}",md_parse(text)), "(['H', 'e', '*', 'l', 'l', 'o', '_', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [])"); + + // multiple diff type + let text = r"_hello*"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['_', 'h', 'e', 'l', 'l', 'o', '*'], [])" + ); + + // multiple same type + let text = r"He*ll*o, \nw*or*ld!"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" + .to_owned() + + &sd(2, 4, 2) + + ", " + + &sd(9, 11, 2) + + "])" + ); + + // space before + let text = r"**foo bar **"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['*', '*', 'f', 'o', 'o', ' ', 'b', 'a', 'r', ' ', '*', '*'], [])" + ); + + // punctuation before alphnum after + let text = r"**(**foo)"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['*', '*', '(', '*', '*', 'f', 'o', 'o', ')'], [])" + ); + } + + #[test] + fn test_get_md_value() { + let mut el = SvgElement::new("text", &[]); + let text = r"foo"; + el.set_attr("md", text); + assert_eq!(format!("{:?}", get_md_value(&mut el)), "([\"foo\"], [0])"); + + let text = r"**(**foo)"; + el.set_attr("md", text); + assert_eq!( + format!("{:?}", get_md_value(&mut el)), + "([\"**(**foo)\"], [0])" + ); + + let text = r"*foo *bar**"; + el.set_attr("md", text); + assert_eq!( + format!("{:?}", get_md_value(&mut el)), + "([\"foo bar\"], [4])" + ); + + let text = r"*foo**bar**baz*"; + el.set_attr("md", text); + assert_eq!( + format!("{:?}", get_md_value(&mut el)), + "([\"foo\", \"bar\", \"baz\"], [4, 6, 4])" + ); + } } diff --git a/tests/integration_tests/text_attr.rs b/tests/integration_tests/text_attr.rs index 2df86ec..c011d80 100644 --- a/tests/integration_tests/text_attr.rs +++ b/tests/integration_tests/text_attr.rs @@ -614,3 +614,20 @@ fn test_multiline_outside() { expected.trim() ); } + +#[test] +fn test_md() { + let input = r#" + +"#; + let expected = r#" + + +multiline + +"#; + assert_eq!( + transform_str_default(input).unwrap().trim(), + expected.trim() + ); +}