From 51fd9286b98d606a94d3b1ba25b218e56772299b Mon Sep 17 00:00:00 2001 From: megamaths Date: Fri, 1 Aug 2025 10:00:05 +0100 Subject: [PATCH 1/3] added basic markdown attrib and parsing --- src/elements/element.rs | 2 +- src/elements/text.rs | 569 +++++++++++++++++++++++++-- tests/integration_tests/text_attr.rs | 17 + 3 files changed, 550 insertions(+), 38 deletions(-) diff --git a/src/elements/element.rs b/src/elements/element.rs index 1a52d5f..5cbc01a 100644 --- a/src/elements/element.rs +++ b/src/elements/element.rs @@ -357,7 +357,7 @@ impl SvgElement { // TODO: refactor this method to handle text event gen better let phantom = matches!(self.name(), "point" | "box"); - if self.has_attr("text") { + if self.has_attr("text") || self.has_attr("md") { let (orig_elem, text_elements) = process_text_attr(self)?; if orig_elem.name != "text" && !phantom { // We only care about the original element if it wasn't a text element diff --git a/src/elements/text.rs b/src/elements/text.rs index c90b049..efc6441 100644 --- a/src/elements/text.rs +++ b/src/elements/text.rs @@ -11,6 +11,279 @@ fn get_text_value(element: &mut SvgElement) -> String { text_string(&text_value) } +fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { + let text_value = element + .pop_attr("md") + .expect("no md attr in process_text_attr"); + + let (parsed_string, sections) = md_parse(&text_value); + + let mut state_per_char = vec![0; parsed_string.len()]; + + for i in 0..sections.len() { + let bit = sections[i].code_bold_italic; + for j in sections[i].start_ind..sections[i].end_ind { + state_per_char[j] |= 1 << bit; + } + } + + let mut strings = vec![]; + let mut states = vec![]; + for i in 0..parsed_string.len() { + if i == 0 || states[states.len() - 1] != state_per_char[i] { + strings.push(String::new()); + states.push(state_per_char[i]) + } + strings + .last_mut() + .expect("filled from i == 0") + .push(parsed_string[i]); + } + + return (strings, states); +} + +#[derive(Debug)] +struct SectionData { + start_ind: usize, + end_ind: usize, + code_bold_italic: u32, +} + +// based on the commonmarkdown implementation +#[derive(Debug)] +struct DelimiterData { + ind: usize, // goes just before this char + char_type: char, + num_delimiters: u32, + is_active: bool, + could_open: bool, + could_close: bool, +} + +fn md_parse(text_value: &str) -> (Vec, Vec) { + let mut sections = vec![]; + let mut result = vec![]; + let mut delimiters = vec![DelimiterData { + ind: 0, + char_type: ' ', + num_delimiters: 0, + is_active: false, + could_open: false, + could_close: false, + }]; + let mut escaped = false; + + // first pass process \ and find delimiters + for c in text_value.chars() { + let mut add = true; + if c == '\\' { + if !escaped { + add = false; + escaped = true; + } else { + escaped = false; + } + } + // the delimiters + else if c == '`' || c == '_' || c == '*' { + if !escaped { + let last = delimiters.last_mut().expect("garenteed not to be empty"); + if c == last.char_type && last.ind == result.len() { + // is a continuation + last.num_delimiters += 1; + } else { + delimiters.push(DelimiterData { + ind: result.len(), + char_type: c, + num_delimiters: 1, + is_active: true, + could_open: true, + could_close: true, + }); + } + add = false; + } else { + escaped = true; + } + } else if escaped { + if c == 'n' { + add = false; + result.push('\n'); + } else { + // was not an escape + result.push('\\'); + } + escaped = false; + } + + if add { + result.push(c); + } + } + + // set could open/close + for i in 0..delimiters.len() { + let prev_char; + let next_char; + if i != 0 && delimiters[i - 1].ind == delimiters[i].ind { + prev_char = delimiters[i - 1].char_type; + } else if delimiters[i].ind == 0 { + prev_char = ' '; + } else { + prev_char = result[delimiters[i].ind - 1]; + } + + if i != delimiters.len() - 1 && delimiters[i + 1].ind == delimiters[i].ind { + next_char = delimiters[i + 1].char_type; + } else if delimiters[i].ind == result.len() { + next_char = ' '; + } else { + next_char = result[delimiters[i].ind]; + } + + if next_char.is_whitespace() { + delimiters[i].could_open = false; + } + if prev_char.is_whitespace() { + delimiters[i].could_close = false; + } + if !next_char.is_whitespace() + && !prev_char.is_whitespace() + && delimiters[i].char_type == '_' + { + delimiters[i].could_open = false; + delimiters[i].could_close = false; + } + + if next_char.is_ascii_punctuation() + && (!prev_char.is_whitespace() || !prev_char.is_ascii_punctuation()) + { + delimiters[i].could_open = false; + } + if prev_char.is_ascii_punctuation() + && (!next_char.is_whitespace() || !next_char.is_ascii_punctuation()) + { + delimiters[i].could_close = false; + } + } + + let stack_bottom = 0; // because I have a null element in it + let mut current_position = stack_bottom + 1; + let mut opener_a = [stack_bottom; 3]; + let mut opener_d = [stack_bottom; 3]; + let mut opener_t = [stack_bottom; 3]; + + loop { + while current_position != delimiters.len() + && !delimiters[current_position].could_close + && delimiters[current_position].is_active + { + current_position += 1; + } + if current_position == delimiters.len() { + break; + } + let opener_min = match delimiters[current_position].char_type { + '*' => &mut opener_a, + '_' => &mut opener_d, + '`' => &mut opener_t, + _ => panic!(), + }; + println!("{} {:?}", current_position, delimiters); + + let min = opener_min[(delimiters[current_position].num_delimiters % 3) as usize] + .max(stack_bottom); + let mut opener_ind = current_position - 1; + while opener_ind > min { + // found opener + if delimiters[opener_ind].is_active + && delimiters[opener_ind].could_open + && delimiters[opener_ind].char_type == delimiters[current_position].char_type + { + if (delimiters[opener_ind].could_close || delimiters[current_position].could_open) + && delimiters[opener_ind].num_delimiters % 3 + != delimiters[current_position].num_delimiters % 3 + { + } else { + break; + } + } + opener_ind -= 1; + } + + if opener_ind == min { + // not found a opener + opener_min[(delimiters[current_position].num_delimiters % 3) as usize] = + current_position - 1; + current_position += 1; + } else { + delimiters[current_position].could_open = false; + delimiters[opener_ind].could_close = false; + // did + let code = delimiters[current_position].char_type == '`'; + let strong = !code + && delimiters[opener_ind].num_delimiters >= 2 + && delimiters[current_position].num_delimiters >= 2; + sections.push(SectionData { + start_ind: delimiters[opener_ind].ind, + end_ind: delimiters[current_position].ind, + code_bold_italic: if code { + 0 + } else if strong { + 1 + } else { + 2 + }, + }); + + println!("{} {} {}", opener_ind, current_position, strong); + delimiters[opener_ind].num_delimiters -= 1 + (strong as u32); + delimiters[current_position].num_delimiters -= 1 + (strong as u32); + + if delimiters[opener_ind].num_delimiters == 0 { + delimiters[opener_ind].is_active = false; + } + if delimiters[current_position].num_delimiters == 0 { + delimiters[current_position].is_active = false; + current_position += 1; + } + + for i in (opener_ind + 1)..current_position { + delimiters[i].is_active = false; + } + } + } + println!(); + + let mut final_result = vec![]; + + // work from the back to avoid index invalidation + for i in (0..delimiters.len()).rev() { + while delimiters[i].ind < result.len() { + if let Some(thing) = result.pop() { + final_result.push(thing); + } + } + + for j in 0..sections.len() { + // if start needs to be after or equal + if sections[j].start_ind >= delimiters[i].ind { + sections[j].start_ind += delimiters[i].num_delimiters as usize; + } + if sections[j].end_ind > delimiters[i].ind { + // if end needs to be after + sections[j].end_ind += delimiters[i].num_delimiters as usize; + } + } + for _ in 0..delimiters[i].num_delimiters { + final_result.push(delimiters[i].char_type); + } + } + + return (final_result.into_iter().rev().collect(), sections); +} + /// Convert unescaped r"\n" into newline characters for multi-line text fn text_string(text_value: &str) -> String { let mut result = String::new(); @@ -150,6 +423,27 @@ fn get_text_position(element: &mut SvgElement) -> Result<(f32, f32, bool, LocSpe Ok((tdx, tdy, outside, text_anchor, text_classes)) } +fn get_text_len(mono: bool, text: String) -> f32 { + if mono { + return 0.6 * text.len() as f32; + } + let mut length = 0.0; + + let long = ['m', 'w']; + let short = ['f', 'i', 'j', 'l', 'r', 't']; + for i in text.chars() { + if long.contains(&i) { + length += 0.8; + } else if short.contains(&i) { + length += 0.33; + } else { + length += 0.6; + } + } + + return length; +} + pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec)> { // Different conversions from line count to first-line offset based on whether // top, center, or bottom justification. @@ -162,22 +456,80 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec = text_value.lines().collect(); + let mut lines = vec![vec![]]; + let mut line_types = vec![vec![]]; + for i in 0..text_values.len() { + let mut segments = text_values[i].lines(); + + if let Some(first) = segments.next() { + if first != "" { + lines + .last_mut() + .expect("added item not removed") + .push(first); + line_types + .last_mut() + .expect("added item not removed") + .push(state_values[i]); + } else if i != 0 { + lines.push(vec![]); + line_types.push(vec![]); + } + } + + for s in segments { + lines.push(vec![s]); + line_types.push(vec![state_values[i]]); + } + + if let Some(last_char) = text_values[i].chars().last() { + if last_char == '\n' && i != text_values.len() - 1 { + lines.push(vec![]); + line_types.push(vec![]); + } + } + } + + for i in 0..lines.len() { + if lines[i].len() == 0 { + lines[i].push(""); + line_types[i].push(0); + } + } let line_count = lines.len(); + println!("{:?}", text_values[0].lines().collect::>()); + println!("{:?}", text_values); + println!("{:?}", lines); - let multiline = line_count > 1; + let multielement = line_count > 1 || text_values.len() > 1; let vertical = orig_elem.has_class("d-text-vertical"); // Whether text is pre-formatted (i.e. spaces are not collapsed) let text_pre = orig_elem.has_class("d-text-pre"); - // There will always be a text element; if not multiline this is the only element. + // There will always be a text element; if not multielement this is the only element. let mut text_elem = if orig_elem.name() == "text" { orig_elem.clone() } else { @@ -276,9 +628,9 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec Result<(SvgElement, Vec String { + "SectionData { start_ind: ".to_owned() + + &s.to_string() + + ", end_ind: " + + &e.to_string() + + ", code_bold_italic: " + + &i.to_string() + + " }" + } + + // using the md + let text = r"He*ll*o, \nworld!"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" + .to_owned() + + &sd(2, 4, 2) + + "])" + ); + + // mismatched + let text = r"*Hello** , \nworld!"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['H', 'e', 'l', 'l', 'o', '*', ' ', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" + .to_owned() + + &sd(0, 5, 2) + "])" + ); + + // diff type + let text = r"He*llo_, \nworld!"; + assert_eq!(format!("{:?}",md_parse(text)), "(['H', 'e', '*', 'l', 'l', 'o', '_', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [])"); + + // multiple diff type + let text = r"_hello*"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['_', 'h', 'e', 'l', 'l', 'o', '*'], [])" + ); + + // multiple same type + let text = r"He*ll*o, \nw*or*ld!"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['H', 'e', 'l', 'l', 'o', ',', ' ', '\\n', 'w', 'o', 'r', 'l', 'd', '!'], [" + .to_owned() + + &sd(2, 4, 2) + + ", " + + &sd(9, 11, 2) + + "])" + ); + + // space before + let text = r"**foo bar **"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['*', '*', 'f', 'o', 'o', ' ', 'b', 'a', 'r', ' ', '*', '*'], [])" + ); + + // punctuation before alphnum after + let text = r"**(**foo)"; + assert_eq!( + format!("{:?}", md_parse(text)), + "(['*', '*', '(', '*', '*', 'f', 'o', 'o', ')'], [])" + ); + } + + #[test] + fn test_get_md_value() { + let mut el = SvgElement::new("text", &[]); + let text = r"foo"; + el.set_attr("md", text); + assert_eq!(format!("{:?}", get_md_value(&mut el)), "([\"foo\"], [0])"); + + let text = r"**(**foo)"; + el.set_attr("md", text); + assert_eq!( + format!("{:?}", get_md_value(&mut el)), + "([\"**(**foo)\"], [0])" + ); + + let text = r"*foo *bar**"; + el.set_attr("md", text); + assert_eq!( + format!("{:?}", get_md_value(&mut el)), + "([\"foo bar\"], [4])" + ); + + let text = r"*foo**bar**baz*"; + el.set_attr("md", text); + assert_eq!( + format!("{:?}", get_md_value(&mut el)), + "([\"foo\", \"bar\", \"baz\"], [4, 6, 4])" + ); + } } diff --git a/tests/integration_tests/text_attr.rs b/tests/integration_tests/text_attr.rs index 2df86ec..c011d80 100644 --- a/tests/integration_tests/text_attr.rs +++ b/tests/integration_tests/text_attr.rs @@ -614,3 +614,20 @@ fn test_multiline_outside() { expected.trim() ); } + +#[test] +fn test_md() { + let input = r#" + +"#; + let expected = r#" + + +multiline + +"#; + assert_eq!( + transform_str_default(input).unwrap().trim(), + expected.trim() + ); +} From 6476018acfbaa98a8df05164ecb57e5d2fccaead Mon Sep 17 00:00:00 2001 From: megamaths Date: Tue, 5 Aug 2025 14:50:29 +0100 Subject: [PATCH 2/3] clean the printlines and fixed problem with single element md not adding styles --- src/elements/text.rs | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/src/elements/text.rs b/src/elements/text.rs index efc6441..7fd6d04 100644 --- a/src/elements/text.rs +++ b/src/elements/text.rs @@ -190,7 +190,6 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { '`' => &mut opener_t, _ => panic!(), }; - println!("{} {:?}", current_position, delimiters); let min = opener_min[(delimiters[current_position].num_delimiters % 3) as usize] .max(stack_bottom); @@ -206,6 +205,7 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { != delimiters[current_position].num_delimiters % 3 { } else { + // found valid opener break; } } @@ -237,7 +237,6 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { }, }); - println!("{} {} {}", opener_ind, current_position, strong); delimiters[opener_ind].num_delimiters -= 1 + (strong as u32); delimiters[current_position].num_delimiters -= 1 + (strong as u32); @@ -254,7 +253,6 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { } } } - println!(); let mut final_result = vec![]; @@ -423,27 +421,6 @@ fn get_text_position(element: &mut SvgElement) -> Result<(f32, f32, bool, LocSpe Ok((tdx, tdy, outside, text_anchor, text_classes)) } -fn get_text_len(mono: bool, text: String) -> f32 { - if mono { - return 0.6 * text.len() as f32; - } - let mut length = 0.0; - - let long = ['m', 'w']; - let short = ['f', 'i', 'j', 'l', 'r', 't']; - for i in text.chars() { - if long.contains(&i) { - length += 0.8; - } else if short.contains(&i) { - length += 0.33; - } else { - length += 0.6; - } - } - - return length; -} - pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec)> { // Different conversions from line count to first-line offset based on whether // top, center, or bottom justification. @@ -520,9 +497,6 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec>()); - println!("{:?}", text_values); - println!("{:?}", lines); let multielement = line_count > 1 || text_values.len() > 1; let vertical = orig_elem.has_class("d-text-vertical"); @@ -596,6 +570,18 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec Date: Tue, 5 Aug 2025 16:25:22 +0100 Subject: [PATCH 3/3] split markdown parser into multiple functions --- src/elements/text.rs | 134 ++++++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 60 deletions(-) diff --git a/src/elements/text.rs b/src/elements/text.rs index 7fd6d04..9ef60b2 100644 --- a/src/elements/text.rs +++ b/src/elements/text.rs @@ -20,10 +20,10 @@ fn get_md_value(element: &mut SvgElement) -> (Vec, Vec) { let mut state_per_char = vec![0; parsed_string.len()]; - for i in 0..sections.len() { - let bit = sections[i].code_bold_italic; - for j in sections[i].start_ind..sections[i].end_ind { - state_per_char[j] |= 1 << bit; + for s in sections { + let bit = s.code_bold_italic; + for i in s.start_ind..s.end_ind { + state_per_char[i] |= 1 << bit; } } @@ -55,14 +55,13 @@ struct SectionData { struct DelimiterData { ind: usize, // goes just before this char char_type: char, - num_delimiters: u32, + num_delimiters: usize, is_active: bool, could_open: bool, could_close: bool, } -fn md_parse(text_value: &str) -> (Vec, Vec) { - let mut sections = vec![]; +fn md_parse_escapes_and_delimiters(text_value: &str) -> (Vec, Vec) { let mut result = vec![]; let mut delimiters = vec![DelimiterData { ind: 0, @@ -77,17 +76,16 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { // first pass process \ and find delimiters for c in text_value.chars() { let mut add = true; - if c == '\\' { - if !escaped { + match (c, escaped) { + ('\\', false) => { add = false; escaped = true; - } else { - escaped = false; } - } - // the delimiters - else if c == '`' || c == '_' || c == '*' { - if !escaped { + ('\\', true) => { + escaped = true; + } + // the delimiters + ('`', false) | ('_', false) | ('*', false) => { let last = delimiters.last_mut().expect("garenteed not to be empty"); if c == last.char_type && last.ind == result.len() { // is a continuation @@ -103,25 +101,31 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { }); } add = false; - } else { - escaped = true; } - } else if escaped { - if c == 'n' { + ('`', true) | ('_', true) | ('*', true) => { + escaped = false; + } + ('n', true) => { add = false; result.push('\n'); - } else { + escaped = false; + } + (_, true) => { // was not an escape result.push('\\'); + escaped = false; } - escaped = false; + (_, false) => {} } - if add { result.push(c); } } + return (result, delimiters); +} + +fn md_parse_set_delimiter_open_close(result: &Vec, delimiters: &mut Vec) { // set could open/close for i in 0..delimiters.len() { let prev_char; @@ -142,18 +146,23 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { next_char = result[delimiters[i].ind]; } - if next_char.is_whitespace() { - delimiters[i].could_open = false; - } - if prev_char.is_whitespace() { - delimiters[i].could_close = false; - } - if !next_char.is_whitespace() - && !prev_char.is_whitespace() - && delimiters[i].char_type == '_' - { - delimiters[i].could_open = false; - delimiters[i].could_close = false; + match (prev_char.is_whitespace(), next_char.is_whitespace()) { + (false, false) => { + if delimiters[i].char_type == '_' { + delimiters[i].could_open = false; + delimiters[i].could_close = false; + } + } + (true, false) => { + delimiters[i].could_close = false; + } + (false, true) => { + delimiters[i].could_open = false; + } + (true, true) => { + delimiters[i].could_open = false; + delimiters[i].could_close = false; + } } if next_char.is_ascii_punctuation() @@ -167,7 +176,10 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { delimiters[i].could_close = false; } } +} +fn md_parse_eval_sections(delimiters: &mut Vec) -> Vec { + let mut sections = vec![]; let stack_bottom = 0; // because I have a null element in it let mut current_position = stack_bottom + 1; let mut opener_a = [stack_bottom; 3]; @@ -188,11 +200,10 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { '*' => &mut opener_a, '_' => &mut opener_d, '`' => &mut opener_t, - _ => panic!(), + _ => panic!("this cant happen as current_position starts at 0 and all other delimiters are of above types"), }; - let min = opener_min[(delimiters[current_position].num_delimiters % 3) as usize] - .max(stack_bottom); + let min = opener_min[delimiters[current_position].num_delimiters % 3].max(stack_bottom); let mut opener_ind = current_position - 1; while opener_ind > min { // found opener @@ -214,8 +225,7 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { if opener_ind == min { // not found a opener - opener_min[(delimiters[current_position].num_delimiters % 3) as usize] = - current_position - 1; + opener_min[delimiters[current_position].num_delimiters % 3] = current_position - 1; current_position += 1; } else { delimiters[current_position].could_open = false; @@ -228,17 +238,15 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { sections.push(SectionData { start_ind: delimiters[opener_ind].ind, end_ind: delimiters[current_position].ind, - code_bold_italic: if code { - 0 - } else if strong { - 1 - } else { - 2 + code_bold_italic: match (code, strong) { + (true, _) => 0, + (_, true) => 1, + (_, _) => 2, }, }); - delimiters[opener_ind].num_delimiters -= 1 + (strong as u32); - delimiters[current_position].num_delimiters -= 1 + (strong as u32); + delimiters[opener_ind].num_delimiters -= 1 + (strong as usize); + delimiters[current_position].num_delimiters -= 1 + (strong as usize); if delimiters[opener_ind].num_delimiters == 0 { delimiters[opener_ind].is_active = false; @@ -248,35 +256,41 @@ fn md_parse(text_value: &str) -> (Vec, Vec) { current_position += 1; } - for i in (opener_ind + 1)..current_position { - delimiters[i].is_active = false; + for d in &mut delimiters[(opener_ind + 1)..current_position] { + d.is_active = false; } } } + return sections; +} + +fn md_parse(text_value: &str) -> (Vec, Vec) { + let (mut result, mut delimiters) = md_parse_escapes_and_delimiters(text_value); + md_parse_set_delimiter_open_close(&result, &mut delimiters); + let mut sections = md_parse_eval_sections(&mut delimiters); let mut final_result = vec![]; // work from the back to avoid index invalidation - for i in (0..delimiters.len()).rev() { - while delimiters[i].ind < result.len() { + for d in delimiters.into_iter().rev() { + while d.ind < result.len() { if let Some(thing) = result.pop() { final_result.push(thing); } } - for j in 0..sections.len() { + for s in sections.iter_mut() { // if start needs to be after or equal - if sections[j].start_ind >= delimiters[i].ind { - sections[j].start_ind += delimiters[i].num_delimiters as usize; + if s.start_ind >= d.ind { + s.start_ind += d.num_delimiters as usize; } - if sections[j].end_ind > delimiters[i].ind { + if s.end_ind > d.ind { // if end needs to be after - sections[j].end_ind += delimiters[i].num_delimiters as usize; + s.end_ind += d.num_delimiters as usize; } } - for _ in 0..delimiters[i].num_delimiters { - final_result.push(delimiters[i].char_type); - } + let mut temp = vec![d.char_type; d.num_delimiters]; + final_result.append(&mut temp); } return (final_result.into_iter().rev().collect(), sections); @@ -571,7 +585,7 @@ pub fn process_text_attr(element: &SvgElement) -> Result<(SvgElement, Vec