From d2dc0bb96a8888c4cf7d9d19b38b5660917a1633 Mon Sep 17 00:00:00 2001 From: Wolf Vollprecht Date: Thu, 12 Feb 2026 15:55:48 +0100 Subject: [PATCH 1/2] add parsing of old style diffs --- .gitignore | 2 + src/patch/parse_normal.rs | 381 ++++++++++++++++++ test-data/normal-diff/add_only.diff | 4 + test-data/normal-diff/change_and_add.diff | 6 + test-data/normal-diff/complex.diff | 14 + .../normal-diff/delete_insert_delete.diff | 6 + test-data/normal-diff/new1.txt | 6 + test-data/normal-diff/new2.txt | 7 + test-data/normal-diff/new3.txt | 5 + test-data/normal-diff/new4.txt | 11 + test-data/normal-diff/old1.txt | 5 + test-data/normal-diff/old2.txt | 8 + test-data/normal-diff/old3.txt | 3 + test-data/normal-diff/old4.txt | 6 + 14 files changed, 464 insertions(+) create mode 100644 src/patch/parse_normal.rs create mode 100644 test-data/normal-diff/add_only.diff create mode 100644 test-data/normal-diff/change_and_add.diff create mode 100644 test-data/normal-diff/complex.diff create mode 100644 test-data/normal-diff/delete_insert_delete.diff create mode 100644 test-data/normal-diff/new1.txt create mode 100644 test-data/normal-diff/new2.txt create mode 100644 test-data/normal-diff/new3.txt create mode 100644 test-data/normal-diff/new4.txt create mode 100644 test-data/normal-diff/old1.txt create mode 100644 test-data/normal-diff/old2.txt create mode 100644 test-data/normal-diff/old3.txt create mode 100644 test-data/normal-diff/old4.txt diff --git a/.gitignore b/.gitignore index 96ef6c0..da5469a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /target Cargo.lock + +.DS_Store \ No newline at end of file diff --git a/src/patch/parse_normal.rs b/src/patch/parse_normal.rs new file mode 100644 index 0000000..2c92c3a --- /dev/null +++ b/src/patch/parse_normal.rs @@ -0,0 +1,381 @@ +//! Parser for the traditional (normal) diff format. +//! +//! Normal diff format uses commands like: +//! - `NaR` - add lines R from new file after line N in old file +//! - `NcR` - change lines N in old file to lines R from new file +//! - `NdR` - delete lines N from old file (would appear after line R in new file) +//! +//! Where N and R are line numbers or ranges like `start,end`. +//! +//! Lines from the old file are prefixed with `< ` and lines from the new file +//! are prefixed with `> `. Change commands have a `---` separator between the +//! old and new lines. + +use super::{Diff, Hunk, HunkRange, Line}; +use crate::utils::{LineIter, Text}; + +use super::parse::ParsePatchError; + +type Result = std::result::Result; + +/// Detect whether the input looks like a normal diff (as opposed to unified). +/// +/// Returns `true` if the first non-empty line matches the pattern +/// `\d+(,\d+)?[acd]\d+(,\d+)?`. +pub fn is_normal_diff(input: &T) -> bool { + let first_line = LineIter::new(input).next(); + if let Some((line, _end)) = first_line + && let Some(s) = line.as_str() + { + return parse_command_line(s).is_some(); + } + false +} + +/// Parse a normal diff format string into a `Diff`. +pub fn parse_normal(input: &str) -> Result> { + let hunks = parse_normal_hunks(input)?; + Ok(Diff::new(None::<&str>, None::<&str>, hunks)) +} + +/// Parse a normal diff format byte slice into a `Diff`. +pub fn parse_normal_bytes(input: &[u8]) -> Result> { + let hunks = parse_normal_hunks(input)?; + Ok(Diff::new(None::<&[u8]>, None::<&[u8]>, hunks)) +} + +/// Parse multiple normal diffs (not really applicable for normal format, +/// but provided for API consistency). Normal diff doesn't have multi-file +/// support built in, so this just returns a single diff. +pub fn parse_normal_multiple(input: &str) -> Result>> { + Ok(vec![parse_normal(input)?]) +} + +/// Parse multiple normal diffs from bytes. +pub fn parse_normal_bytes_multiple(input: &[u8]) -> Result>> { + Ok(vec![parse_normal_bytes(input)?]) +} + +/// A parsed command line from a normal diff. +#[derive(Debug, Clone, Copy)] +struct NormalCommand { + old_start: usize, + old_end: usize, + command: char, + new_start: usize, + new_end: usize, +} + +/// Parse a command line like `3c3`, `1,2d5`, `0a1,3`. +fn parse_command_line(line: &str) -> Option { + // Find the command character (a, c, or d) + let cmd_pos = line.find(['a', 'c', 'd'])?; + let command = line.as_bytes()[cmd_pos] as char; + + let left = &line[..cmd_pos]; + let right = &line[cmd_pos + 1..]; + + let (old_start, old_end) = parse_range(left)?; + let (new_start, new_end) = parse_range(right)?; + + Some(NormalCommand { + old_start, + old_end, + command, + new_start, + new_end, + }) +} + +/// Parse a range like `3` or `1,5`. +fn parse_range(s: &str) -> Option<(usize, usize)> { + if let Some((start, end)) = s.split_once(',') { + let start: usize = start.parse().ok()?; + let end: usize = end.parse().ok()?; + Some((start, end)) + } else { + let n: usize = s.parse().ok()?; + Some((n, n)) + } +} + +fn parse_normal_hunks<'a, T: Text + ?Sized + ToOwned>(input: &'a T) -> Result>> { + let all_lines: Vec<_> = LineIter::new(input).collect(); + let mut hunks = Vec::new(); + let mut i = 0; + + while i < all_lines.len() { + let (line, _end) = all_lines[i]; + + // Try to parse as a command line + let line_str = line.as_str().ok_or(ParsePatchError::HunkHeader)?; + + // Skip empty lines + if line_str.trim().is_empty() { + i += 1; + continue; + } + + let cmd = parse_command_line(line_str).ok_or(ParsePatchError::HunkHeader)?; + i += 1; + + let mut lines: Vec> = Vec::new(); + + match cmd.command { + 'a' => { + // Add: lines from new file prefixed with "> " + while i < all_lines.len() { + let (l, _) = all_lines[i]; + if let Some(content) = l.strip_prefix("> ") { + lines.push(Line::Insert((content, all_lines[i].1))); + i += 1; + } else { + break; + } + } + + let old_range = HunkRange::new(cmd.old_start + 1, 0); + let new_range = HunkRange::new(cmd.new_start, cmd.new_end - cmd.new_start + 1); + hunks.push(Hunk::new(old_range, new_range, None, lines)); + } + 'd' => { + // Delete: lines from old file prefixed with "< " + while i < all_lines.len() { + let (l, _) = all_lines[i]; + if let Some(content) = l.strip_prefix("< ") { + lines.push(Line::Delete((content, all_lines[i].1))); + i += 1; + } else { + break; + } + } + + let old_range = HunkRange::new(cmd.old_start, cmd.old_end - cmd.old_start + 1); + let new_range = HunkRange::new(cmd.new_start, 0); + hunks.push(Hunk::new(old_range, new_range, None, lines)); + } + 'c' => { + // Change: old lines with "< ", then "---", then new lines with "> " + while i < all_lines.len() { + let (l, _) = all_lines[i]; + if let Some(content) = l.strip_prefix("< ") { + lines.push(Line::Delete((content, all_lines[i].1))); + i += 1; + } else { + break; + } + } + + // Expect "---" separator + if i < all_lines.len() { + let (l, _) = all_lines[i]; + if l.as_str() == Some("---") { + i += 1; + } else { + return Err(ParsePatchError::HunkHeader); + } + } else { + return Err(ParsePatchError::UnexpectedEof); + } + + while i < all_lines.len() { + let (l, _) = all_lines[i]; + if let Some(content) = l.strip_prefix("> ") { + lines.push(Line::Insert((content, all_lines[i].1))); + i += 1; + } else { + break; + } + } + + let old_range = HunkRange::new(cmd.old_start, cmd.old_end - cmd.old_start + 1); + let new_range = HunkRange::new(cmd.new_start, cmd.new_end - cmd.new_start + 1); + hunks.push(Hunk::new(old_range, new_range, None, lines)); + } + _ => return Err(ParsePatchError::HunkHeader), + } + } + + if hunks.is_empty() { + return Err(ParsePatchError::NoHunks); + } + + Ok(hunks) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::apply; + use std::path::PathBuf; + + fn test_data_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("test-data") + .join("normal-diff") + } + + #[test] + fn test_is_normal_diff() { + assert!(is_normal_diff("2c2\n< old\n---\n> new\n")); + assert!(is_normal_diff("1,3d0\n< a\n< b\n< c\n")); + assert!(is_normal_diff("0a1,2\n> x\n> y\n")); + assert!(!is_normal_diff("--- a/file\n+++ b/file\n")); + assert!(!is_normal_diff("@@ -1,3 +1,3 @@\n")); + assert!(!is_normal_diff("diff --git a/f b/f\n")); + } + + #[test] + fn test_parse_change_and_add() { + let dir = test_data_dir(); + let old = std::fs::read_to_string(dir.join("old1.txt")).unwrap(); + let new = std::fs::read_to_string(dir.join("new1.txt")).unwrap(); + let patch_str = std::fs::read_to_string(dir.join("change_and_add.diff")).unwrap(); + + let diff = parse_normal(&patch_str).unwrap(); + assert_eq!(diff.hunks().len(), 2); + + let (result, stats) = apply(&old, &diff).unwrap(); + assert_eq!(result, new); + assert!(stats.has_changes()); + } + + #[test] + fn test_parse_delete_insert_delete() { + let dir = test_data_dir(); + let old = std::fs::read_to_string(dir.join("old2.txt")).unwrap(); + let new = std::fs::read_to_string(dir.join("new2.txt")).unwrap(); + let patch_str = std::fs::read_to_string(dir.join("delete_insert_delete.diff")).unwrap(); + + let diff = parse_normal(&patch_str).unwrap(); + assert_eq!(diff.hunks().len(), 3); + + let (result, stats) = apply(&old, &diff).unwrap(); + assert_eq!(result, new); + assert!(stats.has_changes()); + } + + #[test] + fn test_parse_add_only() { + let dir = test_data_dir(); + let old = std::fs::read_to_string(dir.join("old3.txt")).unwrap(); + let new = std::fs::read_to_string(dir.join("new3.txt")).unwrap(); + let patch_str = std::fs::read_to_string(dir.join("add_only.diff")).unwrap(); + + let diff = parse_normal(&patch_str).unwrap(); + assert_eq!(diff.hunks().len(), 2); + + let (result, stats) = apply(&old, &diff).unwrap(); + assert_eq!(result, new); + assert!(stats.has_changes()); + } + + #[test] + fn test_parse_complex() { + let dir = test_data_dir(); + let old = std::fs::read_to_string(dir.join("old4.txt")).unwrap(); + let new = std::fs::read_to_string(dir.join("new4.txt")).unwrap(); + let patch_str = std::fs::read_to_string(dir.join("complex.diff")).unwrap(); + + let diff = parse_normal(&patch_str).unwrap(); + + let (result, stats) = apply(&old, &diff).unwrap(); + assert_eq!(result, new); + assert!(stats.has_changes()); + } + + #[test] + fn test_parse_inline_change() { + let patch = "2c2\n< old line\n---\n> new line\n"; + let diff = parse_normal(patch).unwrap(); + assert_eq!(diff.hunks().len(), 1); + + let hunk = &diff.hunks()[0]; + assert_eq!(hunk.old_range().start(), 2); + assert_eq!(hunk.old_range().len(), 1); + assert_eq!(hunk.new_range().start(), 2); + assert_eq!(hunk.new_range().len(), 1); + assert_eq!(hunk.lines().len(), 2); + } + + #[test] + fn test_parse_inline_delete() { + let patch = "2,3d1\n< line two\n< line three\n"; + let diff = parse_normal(patch).unwrap(); + assert_eq!(diff.hunks().len(), 1); + + let hunk = &diff.hunks()[0]; + assert_eq!(hunk.old_range().start(), 2); + assert_eq!(hunk.old_range().len(), 2); + assert_eq!(hunk.new_range().start(), 1); + assert_eq!(hunk.new_range().len(), 0); + assert_eq!(hunk.lines().len(), 2); + } + + #[test] + fn test_parse_inline_add() { + let patch = "0a1,2\n> added one\n> added two\n"; + let diff = parse_normal(patch).unwrap(); + assert_eq!(diff.hunks().len(), 1); + + let hunk = &diff.hunks()[0]; + assert_eq!(hunk.old_range().start(), 1); + assert_eq!(hunk.old_range().len(), 0); + assert_eq!(hunk.new_range().start(), 1); + assert_eq!(hunk.new_range().len(), 2); + assert_eq!(hunk.lines().len(), 2); + } + + #[test] + fn test_parse_bytes() { + let patch = b"2c2\n< old\n---\n> new\n"; + let diff = parse_normal_bytes(patch).unwrap(); + assert_eq!(diff.hunks().len(), 1); + } + + #[test] + fn test_roundtrip_change() { + let old = "line 1\nline 2\nline 3\n"; + let new = "line 1\nmodified line 2\nline 3\n"; + let patch = "2c2\n< line 2\n---\n> modified line 2\n"; + + let diff = parse_normal(patch).unwrap(); + let (result, _) = apply(old, &diff).unwrap(); + assert_eq!(result, new); + } + + #[test] + fn test_roundtrip_delete() { + let old = "line 1\nline 2\nline 3\n"; + let new = "line 1\nline 3\n"; + let patch = "2d1\n< line 2\n"; + + let diff = parse_normal(patch).unwrap(); + let (result, _) = apply(old, &diff).unwrap(); + assert_eq!(result, new); + } + + #[test] + fn test_roundtrip_add() { + let old = "line 1\nline 3\n"; + let new = "line 1\nline 2\nline 3\n"; + let patch = "1a2\n> line 2\n"; + + let diff = parse_normal(patch).unwrap(); + let (result, _) = apply(old, &diff).unwrap(); + assert_eq!(result, new); + } + + #[test] + fn test_multiline_change() { + let old = "a\nb\nc\nd\n"; + let new = "a\nB\nC\nd\n"; + let patch = "2,3c2,3\n< b\n< c\n---\n> B\n> C\n"; + + let diff = parse_normal(patch).unwrap(); + let (result, stats) = apply(old, &diff).unwrap(); + assert_eq!(result, new); + assert_eq!(stats.lines_added, 2); + assert_eq!(stats.lines_deleted, 2); + } +} diff --git a/test-data/normal-diff/add_only.diff b/test-data/normal-diff/add_only.diff new file mode 100644 index 0000000..9bbf87d --- /dev/null +++ b/test-data/normal-diff/add_only.diff @@ -0,0 +1,4 @@ +0a1 +> new first line +3a5 +> new last line diff --git a/test-data/normal-diff/change_and_add.diff b/test-data/normal-diff/change_and_add.diff new file mode 100644 index 0000000..8f6bd3d --- /dev/null +++ b/test-data/normal-diff/change_and_add.diff @@ -0,0 +1,6 @@ +2c2 +< This is the second line. +--- +> This is the modified second line. +5a6 +> An added sixth line. diff --git a/test-data/normal-diff/complex.diff b/test-data/normal-diff/complex.diff new file mode 100644 index 0000000..ebfbac6 --- /dev/null +++ b/test-data/normal-diff/complex.diff @@ -0,0 +1,14 @@ +1a2 +> #include +3,5c4,10 +< int main() { +< printf("Hello, World!\n"); +< return 0; +--- +> int main(int argc, char *argv[]) { +> if (argc > 1) { +> printf("Hello, %s!\n", argv[1]); +> } else { +> printf("Hello, World!\n"); +> } +> return EXIT_SUCCESS; diff --git a/test-data/normal-diff/delete_insert_delete.diff b/test-data/normal-diff/delete_insert_delete.diff new file mode 100644 index 0000000..51e8eee --- /dev/null +++ b/test-data/normal-diff/delete_insert_delete.diff @@ -0,0 +1,6 @@ +2d1 +< beta +5a5 +> inserted line +8d7 +< theta diff --git a/test-data/normal-diff/new1.txt b/test-data/normal-diff/new1.txt new file mode 100644 index 0000000..4877518 --- /dev/null +++ b/test-data/normal-diff/new1.txt @@ -0,0 +1,6 @@ +This is the first line. +This is the modified second line. +This is the third line. +This is the fourth line. +This is the fifth line. +An added sixth line. diff --git a/test-data/normal-diff/new2.txt b/test-data/normal-diff/new2.txt new file mode 100644 index 0000000..49d1a8d --- /dev/null +++ b/test-data/normal-diff/new2.txt @@ -0,0 +1,7 @@ +alpha +gamma +delta +epsilon +inserted line +zeta +eta diff --git a/test-data/normal-diff/new3.txt b/test-data/normal-diff/new3.txt new file mode 100644 index 0000000..317c9fe --- /dev/null +++ b/test-data/normal-diff/new3.txt @@ -0,0 +1,5 @@ +new first line +line 1 +line 2 +line 3 +new last line diff --git a/test-data/normal-diff/new4.txt b/test-data/normal-diff/new4.txt new file mode 100644 index 0000000..6d6b8b9 --- /dev/null +++ b/test-data/normal-diff/new4.txt @@ -0,0 +1,11 @@ +#include +#include + +int main(int argc, char *argv[]) { + if (argc > 1) { + printf("Hello, %s!\n", argv[1]); + } else { + printf("Hello, World!\n"); + } + return EXIT_SUCCESS; +} diff --git a/test-data/normal-diff/old1.txt b/test-data/normal-diff/old1.txt new file mode 100644 index 0000000..69aaf3f --- /dev/null +++ b/test-data/normal-diff/old1.txt @@ -0,0 +1,5 @@ +This is the first line. +This is the second line. +This is the third line. +This is the fourth line. +This is the fifth line. diff --git a/test-data/normal-diff/old2.txt b/test-data/normal-diff/old2.txt new file mode 100644 index 0000000..1e395f2 --- /dev/null +++ b/test-data/normal-diff/old2.txt @@ -0,0 +1,8 @@ +alpha +beta +gamma +delta +epsilon +zeta +eta +theta diff --git a/test-data/normal-diff/old3.txt b/test-data/normal-diff/old3.txt new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/test-data/normal-diff/old3.txt @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 diff --git a/test-data/normal-diff/old4.txt b/test-data/normal-diff/old4.txt new file mode 100644 index 0000000..f26b97c --- /dev/null +++ b/test-data/normal-diff/old4.txt @@ -0,0 +1,6 @@ +#include + +int main() { + printf("Hello, World!\n"); + return 0; +} From 6342ee5c38965dfb253cb39d226a5557ba877ed9 Mon Sep 17 00:00:00 2001 From: Wolf Vollprecht Date: Thu, 12 Feb 2026 17:41:30 +0100 Subject: [PATCH 2/2] fmt --- src/apply.rs | 66 ++++++++++++++++++++++++++++++++++++++++++++++++ src/patch/mod.rs | 35 ++++++++++++++++++++++--- 2 files changed, 97 insertions(+), 4 deletions(-) diff --git a/src/apply.rs b/src/apply.rs index 4477e6a..e018def 100644 --- a/src/apply.rs +++ b/src/apply.rs @@ -1040,4 +1040,70 @@ mod test { .join("\n"); insta::assert_snapshot!(relevant_lines); } + + #[test] + fn test_create_new_file() { + // Test creating a new file (original is /dev/null) + let patch = r#"--- /dev/null ++++ b/new_file.txt +@@ -0,0 +1,3 @@ ++line 1 ++line 2 ++line 3 +"#; + let diff = Diff::from_str(patch).unwrap(); + + // Check parsing + assert_eq!( + diff.original(), + None, + "original should be None for /dev/null" + ); + assert!(diff.modified().is_some()); + + // Apply to empty base (since file doesn't exist yet) + let (content, stats) = apply("", &diff).expect("Should apply file creation patch"); + + assert_eq!(content, "line 1\nline 2\nline 3\n"); + assert_eq!(stats.lines_added, 3); + assert_eq!(stats.lines_deleted, 0); + assert!(stats.has_changes()); + } + + #[test] + fn test_create_file_multi_patch() { + // Test case from user report: multi-file patch that creates new files + // All files have "--- /dev/null" as the original + let base_folder = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("test-data") + .join("create-file"); + let patch = std::fs::read_to_string(base_folder.join("create_file.patch")).unwrap(); + let diffs = crate::patch::patch_from_str(&patch).unwrap(); + + // This is a multi-file patch that creates new files + assert_eq!(diffs.len(), 9, "Should parse 9 diffs"); + + for (i, single_diff) in diffs.iter().enumerate() { + // For file creation, original should be None (from /dev/null) + assert_eq!( + single_diff.original(), + None, + "Diff {} original should be None for file creation", + i + ); + + // Apply to empty base image (since file doesn't exist) + let (content, stats) = + apply("", single_diff).expect(&format!("Diff {} should apply successfully", i)); + + // All files should have content (lines added > 0) + assert!(stats.lines_added > 0, "Diff {} should add lines", i); + assert_eq!(stats.lines_deleted, 0, "Diff {} should not delete lines", i); + assert!(!content.is_empty(), "Diff {} should produce content", i); + } + + // Snapshot the first file's content + let (content, _) = apply("", &diffs[0]).unwrap(); + insta::assert_snapshot!(content); + } } diff --git a/src/patch/mod.rs b/src/patch/mod.rs index 48da198..29cc03b 100644 --- a/src/patch/mod.rs +++ b/src/patch/mod.rs @@ -1,5 +1,6 @@ mod format; mod parse; +pub mod parse_normal; pub use format::PatchFormatter; pub use parse::{HunkRangeStrategy, ParsePatchError, ParserConfig}; @@ -85,8 +86,15 @@ impl + ToOwned + ?Sized> Diff<'_, T> { } } +/// Parse a patch from a string. +/// +/// Automatically detects whether the input is in unified or normal diff format. pub fn patch_from_str(input: &str) -> Result, ParsePatchError> { - parse::parse_multiple(input) + if parse_normal::is_normal_diff(input) { + parse_normal::parse_normal_multiple(input) + } else { + parse::parse_multiple(input) + } } pub fn patch_from_str_with_config( @@ -96,8 +104,15 @@ pub fn patch_from_str_with_config( parse::parse_multiple_with_config(input, config) } +/// Parse a patch from bytes. +/// +/// Automatically detects whether the input is in unified or normal diff format. pub fn patch_from_bytes(input: &[u8]) -> Result, ParsePatchError> { - parse::parse_bytes_multiple(input) + if parse_normal::is_normal_diff(input) { + parse_normal::parse_normal_bytes_multiple(input) + } else { + parse::parse_bytes_multiple(input) + } } pub fn patch_from_bytes_with_config( @@ -110,6 +125,8 @@ pub fn patch_from_bytes_with_config( impl<'a> Diff<'a, str> { /// Parse a `Patch` from a string /// + /// Automatically detects whether the input is in unified or normal diff format. + /// /// ``` /// use flickzeug::Diff; /// @@ -129,14 +146,24 @@ impl<'a> Diff<'a, str> { /// ``` #[allow(clippy::should_implement_trait)] pub fn from_str(s: &'a str) -> Result, ParsePatchError> { - parse::parse(s) + if parse_normal::is_normal_diff(s) { + parse_normal::parse_normal(s) + } else { + parse::parse(s) + } } } impl<'a> Diff<'a, [u8]> { /// Parse a `Patch` from bytes + /// + /// Automatically detects whether the input is in unified or normal diff format. pub fn from_bytes(s: &'a [u8]) -> Result, ParsePatchError> { - parse::parse_bytes(s) + if parse_normal::is_normal_diff(s) { + parse_normal::parse_normal_bytes(s) + } else { + parse::parse_bytes(s) + } } }