From 7583b0b864ecdf781a911fa252087c315e385cd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Mon, 15 Dec 2025 10:56:46 +0100 Subject: [PATCH 1/9] Replace ArrayPool with direct char[] allocation for newline appending ArrayPool buffers can be reused with different content, which breaks Onigwrap's regex search caching (uses memory reference equality). Direct allocation creates unique buffers that enable proper caching, improving overall tokenization performance. --- .../Internal/Grammars/Grammar.cs | 59 ++++++++----------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/src/TextMateSharp/Internal/Grammars/Grammar.cs b/src/TextMateSharp/Internal/Grammars/Grammar.cs index ff277ea..1828f9c 100644 --- a/src/TextMateSharp/Internal/Grammars/Grammar.cs +++ b/src/TextMateSharp/Internal/Grammars/Grammar.cs @@ -255,46 +255,37 @@ private object Tokenize(ReadOnlyMemory lineText, StateStack prevState, boo } // Check if we need to append newline - char[] rentedBuffer = null; - ReadOnlyMemory effectiveLineText; - try + ReadOnlyMemory effectiveLineText; + if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') { - if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') - { - // Only add \n if the passed lineText didn't have it. - // Use ArrayPool to avoid per-line allocation - int requiredLength = lineText.Length + 1; - rentedBuffer = ArrayPool.Shared.Rent(requiredLength); - lineText.Span.CopyTo(rentedBuffer); - rentedBuffer[lineText.Length] = '\n'; - effectiveLineText = rentedBuffer.AsMemory(0, requiredLength); - } - else - { - effectiveLineText = lineText; - } + // Only add \n if the passed lineText didn't have it. + // We need to allocate a new buffer with the newline + char[] buffer = new char[lineText.Length + 1]; + lineText.Span.CopyTo(buffer); + buffer[lineText.Length] = '\n'; + effectiveLineText = buffer.AsMemory(); + } + else + { + effectiveLineText = lineText; + } - int lineLength = effectiveLineText.Length; - LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors); - TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState, - lineTokens, true, timeLimit); + int lineLength = effectiveLineText.Length; + LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, + _balancedBracketSelectors); + TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, + prevState, + lineTokens, true, timeLimit); - if (emitBinaryTokens) - { - return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength), - tokenizeResult.Stack, tokenizeResult.StoppedEarly); - } - return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength), - tokenizeResult.Stack, tokenizeResult.StoppedEarly); - } - finally + if (emitBinaryTokens) { - if (rentedBuffer != null) - { - ArrayPool.Shared.Return(rentedBuffer); - } + return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength), + tokenizeResult.Stack, tokenizeResult.StoppedEarly); } + + return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength), + tokenizeResult.Stack, tokenizeResult.StoppedEarly); } private void GenerateRootId() From dabf7038ff47afd9d95ec453186a626781e63057 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Mon, 15 Dec 2025 10:59:15 +0100 Subject: [PATCH 2/9] Remove unused using --- src/TextMateSharp/Internal/Grammars/Grammar.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/TextMateSharp/Internal/Grammars/Grammar.cs b/src/TextMateSharp/Internal/Grammars/Grammar.cs index 1828f9c..fe09793 100644 --- a/src/TextMateSharp/Internal/Grammars/Grammar.cs +++ b/src/TextMateSharp/Internal/Grammars/Grammar.cs @@ -1,5 +1,4 @@ using System; -using System.Buffers; using System.Collections.Generic; using TextMateSharp.Grammars; From 7df39789a2beb3ed5f92a96dc86ec8dbb09083c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Mon, 15 Dec 2025 11:01:04 +0100 Subject: [PATCH 3/9] Add some docs to public interfaces --- src/TextMateSharp/Model/IModelLines.cs | 56 ++++++++++++++++++++++++++ src/TextMateSharp/Model/ITMModel.cs | 41 ++++++++++++++++++- 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/src/TextMateSharp/Model/IModelLines.cs b/src/TextMateSharp/Model/IModelLines.cs index 80f7568..04972a6 100644 --- a/src/TextMateSharp/Model/IModelLines.cs +++ b/src/TextMateSharp/Model/IModelLines.cs @@ -4,17 +4,73 @@ namespace TextMateSharp.Model { + /// + /// Represents a document model that provides line-based access for TextMate tokenization. + /// public interface IModelLines { + /// + /// Notifies that a new line has been added at the specified index. + /// + /// The zero-based index where the line was added. void AddLine(int lineIndex); + + /// + /// Notifies that a line has been removed at the specified index. + /// + /// The zero-based index of the removed line. void RemoveLine(int lineIndex); + + /// + /// Notifies that the content of a line has changed. + /// + /// The zero-based index of the updated line. void UpdateLine(int lineIndex); + + /// + /// Gets the number of model lines currently tracked. + /// int GetSize(); + + /// + /// Gets the model line at the specified index. + /// + /// The zero-based index of the line. ModelLine Get(int lineIndex); + + /// + /// Executes an action for each model line. + /// + /// The action to execute on each line. void ForEach(Action action); + + /// + /// Gets the total number of lines in the document. + /// int GetNumberOfLines(); + + /// + /// Gets the text content of the line at the specified index. + /// + /// The zero-based index of the line. + /// The line text wrapped in a structure. + /// + /// For optimal performance, the returned text should include the line terminator + /// (e.g., '\n' or "\r\n") if one exists. When line terminators are not included, + /// the tokenization engine will allocate a new buffer to append a newline character, + /// which impacts performance and memory usage. + /// LineText GetLineText(int lineIndex); + + /// + /// Gets the length of the line at the specified index. + /// + /// The zero-based index of the line. int GetLineLength(int lineIndex); + + /// + /// Releases resources used by this model. + /// void Dispose(); } } \ No newline at end of file diff --git a/src/TextMateSharp/Model/ITMModel.cs b/src/TextMateSharp/Model/ITMModel.cs index a957d74..09f9e1d 100644 --- a/src/TextMateSharp/Model/ITMModel.cs +++ b/src/TextMateSharp/Model/ITMModel.cs @@ -4,15 +4,54 @@ namespace TextMateSharp.Model { + /// + /// Represents a TextMate model that manages tokenization of a document. + /// The model coordinates between the document content and the grammar to produce tokens. + /// public interface ITMModel { + /// + /// Gets the grammar currently used for tokenization. + /// + /// The current grammar, or null if no grammar is set. IGrammar GetGrammar(); + + /// + /// Sets the grammar to use for tokenization. + /// Changing the grammar will invalidate existing tokens and trigger re-tokenization. + /// + /// The grammar to use for tokenization. void SetGrammar(IGrammar grammar); + + /// + /// Registers a listener to be notified when tokens change in the model. + /// + /// The listener to receive token change notifications. void AddModelTokensChangedListener(IModelTokensChangedListener listener); + + /// + /// Removes a previously registered token change listener. + /// + /// The listener to remove. void RemoveModelTokensChangedListener(IModelTokensChangedListener listener); + + /// + /// Releases resources used by this model and stops background tokenization. + /// void Dispose(); + + /// + /// Gets the tokens for a specific line. + /// + /// The zero-based line index. + /// A list of tokens for the specified line, or null if the line has not been tokenized yet. List GetLineTokens(int line); - void ForceTokenization(int lineIndex); + /// + /// Forces immediate tokenization of a specific line, bypassing the background tokenization queue. + /// Use this when you need tokens for a line immediately (e.g., for visible lines in the viewport). + /// + /// The zero-based index of the line to tokenize. + void ForceTokenization(int lineIndex); } } \ No newline at end of file From 3c68def85881c41e81bc8a3b351665f5773956af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Mon, 15 Dec 2025 11:25:33 +0100 Subject: [PATCH 4/9] Updated the sample to use ReadonlyMemory --- src/TextMateSharp.Demo/Program.cs | 83 +++++++++++++++++-------------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/src/TextMateSharp.Demo/Program.cs b/src/TextMateSharp.Demo/Program.cs index eed263a..eb8febf 100644 --- a/src/TextMateSharp.Demo/Program.cs +++ b/src/TextMateSharp.Demo/Program.cs @@ -1,4 +1,5 @@ using System; +using System.Collections.Generic; using System.Globalization; using System.IO; @@ -53,44 +54,39 @@ static void Main(string[] args) IStateStack? ruleStack = null; - using (StreamReader sr = new StreamReader(fileToParse)) + string fileContent = File.ReadAllText(fileToParse); + ReadOnlyMemory contentMemory = fileContent.AsMemory(); + + foreach (var lineRange in GetLineRanges(fileContent)) { - string? line = sr.ReadLine(); + ReadOnlyMemory lineMemory = contentMemory.Slice(lineRange.Start, lineRange.Length); + ITokenizeLineResult result = grammar.TokenizeLine(lineMemory, ruleStack, TimeSpan.MaxValue); + + ruleStack = result.RuleStack; - while (line != null) + foreach (IToken token in result.Tokens) { - ITokenizeLineResult result = grammar.TokenizeLine(line, ruleStack, TimeSpan.MaxValue); + int startIndex = Math.Min(token.StartIndex, lineRange.Length); + int endIndex = Math.Min(token.EndIndex, lineRange.Length); - ruleStack = result.RuleStack; + int foreground = -1; + int background = -1; + FontStyle fontStyle = FontStyle.NotSet; - foreach (IToken token in result.Tokens) + foreach (var themeRule in theme.Match(token.Scopes)) { - int startIndex = (token.StartIndex > line.Length) ? - line.Length : token.StartIndex; - int endIndex = (token.EndIndex > line.Length) ? - line.Length : token.EndIndex; - - int foreground = -1; - int background = -1; - FontStyle fontStyle = FontStyle.NotSet; - - foreach (var themeRule in theme.Match(token.Scopes)) - { - if (foreground == -1 && themeRule.foreground > 0) - foreground = themeRule.foreground; + if (foreground == -1 && themeRule.foreground > 0) + foreground = themeRule.foreground; - if (background == -1 && themeRule.background > 0) - background = themeRule.background; + if (background == -1 && themeRule.background > 0) + background = themeRule.background; - if (fontStyle == FontStyle.NotSet && themeRule.fontStyle > 0) - fontStyle = themeRule.fontStyle; - } - - WriteToken(line.SubstringAtIndexes(startIndex, endIndex), foreground, background, fontStyle, theme); + if (fontStyle == FontStyle.NotSet && themeRule.fontStyle > 0) + fontStyle = themeRule.fontStyle; } - Console.WriteLine(); - line = sr.ReadLine(); + ReadOnlySpan tokenSpan = lineMemory.Span.Slice(startIndex, endIndex - startIndex); + WriteToken(tokenSpan, foreground, background, fontStyle, theme); } } @@ -113,11 +109,12 @@ static void Main(string[] args) Console.WriteLine("ERROR: " + ex.Message); } } - static void WriteToken(string text, int foreground, int background, FontStyle fontStyle, Theme theme) + + static void WriteToken(ReadOnlySpan text, int foreground, int background, FontStyle fontStyle, Theme theme) { if (foreground == -1) { - Console.Write(text); + Console.Out.Write(text); return; } @@ -127,7 +124,8 @@ static void WriteToken(string text, int foreground, int background, FontStyle fo Color foregroundColor = GetColor(foreground, theme); Style style = new Style(foregroundColor, backgroundColor, decoration); - Markup markup = new Markup(text.Replace("[", "[[").Replace("]", "]]"), style); + string textStr = text.ToString(); + Markup markup = new Markup(textStr.Replace("[", "[[").Replace("]", "]]"), style); AnsiConsole.Write(markup); } @@ -173,13 +171,26 @@ static Color HexToColor(string hexString) return new Color(r, g, b); } - } - internal static class StringExtensions - { - internal static string SubstringAtIndexes(this string str, int startIndex, int endIndex) + static IEnumerable<(int Start, int Length)> GetLineRanges(string content) { - return str.Substring(startIndex, endIndex - startIndex); + int lineStart = 0; + + for (int i = 0; i < content.Length; i++) + { + if (content[i] == '\n') + { + int lineLength = i - lineStart + 1; // Include the \n + yield return (lineStart, lineLength); + lineStart = i + 1; + } + } + + // Handle last line without terminator + if (lineStart < content.Length) + { + yield return (lineStart, content.Length - lineStart); + } } } } From 2f9f3dd1fe712878183572c2d542333d5d928592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Mon, 15 Dec 2025 11:30:30 +0100 Subject: [PATCH 5/9] Fix line breaks --- src/TextMateSharp.Demo/Program.cs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/TextMateSharp.Demo/Program.cs b/src/TextMateSharp.Demo/Program.cs index eb8febf..1d8b8e7 100644 --- a/src/TextMateSharp.Demo/Program.cs +++ b/src/TextMateSharp.Demo/Program.cs @@ -57,8 +57,12 @@ static void Main(string[] args) string fileContent = File.ReadAllText(fileToParse); ReadOnlyMemory contentMemory = fileContent.AsMemory(); + bool needsedLineBreak = true; + foreach (var lineRange in GetLineRanges(fileContent)) { + needsedLineBreak = true; + ReadOnlyMemory lineMemory = contentMemory.Slice(lineRange.Start, lineRange.Length); ITokenizeLineResult result = grammar.TokenizeLine(lineMemory, ruleStack, TimeSpan.MaxValue); @@ -87,7 +91,13 @@ static void Main(string[] args) ReadOnlySpan tokenSpan = lineMemory.Span.Slice(startIndex, endIndex - startIndex); WriteToken(tokenSpan, foreground, background, fontStyle, theme); + + if (tokenSpan.IndexOf('\n') != -1) + needsedLineBreak = false; } + + if (needsedLineBreak) + Console.WriteLine(); } var colorDictionary = theme.GetGuiColorDictionary(); From c5801116bac49b879ce739b74b1405fcddb80564 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Mon, 15 Dec 2025 11:40:50 +0100 Subject: [PATCH 6/9] Fixed typo --- src/TextMateSharp.Demo/Program.cs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/TextMateSharp.Demo/Program.cs b/src/TextMateSharp.Demo/Program.cs index 1d8b8e7..f0235f2 100644 --- a/src/TextMateSharp.Demo/Program.cs +++ b/src/TextMateSharp.Demo/Program.cs @@ -57,11 +57,9 @@ static void Main(string[] args) string fileContent = File.ReadAllText(fileToParse); ReadOnlyMemory contentMemory = fileContent.AsMemory(); - bool needsedLineBreak = true; - foreach (var lineRange in GetLineRanges(fileContent)) { - needsedLineBreak = true; + bool needsLineBreak = true; ReadOnlyMemory lineMemory = contentMemory.Slice(lineRange.Start, lineRange.Length); ITokenizeLineResult result = grammar.TokenizeLine(lineMemory, ruleStack, TimeSpan.MaxValue); @@ -93,10 +91,10 @@ static void Main(string[] args) WriteToken(tokenSpan, foreground, background, fontStyle, theme); if (tokenSpan.IndexOf('\n') != -1) - needsedLineBreak = false; + needsLineBreak = false; } - if (needsedLineBreak) + if (needsLineBreak) Console.WriteLine(); } From 84b09bfb72e35243db68e4f0df2601d843ab2e57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Mon, 15 Dec 2025 14:15:50 +0100 Subject: [PATCH 7/9] Rename method for clarity --- src/TextMateSharp.Tests/Model/TMModelTests.cs | 7 ++++--- src/TextMateSharp/Model/AbstractLineList.cs | 2 +- src/TextMateSharp/Model/IModelLines.cs | 2 +- src/TextMateSharp/Model/TMModel.cs | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/TextMateSharp.Tests/Model/TMModelTests.cs b/src/TextMateSharp.Tests/Model/TMModelTests.cs index 5e6f901..bb743fa 100644 --- a/src/TextMateSharp.Tests/Model/TMModelTests.cs +++ b/src/TextMateSharp.Tests/Model/TMModelTests.cs @@ -1,4 +1,5 @@ -using Moq; +using System; +using Moq; using NUnit.Framework; @@ -125,9 +126,9 @@ public override int GetLineLength(int lineIndex) { return _lines[lineIndex].Length; } - public override LineText GetLineText(int lineIndex) + public override LineText GetLineTextIncludingTerminators(int lineIndex) { - return _lines[lineIndex]; + return _lines[lineIndex] + Environment.NewLine; } public override int GetNumberOfLines() { diff --git a/src/TextMateSharp/Model/AbstractLineList.cs b/src/TextMateSharp/Model/AbstractLineList.cs index fb255da..2046d13 100644 --- a/src/TextMateSharp/Model/AbstractLineList.cs +++ b/src/TextMateSharp/Model/AbstractLineList.cs @@ -96,7 +96,7 @@ public int GetSize() public abstract int GetNumberOfLines(); - public abstract LineText GetLineText(int lineIndex); + public abstract LineText GetLineTextIncludingTerminators(int lineIndex); public abstract int GetLineLength(int lineIndex); diff --git a/src/TextMateSharp/Model/IModelLines.cs b/src/TextMateSharp/Model/IModelLines.cs index 04972a6..c38c62d 100644 --- a/src/TextMateSharp/Model/IModelLines.cs +++ b/src/TextMateSharp/Model/IModelLines.cs @@ -60,7 +60,7 @@ public interface IModelLines /// the tokenization engine will allocate a new buffer to append a newline character, /// which impacts performance and memory usage. /// - LineText GetLineText(int lineIndex); + LineText GetLineTextIncludingTerminators(int lineIndex); /// /// Gets the length of the line at the specified index. diff --git a/src/TextMateSharp/Model/TMModel.cs b/src/TextMateSharp/Model/TMModel.cs index bafe0eb..f2d55ec 100644 --- a/src/TextMateSharp/Model/TMModel.cs +++ b/src/TextMateSharp/Model/TMModel.cs @@ -195,7 +195,7 @@ public int UpdateTokensInRange(ModelTokensChangedEventBuilder eventBuilder, int ModelLine modeLine = model._lines.Get(lineIndex); try { - text = model._lines.GetLineText(lineIndex); + text = model._lines.GetLineTextIncludingTerminators(lineIndex); // Tokenize only the first X characters r = model._tokenizer.Tokenize(text, modeLine.State, 0, MAX_LEN_TO_TOKENIZE, stopLineTokenizationAfter); } From 8d2fc1fc5b379b6ce3b84e4408d4a42f5e0e9c06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Mon, 15 Dec 2025 18:28:35 +0100 Subject: [PATCH 8/9] Update the benchmark test to use ReadonlyMemory --- .../BigFileTokenizationBenchmark.cs | 35 ++++++++++++++++--- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs b/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs index c77c547..6a0748d 100644 --- a/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs +++ b/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs @@ -1,4 +1,5 @@ using System; +using System.Collections.Generic; using System.IO; using BenchmarkDotNet.Attributes; @@ -11,7 +12,7 @@ namespace TextMateSharp.Benchmarks public class BigFileTokenizationBenchmark { private IGrammar _grammar = null!; - private string[] _lines = null!; + private string _content = null!; [GlobalSetup] public void Setup() @@ -40,8 +41,8 @@ public void Setup() // Load the file into memory - _lines = File.ReadAllLines(bigFilePath); - Console.WriteLine($"Loaded {_lines.Length} lines from bigfile.cs"); + _content = File.ReadAllText(bigFilePath); + Console.WriteLine($"Loaded bigfile.cs"); // Load the C# grammar RegistryOptions options = new RegistryOptions(ThemeName.DarkPlus); @@ -60,14 +61,38 @@ public int TokenizeAllLines() int totalTokens = 0; IStateStack? ruleStack = null; - for (int i = 0; i < _lines.Length; i++) + ReadOnlyMemory contentMemory = _content.AsMemory(); + + foreach (var lineRange in GetLineRanges(_content)) { - ITokenizeLineResult result = _grammar.TokenizeLine(_lines[i], ruleStack, TimeSpan.MaxValue); + ReadOnlyMemory lineMemory = contentMemory.Slice(lineRange.Start, lineRange.Length); + ITokenizeLineResult result = _grammar.TokenizeLine(lineMemory, ruleStack, TimeSpan.MaxValue); ruleStack = result.RuleStack; totalTokens += result.Tokens.Length; } return totalTokens; } + + static IEnumerable<(int Start, int Length)> GetLineRanges(string content) + { + int lineStart = 0; + + for (int i = 0; i < content.Length; i++) + { + if (content[i] == '\n') + { + int lineLength = i - lineStart + 1; // Include the \n + yield return (lineStart, lineLength); + lineStart = i + 1; + } + } + + // Handle last line without terminator + if (lineStart < content.Length) + { + yield return (lineStart, content.Length - lineStart); + } + } } } From 27c5e2daf9f7194691a145d0ff09f1fc511cbb71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Tue, 16 Dec 2025 09:11:05 +0100 Subject: [PATCH 9/9] Update to Onigwrap 1.0.10 --- build/Directory.Build.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/Directory.Build.props b/build/Directory.Build.props index 7d9da74..627bad8 100644 --- a/build/Directory.Build.props +++ b/build/Directory.Build.props @@ -3,6 +3,6 @@ latest true 8.0.5 - 1.0.9 + 1.0.10