diff --git a/src/TextMateSharp.Tests/Model/TMModelTests.cs b/src/TextMateSharp.Tests/Model/TMModelTests.cs index bb743fa..0eee169 100644 --- a/src/TextMateSharp.Tests/Model/TMModelTests.cs +++ b/src/TextMateSharp.Tests/Model/TMModelTests.cs @@ -1,11 +1,10 @@ -using System; -using Moq; - +using Moq; using NUnit.Framework; - +using System; +using System.Diagnostics; using System.IO; +using System.Threading; using System.Threading.Tasks; - using TextMateSharp.Grammars; using TextMateSharp.Model; using TextMateSharp.Tests.Resources; @@ -95,7 +94,156 @@ public void TMModel_Should_Emit_ModelTokensChangedEvent_To_Clean_Highlighted_Lin } - static bool IsRangeValid(ModelTokensChangedEvent e, int fromLine, int toLine) + [Test] + public async Task TMModel_Dispose_Should_Be_NonBlocking() + { + // arrange + await using Stream stream = ResourceReader.OpenStream("sample.cs"); + using StreamReader reader = new StreamReader(stream); + + ModelLinesMock modelLines = new ModelLinesMock(reader.ReadToEnd().Split("\n")); + TMModel tmModel = new TMModel(modelLines); + + RegistryOptions options = new RegistryOptions(ThemeName.DarkPlus); + Registry.Registry registry = new Registry.Registry(options); + IGrammar grammar = registry.LoadGrammar("source.cs"); + + TaskCompletionSource firstCallback = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + CountingListener listener = new CountingListener(firstCallback); + + tmModel.SetGrammar(grammar); + tmModel.AddModelTokensChangedListener(listener); + + try + { + Task completed = await Task.WhenAny(firstCallback.Task, Task.Delay(5000)); + Assert.AreSame(firstCallback.Task, completed, "Timed out waiting for first tokenization callback."); + + // act + Stopwatch stopwatch = Stopwatch.StartNew(); + tmModel.Dispose(); + stopwatch.Stop(); + + // assert + const int maxDisposeTimeMs = 250; // threshold (in milliseconds) for considering Dispose() effectively non-blocking + Assert.Less(stopwatch.ElapsedMilliseconds, maxDisposeTimeMs, "Dispose() should be best-effort and non-blocking."); + } + finally + { + // Cleanup, Dispose is expected to be idempotent + tmModel.Dispose(); + } + } + + [Test] + public void TMModel_Dispose_Should_Be_Idempotent() + { + // arrange + ModelLinesMock modelLines = new ModelLinesMock(new string[] { "line 1" }); + TMModel tmModel = new TMModel(modelLines); + + // act + tmModel.Dispose(); + + // assert + Assert.DoesNotThrow(() => tmModel.Dispose()); + } + + [Test] + public async Task TMModel_Should_Stop_Emitting_After_Last_Listener_Removed() + { + // arrange + await using Stream stream = ResourceReader.OpenStream("sample.cs"); + using StreamReader reader = new StreamReader(stream); + + ModelLinesMock modelLines = new ModelLinesMock(reader.ReadToEnd().Split("\n")); + TMModel tmModel = new TMModel(modelLines); + + RegistryOptions options = new RegistryOptions(ThemeName.DarkPlus); + Registry.Registry registry = new Registry.Registry(options); + IGrammar grammar = registry.LoadGrammar("source.cs"); + + TaskCompletionSource firstCallback = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + CountingListener listener = new CountingListener(firstCallback); + + tmModel.SetGrammar(grammar); + tmModel.AddModelTokensChangedListener(listener); + + try + { + Task completed = await Task.WhenAny(firstCallback.Task, Task.Delay(5000)); + Assert.AreSame(firstCallback.Task, completed, "Timed out waiting for first tokenization callback."); + + int countAtRemove = listener.CallbackCount; + + // act + tmModel.RemoveModelTokensChangedListener(listener); + + const int delayBetweenInvalidationsMs = 10; + const int maxInvalidations = 50; + for (int invalidationCount = 0; invalidationCount < maxInvalidations; invalidationCount++) + { + tmModel.InvalidateLine(0); + await Task.Delay(delayBetweenInvalidationsMs); + } + + await Task.Delay(250); + + int countAfter = listener.CallbackCount; + + // assert + Assert.LessOrEqual( + countAfter, + countAtRemove + 1, + "At most one in-flight callback is acceptable after listener removal; additional callbacks indicate tokenization continued."); + } + finally + { + // Cleanup, Dispose is expected to be idempotent + tmModel.Dispose(); + } + } + + [Test] + public async Task TMModel_SetGrammar_FlipLoop_Should_Not_Deadlock_And_Should_Emit_ClearEvents() + { + // arrange + ModelLinesMock modelLines = new ModelLinesMock(new string[] { "line 1", "line 2", "line 3" }); + TMModel tmModel = new TMModel(modelLines); + + RegistryOptions options = new RegistryOptions(ThemeName.DarkPlus); + Registry.Registry registry = new Registry.Registry(options); + IGrammar grammar = registry.LoadGrammar("source.cs"); + + TaskCompletionSource sawClear = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + CountingClearListener listener = new CountingClearListener(sawClear); + + tmModel.AddModelTokensChangedListener(listener); + + try + { + // act + const int maxFlipIterations = 50; + for (int flipCount = 0; flipCount < maxFlipIterations; flipCount++) + { + tmModel.SetGrammar(grammar); + tmModel.SetGrammar(null); + } + + Task completed = await Task.WhenAny(sawClear.Task, Task.Delay(5000)); + + // assert + Assert.AreSame(sawClear.Task, completed, "Timed out waiting for a clear-highlight event after SetGrammar(null)."); + Assert.GreaterOrEqual(listener.ClearEventCount, 1, "Expected at least one clear-highlight event."); + } + finally + { + // Cleanup, Dispose is expected to be idempotent + tmModel.Dispose(); + } + } + + private static bool IsRangeValid(ModelTokensChangedEvent e, int fromLine, int toLine) { if (e.Ranges.Count != 1) return false; @@ -109,51 +257,111 @@ static bool IsRangeValid(ModelTokensChangedEvent e, int fromLine, int toLine) return true; } + internal sealed class CountingListener : IModelTokensChangedListener + { + private readonly TaskCompletionSource _firstCallback; + private int _callbackCount; + + internal CountingListener(TaskCompletionSource firstCallback) + { + _firstCallback = firstCallback; + } + + internal int CallbackCount + { + get { return Volatile.Read(ref _callbackCount); } + } + + void IModelTokensChangedListener.ModelTokensChanged(ModelTokensChangedEvent e) + { + Interlocked.Increment(ref _callbackCount); + _firstCallback.TrySetResult(true); + } + } + + internal sealed class CountingClearListener : IModelTokensChangedListener + { + // This test creates a 3-line model; the "clear highlight" event is expected to cover the full document range. + private const int FirstLineIndex = 0; + private const int LastLineIndex = 2; + + private readonly TaskCompletionSource _sawClear; + private int _clearEventCount; + + internal CountingClearListener(TaskCompletionSource sawClear) + { + _sawClear = sawClear; + } + + internal int ClearEventCount + { + get { return Volatile.Read(ref _clearEventCount); } + } + + void IModelTokensChangedListener.ModelTokensChanged(ModelTokensChangedEvent e) + { + if (e.Ranges.Count == 1 + && e.Ranges[0].FromLineNumber == FirstLineIndex + && e.Ranges[0].ToLineNumber == LastLineIndex) + { + Interlocked.Increment(ref _clearEventCount); + _sawClear.TrySetResult(true); + } + } + } + class ModelLinesMock : AbstractLineList { string[] _lines; + internal ModelLinesMock(string[] lines) { _lines = lines; for (int i = 0; i < lines.Length; i++) AddLine(i); } + public override void Dispose() { - } + public override int GetLineLength(int lineIndex) { return _lines[lineIndex].Length; } + public override LineText GetLineTextIncludingTerminators(int lineIndex) { return _lines[lineIndex] + Environment.NewLine; } + public override int GetNumberOfLines() { return _lines.Length; } + public override void UpdateLine(int lineIndex) { // no op } } + internal class ModelTokensChangedListenerMock : IModelTokensChangedListener { internal volatile bool Finished; internal volatile int LastParsedLine; private readonly int _lineCount; + internal ModelTokensChangedListenerMock(int lineCount) { _lineCount = lineCount; } + void IModelTokensChangedListener.ModelTokensChanged(ModelTokensChangedEvent e) { foreach (var range in e.Ranges) { LastParsedLine = range.ToLineNumber; - if (LastParsedLine >= _lineCount) { Finished = true; diff --git a/src/TextMateSharp/Model/AbstractLineList.cs b/src/TextMateSharp/Model/AbstractLineList.cs index 2046d13..f7e9605 100644 --- a/src/TextMateSharp/Model/AbstractLineList.cs +++ b/src/TextMateSharp/Model/AbstractLineList.cs @@ -5,6 +5,21 @@ namespace TextMateSharp.Model { + /// + /// Base implementation of providing a thread-safe internal list. + /// + /// + /// + /// Threading: This class serializes access to its internal line list via . + /// + /// + /// Important: Do not call back into (e.g. via , + /// , or ) while holding . + /// may hold its own internal lock while calling + /// or , which can acquire ; + /// reversing that order can deadlock. + /// + /// public abstract class AbstractLineList : IModelLines { private IList _list = new List(); @@ -18,10 +33,11 @@ public AbstractLineList() public void SetModel(TMModel model) { this._model = model; - lock(mLock) + lock (mLock) { - foreach (var line in _list) + for (int i = 0; i < _list.Count; i++) { + ModelLine line = _list[i]; line.IsInvalid = true; } } @@ -58,11 +74,38 @@ public void ForEach(Action action) { lock (mLock) { - foreach (ModelLine modelLine in _list) + for (int i = 0; i < _list.Count; i++) + { + ModelLine modelLine = _list[i]; action(modelLine); + } } } + /// + /// Notifies the owning that a single line must be (re)tokenized. + /// + /// + /// + /// Threading / lock ordering: This method calls back into , which may acquire TMModel's + /// internal lock. + /// + /// + /// Do not call this method while holding (or from within , + /// , , or callbacks that + /// execute under ), otherwise you can create a lock-order inversion deadlock + /// (mLock -> TMModel lock) with the tokenizer thread (TMModel lock -> mLock). + /// + /// + /// Safe pattern for derived classes: + /// + /// lock(mLock): mutate the internal list and compute affected ranges. + /// Release . + /// Call / / . + /// + /// + /// + /// Zero-based line index to invalidate. protected void InvalidateLine(int lineIndex) { if (_model != null) @@ -71,6 +114,14 @@ protected void InvalidateLine(int lineIndex) } } + /// + /// Notifies the owning that a range of lines must be (re)tokenized. + /// + /// + /// + /// + /// Zero-based start line index (inclusive). + /// Zero-based end line index (inclusive). protected void InvalidateLineRange(int iniLineIndex, int endLineIndex) { if (_model != null) @@ -79,6 +130,14 @@ protected void InvalidateLineRange(int iniLineIndex, int endLineIndex) } } + /// + /// Forces synchronous tokenization over a range of lines by delegating to the owning . + /// + /// + /// + /// + /// Zero-based start line index (inclusive). + /// Zero-based end line index (inclusive). protected void ForceTokenization(int startLineIndex, int endLineIndex) { if (_model != null) @@ -98,10 +157,28 @@ public int GetSize() public abstract LineText GetLineTextIncludingTerminators(int lineIndex); - public abstract int GetLineLength(int lineIndex); - - public abstract void Dispose(); - - object mLock = new object(); + public abstract int GetLineLength(int lineIndex); + + public abstract void Dispose(); + + /// + /// Lock protecting the internal structure. + /// + /// + /// + /// Threading / lock-ordering contract: This class protects its internal line list with . + /// has its own internal lock. + /// + /// + /// The tokenizer thread in may acquire TMModel's lock and then call into + /// , which acquires (lock order: TMModel lock -> mLock). + /// + /// + /// To avoid deadlocks, derived implementations must never re-enter (via + /// , , or ) + /// while holding (which would create mLock -> TMModel lock). + /// + /// + readonly object mLock = new object(); } } \ No newline at end of file diff --git a/src/TextMateSharp/Model/ITMModel.cs b/src/TextMateSharp/Model/ITMModel.cs index 09f9e1d..06caec4 100644 --- a/src/TextMateSharp/Model/ITMModel.cs +++ b/src/TextMateSharp/Model/ITMModel.cs @@ -1,3 +1,4 @@ +using System; using System.Collections.Generic; using TextMateSharp.Grammars; @@ -7,8 +8,10 @@ namespace TextMateSharp.Model /// /// Represents a TextMate model that manages tokenization of a document. /// The model coordinates between the document content and the grammar to produce tokens. + /// Implementations of this interface also implement + /// and must properly dispose of any resources they hold. /// - public interface ITMModel + public interface ITMModel : IDisposable { /// /// Gets the grammar currently used for tokenization. @@ -35,11 +38,6 @@ public interface ITMModel /// The listener to remove. void RemoveModelTokensChangedListener(IModelTokensChangedListener listener); - /// - /// Releases resources used by this model and stops background tokenization. - /// - void Dispose(); - /// /// Gets the tokens for a specific line. /// diff --git a/src/TextMateSharp/Model/TMModel.cs b/src/TextMateSharp/Model/TMModel.cs index f2d55ec..341d469 100644 --- a/src/TextMateSharp/Model/TMModel.cs +++ b/src/TextMateSharp/Model/TMModel.cs @@ -2,22 +2,58 @@ using System.Collections.Generic; using System.Diagnostics; using System.Threading; - +using System.Threading.Tasks; using TextMateSharp.Grammars; namespace TextMateSharp.Model { + /// + /// Represents a tokenization model that manages the tokenization of lines of text using a specified grammar. + /// + /// + /// + /// The TMModel class provides mechanisms for tokenizing text lines based on a grammar, + /// supporting dynamic grammar updates, line invalidation, and asynchronous processing through a dedicated tokenizer + /// thread. It allows clients to listen for tokenization changes and ensures thread safety for concurrent + /// operations. The model is designed to be disposed of safely and will stop processing when disposed. Tokenization + /// is performed efficiently, with safeguards for long lines and performance-sensitive scenarios. + /// + /// + /// Threading model: The document is edited by the host (typically on the UI thread) via the supplied + /// implementation, while tokenization work is performed by an internal background + /// . + /// + /// + /// Synchronization: TMModel protects its own lifecycle and tokenization state with an internal lock (), + /// and protects its internal list with . + /// + /// + /// Lock ordering: To avoid deadlocks, TMModel code acquires before calling into + /// (which may acquire ), + /// i.e. -> . Implementations of + /// must not call back into TMModel + /// (e.g. via / ) + /// while holding . + /// + /// + /// Listeners: The collection is mutated under lock(listeners), but listener callbacks are invoked + /// outside that lock (using a snapshot) to avoid deadlocks and re-entrancy issues. + /// + /// public class TMModel : ITMModel { private const int MAX_LEN_TO_TOKENIZE = 10000; private IGrammar _grammar; - private List listeners; + private readonly List listeners; private Tokenizer _tokenizer; private TokenizerThread _thread; - private IModelLines _lines; - private Queue _invalidLines = new Queue(); - private object _lock = new object(); - private ManualResetEvent _resetEvent = new ManualResetEvent(false); + private readonly IModelLines _lines; + private readonly Queue _invalidLines = new Queue(); + private readonly object _lock = new object(); + private int _isDisposedFlag; // atomic disposed state (0 = not disposed, 1 = disposed) + private int _grammarEpoch; + + private bool IsDisposed => Volatile.Read(ref _isDisposedFlag) != 0; public TMModel(IModelLines lines) { @@ -28,103 +64,414 @@ public TMModel(IModelLines lines) public bool IsStopped { - get { return this._thread == null || this._thread.IsStopped; } + get + { + lock (_lock) + { + return this._thread == null || this._thread.IsStopped; + } + } } - class TokenizerThread + sealed class TokenizerThread : IDisposable { + // code depending on IsStopped alone may race. IsDisposed is the canonical + // disposed state and IsStopped is for informational purposes only public volatile bool IsStopped; private string name; private TMModel model; private TMState lastState; + private AutoResetEvent _workAvailable; + private CancellationTokenSource _cts; + private Task _task; + private int _isDisposedFlag; // atomic disposed state (0 = not disposed, 1 = disposed) + private readonly object _lifecycleLock = new object(); + + private bool IsDisposed => Volatile.Read(ref _isDisposedFlag) != 0; public TokenizerThread(string name, TMModel model) { this.name = name; this.model = model; this.IsStopped = true; + this._workAvailable = new AutoResetEvent(false); + this._cts = new CancellationTokenSource(); } public void Run() { - IsStopped = false; + CancellationToken token; + + if (IsDisposed) + return; + + lock (_lifecycleLock) + { + if (IsDisposed) + return; + + // Prevent double-start (or a start racing with DisposeAfterCompletion deciding _task is null). + if (_task != null) + return; - ThreadPool.QueueUserWorkItem(ThreadWorker); + CancellationTokenSource cts = _cts; + if (cts == null) + return; + + IsStopped = false; + token = cts.Token; + + _task = Task.Factory.StartNew( + () => ThreadWorker(token), + token, + TaskCreationOptions.DenyChildAttach, + TaskScheduler.Default); + } } public void Stop() { IsStopped = true; + + CancellationTokenSource cts; + lock (_lifecycleLock) + { + cts = _cts; + } + + try + { + cts?.Cancel(); + } + catch (ObjectDisposedException) + { + // CancellationTokenSource was already disposed - ignore + } + + Signal(); } - void ThreadWorker(object state) + public void Signal() { - if (IsStopped) + AutoResetEvent workAvailable; + lock (_lifecycleLock) { - return; + workAvailable = _workAvailable; } - do + try { - int toProcess = -1; + workAvailable?.Set(); + } + catch (ObjectDisposedException) + { + // AutoResetEvent was already disposed - ignore + } + } - if (model._grammar.IsCompiling) - { - this.model._resetEvent.Reset(); - this.model._resetEvent.WaitOne(); - continue; - } + /// + /// Initiates non-blocking cleanup of this thread instance. + /// If the worker task is still running, disposal is deferred + /// until the task completes via a continuation. This method + /// never blocks the calling thread. + /// + public void DisposeAfterCompletion() + { + Task task; + lock (_lifecycleLock) + { + task = _task; + } + + if (task != null) + { + // Observe any fault on the worker task itself + ObserveTask(task); - lock (this.model._lock) + // When the worker task completes, dispose this instance + Task continuation = task.ContinueWith(_ => { - if (model._invalidLines.Count > 0) + try { - toProcess = model._invalidLines.Dequeue(); + Dispose(); } - } + catch (Exception e) + { + // Dispose should not throw, but guard against it + Trace.TraceError($"Exception occurred while disposing TokenizerThread '{name}' after task completion: {e.Message}"); + } + }, CancellationToken.None, TaskContinuationOptions.None, TaskScheduler.Default); - if (toProcess == -1) + // Observe the continuation so its own faults don't go unobserved + ObserveTask(continuation); + } + else + { + try { - this.model._resetEvent.Reset(); - this.model._resetEvent.WaitOne(); - continue; + Dispose(); + } + catch (Exception e) + { + // Dispose should not throw, but guard against it + Trace.TraceError($"Exception occurred while disposing TokenizerThread '{name}' with no task: {e.Message}"); } + } + } - var modelLine = model._lines.Get(toProcess); + public void Dispose() + { + // atomic check-and-set to prevent multiple concurrent disposes (e.g. racing DisposeAfterCompletion continuations) + if (Interlocked.CompareExchange(ref _isDisposedFlag, 1, 0) != 0) + return; - if (modelLine == null || !modelLine.IsInvalid) - continue; + IsStopped = true; - try + CancellationTokenSource cts; + AutoResetEvent workAvailable; + + lock (_lifecycleLock) + { + cts = _cts; + workAvailable = _workAvailable; + + // Prevent further use by other racing calls (Signal/Stop/Run/DisposeAfterCompletion). + _cts = null; + _workAvailable = null; + _task = null; + } + + try + { + cts?.Cancel(); + } + catch (ObjectDisposedException) + { + // CancellationTokenSource was already disposed - ignore + } + + // Signal the AutoResetEvent before disposing it so any thread + // blocked on WaitAny wakes up and can exit cleanly. + try + { + workAvailable?.Set(); + } + catch (ObjectDisposedException) + { + // AutoResetEvent was already disposed - ignore + } + + try + { + cts?.Dispose(); + } + catch (ObjectDisposedException) + { + // CancellationTokenSource was already disposed - ignore + } + + try + { + workAvailable?.Dispose(); + } + catch (ObjectDisposedException) + { + // AutoResetEvent was already disposed - ignore + } + } + + /// + /// Observes a task's exception to prevent UnobservedTaskException. + /// Accessing Task.Exception marks the exception as observed in the TPL. + /// This is the canonical .NET pattern - the property getter sets an + /// internal "observed" flag that cannot be compiled away by the + /// compiler or JIT, because it is a side-effecting property access. + /// + private static void ObserveTask(Task task) + { + task?.ContinueWith( + t => { - this.RevalidateTokens(toProcess, null); - } - catch (Exception e) + // Intentionally access Exception for its side effect: TPL marks the exception + // as observed, preventing UnobservedTaskException + _ = t.Exception; + }, + CancellationToken.None, + TaskContinuationOptions.OnlyOnFaulted, + TaskScheduler.Default); + } + + void ThreadWorker(CancellationToken cancellationToken) + { + if (IsStopped) + { + return; + } + + // Snapshot the wait handles at entry - these are owned by this + // TokenizerThread instance and will not be replaced (only disposed + // after this task completes via DisposeAfterCompletion). + AutoResetEvent workAvailable; + lock (_lifecycleLock) + { + workAvailable = _workAvailable; + } + + if (workAvailable == null) + { + return; + } + + // NOTE on CancellationToken.WaitHandle: + // ThreadWorker waits on both _workAvailable and the cancellation token's wait handle. + // CancellationToken.WaitHandle is lazily created (may allocate a WaitHandle on first access), + // but it is owned by this TokenizerThread's CancellationTokenSource (_cts). + // We intentionally do NOT dispose cancellationToken.WaitHandle directly; disposing _cts in Dispose() + // releases any resources associated with the token (including its underlying wait handle). + WaitHandle cancellationHandle = cancellationToken.WaitHandle; + WaitHandle[] waitHandles = new WaitHandle[] { workAvailable, cancellationHandle }; + + try + { + do { - System.Diagnostics.Debug.WriteLine(e.Message); + if (cancellationToken.IsCancellationRequested) + break; - if (toProcess < model._lines.GetNumberOfLines()) + // Check if this thread instance has been replaced by SetGrammar/Stop. + // Reading _thread under _lock prevents a stale read. + lock (model._lock) { - model.InvalidateLine(toProcess); + if (model._thread != this) + break; } - } - } while (!IsStopped && model._thread != null); + + int toProcess = -1; + + // Snapshot _grammar under lock so we get a consistent read. + // SetGrammar can change _grammar on the UI thread at any time. + IGrammar grammar; + + lock (this.model._lock) + { + grammar = model._grammar; + } + + if (grammar != null && grammar.IsCompiling) + { + try + { + WaitHandle.WaitAny(waitHandles); + } + catch (ObjectDisposedException) + { + break; + } + continue; + } + + lock (this.model._lock) + { + if (model._invalidLines.Count > 0) + { + toProcess = model._invalidLines.Dequeue(); + } + } + + if (toProcess == -1) + { + try + { + WaitHandle.WaitAny(waitHandles); + } + catch (ObjectDisposedException) + { + break; + } + continue; + } + + IModelLines lines = model._lines; + + bool isInvalid; + lock (model._lock) + { + // If Stop()/SetGrammar swapped out the thread, exit quickly + if (model._thread != this) + break; + + ModelLine modelLine = lines.Get(toProcess); + isInvalid = modelLine != null && modelLine.IsInvalid; + } + + if (!isInvalid) + continue; + + try + { + this.RevalidateTokens(toProcess, null); + } + catch (Exception e) + { + System.Diagnostics.Debug.WriteLine(e.Message); + + if (toProcess < lines.GetNumberOfLines()) + { + model.InvalidateLine(toProcess); + } + } + } while (!IsStopped && !cancellationToken.IsCancellationRequested && !ShouldStop()); + } + catch (ObjectDisposedException) + { + // WaitHandle or CancellationTokenSource was disposed - exit gracefully + } + catch (OperationCanceledException) + { + // CancellationToken was cancelled - exit gracefully + } + catch (Exception e) + { + // Last-resort safety net - exit rather than crash + Trace.TraceError($"Unexpected exception in TokenizerThread '{name}': {e.Message}"); + } + } + + /// + /// Checks whether this thread instance has been replaced. + /// Must read model._thread under lock to prevent stale reads. + /// + private bool ShouldStop() + { + lock (model._lock) + { + return model._thread != this; + } } Stopwatch _stopwatch = new Stopwatch(); private void RevalidateTokens(int startLine, int? toLineIndexOrNull) { - if (model._tokenizer == null) + Tokenizer tokenizer; + + lock (model._lock) + { + tokenizer = model._tokenizer; + } + + if (tokenizer == null) return; model.BuildEventWithCallback(eventBuilder => { + IModelLines lines = model._lines; + int toLineIndex = toLineIndexOrNull ?? 0; - if (toLineIndexOrNull == null || toLineIndex >= model._lines.GetNumberOfLines()) + if (toLineIndexOrNull == null || toLineIndex >= lines.GetNumberOfLines()) { - toLineIndex = model._lines.GetNumberOfLines() - 1; + toLineIndex = lines.GetNumberOfLines() - 1; } long tokenizedChars = 0; @@ -139,7 +486,7 @@ private void RevalidateTokens(int startLine, int? toLineIndexOrNull) // - tokenizing the next line would go above MAX_ALLOWED_TIME int lineIndex = startLine; - while (lineIndex <= toLineIndex && lineIndex < model.GetLines().GetNumberOfLines()) + while (lineIndex <= toLineIndex && lineIndex < lines.GetNumberOfLines()) { elapsedTime = _stopwatch.ElapsedMilliseconds; if (elapsedTime > MAX_ALLOWED_TIME) @@ -151,7 +498,7 @@ private void RevalidateTokens(int startLine, int? toLineIndexOrNull) // Compute how many characters will be tokenized for this line try { - currentCharsToTokenize = model._lines.GetLineLength(lineIndex); + currentCharsToTokenize = lines.GetLineLength(lineIndex); } catch (Exception e) { @@ -178,12 +525,32 @@ private void RevalidateTokens(int startLine, int? toLineIndexOrNull) public int UpdateTokensInRange(ModelTokensChangedEventBuilder eventBuilder, int startIndex, int endLineIndex) { + // Snapshot _grammar, _tokenizer, and _lines so we have consistent + // references. SetGrammar can null _grammar/_tokenizer on the UI + // thread at any time. _lines is assigned once in the constructor + // but we snapshot for consistency and to avoid repeated field reads + IGrammar grammar; + Tokenizer tokenizer; + IModelLines lines; + int expectedGrammarEpoch; + + lock (model._lock) + { + grammar = model._grammar; + tokenizer = model._tokenizer; + lines = model._lines; + expectedGrammarEpoch = model._grammarEpoch; + } + + if (tokenizer == null || lines == null) + return startIndex; + TimeSpan stopLineTokenizationAfter = TimeSpan.FromMilliseconds(3000); int nextInvalidLineIndex = startIndex; int lineIndex = startIndex; - while (lineIndex <= endLineIndex && lineIndex < model._lines.GetNumberOfLines()) + while (lineIndex <= endLineIndex && lineIndex < lines.GetNumberOfLines()) { - if (model._grammar != null && model._grammar.IsCompiling) + if (grammar != null && grammar.IsCompiling) { lineIndex++; continue; @@ -192,12 +559,36 @@ public int UpdateTokensInRange(ModelTokensChangedEventBuilder eventBuilder, int int endStateIndex = lineIndex + 1; LineTokens r = null; LineText text = default; - ModelLine modeLine = model._lines.Get(lineIndex); + ModelLine modelLine = lines.Get(lineIndex); + + if (modelLine == null) + { + lineIndex++; + continue; + } + + TMState startingState = null; try { - text = model._lines.GetLineTextIncludingTerminators(lineIndex); + text = lines.GetLineTextIncludingTerminators(lineIndex); // Tokenize only the first X characters - r = model._tokenizer.Tokenize(text, modeLine.State, 0, MAX_LEN_TO_TOKENIZE, stopLineTokenizationAfter); + + lock (model._lock) + { + // If the model was disposed/stopped or grammar swapped while we were preparing work, + // do not use a potentially stale ModelLine state. + if (model._thread != this || + model._grammarEpoch != expectedGrammarEpoch || + !object.ReferenceEquals(model._tokenizer, tokenizer)) + { + model.InvalidateLine(lineIndex); + return nextInvalidLineIndex; + } + + startingState = modelLine.State; + } + + r = tokenizer.Tokenize(text, startingState, 0, MAX_LEN_TO_TOKENIZE, stopLineTokenizationAfter); } catch (Exception e) { @@ -208,61 +599,87 @@ public int UpdateTokensInRange(ModelTokensChangedEventBuilder eventBuilder, int if (r != null && r.Tokens != null && r.Tokens.Count != 0) { - // Cannot have a stop offset before the last token - r.ActualStopOffset = Math.Max(r.ActualStopOffset, r.Tokens[r.Tokens.Count - 1].StartIndex + 1); + TMToken tmToken = r.Tokens[r.Tokens.Count - 1]; + if (tmToken != null) + { + // Cannot have a stop offset before the last token + r.ActualStopOffset = Math.Max(r.ActualStopOffset, tmToken.StartIndex + 1); + } } - if (r != null && r.ActualStopOffset < text.Length) + if (r != null && r.ActualStopOffset < text.Length && r.Tokens != null) { // Treat the rest of the line (if above limit) as one default token r.Tokens.Add(new TMToken(r.ActualStopOffset, new List())); // Use as end state the starting state - r.EndState = modeLine.State; + r.EndState = startingState; } if (r == null) { r = new LineTokens(new List() { new TMToken(0, new List()) }, text.Length, - modeLine.State); + startingState); } - modeLine.Tokens = r.Tokens; - eventBuilder.registerChangedTokens(lineIndex + 1); - modeLine.IsInvalid = false; - - if (endStateIndex < model._lines.GetNumberOfLines()) + // Hold _lock across both the epoch check AND all state writes so + // that SetGrammar cannot reset lines between the check and the + // writes. The work inside the lock is only field assignments and + // _lines.Get calls (which acquire AbstractLineList.mLock - same + // lock ordering as everywhere else: _lock -> mLock, never reversed). + lock (model._lock) { - ModelLine endStateLine = model._lines.Get(endStateIndex); - if (endStateLine.State != null && r.EndState.Equals(endStateLine.State)) + if (model._thread != this || model._grammarEpoch != expectedGrammarEpoch || !object.ReferenceEquals(model._tokenizer, tokenizer)) + { + // Stale result (grammar/tokenizer/thread changed). Re-queue under the new epoch. + // InvalidateLine re-acquires _lock, which is safe because Monitor is reentrant. + model.InvalidateLine(lineIndex); + return nextInvalidLineIndex; + } + + modelLine.Tokens = r.Tokens; + eventBuilder.registerChangedTokens(lineIndex + 1); + modelLine.IsInvalid = false; + + if (endStateIndex < lines.GetNumberOfLines()) { - // The end state of this line remains the same - nextInvalidLineIndex = lineIndex + 1; - while (nextInvalidLineIndex < model._lines.GetNumberOfLines()) + ModelLine endStateLine = lines.Get(endStateIndex); + if (endStateLine?.State != null && r.EndState != null && r.EndState.Equals(endStateLine.State)) { - bool isLastLine = nextInvalidLineIndex + 1 >= model._lines.GetNumberOfLines(); - if (model._lines.Get(nextInvalidLineIndex).IsInvalid - || (!isLastLine && model._lines.Get(nextInvalidLineIndex + 1).State == null) - || (isLastLine && this.lastState == null)) + // The end state of this line remains the same + nextInvalidLineIndex = lineIndex + 1; + while (nextInvalidLineIndex < lines.GetNumberOfLines()) { - break; + bool isLastLine = nextInvalidLineIndex + 1 >= lines.GetNumberOfLines(); + ModelLine nextLine = lines.Get(nextInvalidLineIndex); + ModelLine nextNextLine = !isLastLine ? lines.Get(nextInvalidLineIndex + 1) : null; + if (nextLine == null || nextLine.IsInvalid + || (!isLastLine && (nextNextLine == null || nextNextLine.State == null)) + || (isLastLine && this.lastState == null)) + { + break; + } + + nextInvalidLineIndex++; } - nextInvalidLineIndex++; + lineIndex = nextInvalidLineIndex; + } + else if (endStateLine != null) + { + endStateLine.State = r.EndState; + lineIndex++; + } + else + { + lineIndex++; } - - lineIndex = nextInvalidLineIndex; } else { - endStateLine.State = r.EndState; + this.lastState = r.EndState; lineIndex++; } } - else - { - this.lastState = r.EndState; - lineIndex++; - } } return nextInvalidLineIndex; @@ -271,54 +688,151 @@ public int UpdateTokensInRange(ModelTokensChangedEventBuilder eventBuilder, int public IGrammar GetGrammar() { - return _grammar; + if (IsDisposed) + return null; + + lock (_lock) + { + return _grammar; + } } public void SetGrammar(IGrammar grammar) { - if (!Object.Equals(grammar, this._grammar)) + if (IsDisposed) + return; + + TokenizerThread oldThread = null; + TokenizerThread newThreadToRun = null; + bool shouldInvalidate = false; + int endLineIndex = 0; + + // Single lock: snapshot old thread, bump epoch, apply new grammar + // state, and install new thread - all atomically. + lock (_lock) { - Stop(); + if (IsDisposed) + return; + + if (Object.Equals(grammar, this._grammar)) + return; + + Interlocked.Increment(ref _grammarEpoch); + oldThread = _thread; + _thread = null; this._grammar = grammar; _lines.ForEach((line) => line.ResetTokenizationState()); + _invalidLines.Clear(); if (grammar != null) { this._tokenizer = new Tokenizer(grammar); - _lines.Get(0).State = _tokenizer.GetInitialState(); - Start(); - InvalidateLine(0); + ModelLine firstLine = _lines.Get(0); + if (firstLine != null) + { + firstLine.State = _tokenizer.GetInitialState(); + } + + this._thread = new TokenizerThread("TMModelThread", this); + newThreadToRun = this._thread; + shouldInvalidate = true; } else { - Emit(new ModelTokensChangedEvent(new Range(0, _lines.GetNumberOfLines() - 1), this)); + this._tokenizer = null; + endLineIndex = _lines.GetNumberOfLines() - 1; } } + + newThreadToRun?.Run(); + + // Outside lock: stop and dispose old thread (non-blocking) + // The old worker will see IsStopped/CancellationRequested/_thread!=this + // and exit its loop, then disposal is handled after the task completes + if (oldThread != null) + { + oldThread.Stop(); + oldThread.DisposeAfterCompletion(); + } + + // Outside lock: signal work or emit event + if (shouldInvalidate) + { + InvalidateLine(0); + } + else + { + Emit(new ModelTokensChangedEvent(new Range(0, endLineIndex), this)); + } } public void AddModelTokensChangedListener(IModelTokensChangedListener listener) { - if (this._grammar != null) - Start(); + if (IsDisposed) + return; - lock (listeners) + TokenizerThread oldThreadToDispose = null; + + lock (_lock) { - if (!listeners.Contains(listener)) + if (IsDisposed) + return; + + lock (listeners) + { + if (IsDisposed) + return; + + if (!listeners.Contains(listener)) + { + listeners.Add(listener); + } + } + + // If grammar is set, ensure the tokenizer thread is running now that someone is listening. + if (this._grammar != null) { - listeners.Add(listener); + oldThreadToDispose = StartInternal(); } } + + // Dispose any old stopped thread outside _lock to avoid blocking the model's critical section + if (oldThreadToDispose != null) + { + oldThreadToDispose.Stop(); + oldThreadToDispose.DisposeAfterCompletion(); + } } public void RemoveModelTokensChangedListener(IModelTokensChangedListener listener) { - lock (listeners) + if (IsDisposed) + return; + + bool shouldStop = false; + + // Make remove + stop decision atomic relative to Add by synchronizing under _lock. + // Correct lock ordering is: _lock -> listeners. + lock (_lock) { - listeners.Remove(listener); - if (listeners.Count == 0) + if (IsDisposed) + return; + + lock (listeners) { - // no need to keep tokenizing if no-one cares + listeners.Remove(listener); + if (listeners.Count == 0) + { + // no need to keep tokenizing if no-one cares + shouldStop = true; + } + } + + if (shouldStop) + { + // Stop internally takes _lock, but Monitor is re-entrant + // This keeps the decision and action atomic relative to other operations guarded by _lock Stop(); } } @@ -326,31 +840,64 @@ public void RemoveModelTokensChangedListener(IModelTokensChangedListener listene public void Dispose() { + if (Interlocked.CompareExchange(ref _isDisposedFlag, 1, 0) != 0) + return; + + // Stop first (acquires _lock internally) without holding listeners. This preserves the global + // lock ordering _lock -> listeners used elsewhere (e.g. in AddModelTokensChangedListener) and + // avoids any lock inversion between _lock and listeners. + Stop(); + lock (listeners) { listeners.Clear(); } - Stop(); GetLines().Dispose(); } private void Stop() { - if (_thread == null) + TokenizerThread threadToStop = null; + + lock (_lock) + { + threadToStop = _thread; + _thread = null; + } + + if (threadToStop == null) { return; } - this._thread.Stop(); - _resetEvent.Set(); - this._thread = null; + threadToStop.Stop(); + threadToStop.DisposeAfterCompletion(); } - private void Start() + /// + /// Starts the tokenizer thread if there is no current thread or the current thread has stopped, and returns the + /// previously stopped thread instance (if any) so that it can be disposed by the caller. + /// + /// + /// This method must be called while holding the appropriate lock to ensure thread safety. If a previous tokenizer + /// thread instance exists and is already stopped, it is returned and must be disposed by the caller to prevent + /// resource leaks. If no previous thread existed, or if the existing thread is still running, this method returns null. + /// + /// + /// The previous instance of the tokenizer thread if it existed and was stopped; otherwise, null (for example, when there + /// was no previous thread or the existing thread is still running). + /// + private TokenizerThread StartInternal() { + if (IsDisposed) + return null; + + TokenizerThread oldThread = null; + if (this._thread == null || this._thread.IsStopped) { + oldThread = this._thread; // may be non-null but stopped - caller must dispose this._thread = new TokenizerThread("TMModelThread", this); } @@ -358,39 +905,139 @@ private void Start() { this._thread.Run(); } + + return oldThread; } private void BuildEventWithCallback(Action callback) { - if (this._thread == null || this._thread.IsStopped) + if (IsDisposed) return; + TokenizerThread expectedThread; + int expectedGrammarEpoch; + + lock (_lock) + { + if (IsDisposed) + return; + + expectedThread = this._thread; + expectedGrammarEpoch = this._grammarEpoch; + + if (expectedThread == null || expectedThread.IsStopped) + return; + } + ModelTokensChangedEventBuilder eventBuilder = new ModelTokensChangedEventBuilder(this); callback(eventBuilder); + // Re-check: Stop()/Dispose() may have run during the callback + if (IsDisposed) + return; + + // Re-check after the callback, because Stop()/Dispose()/SetGrammar() can run concurrently. + // If the tokenizer thread or grammar epoch changed, the event we're about to emit may no longer + // match the model's current grammar context (stale publish), so drop it. + lock (_lock) + { + // If Disposed, or Stop/SetGrammar swapped out the thread while callback ran, drop the event + if (IsDisposed) + return; + + if (!object.ReferenceEquals(this._thread, expectedThread)) + return; + + if (expectedThread.IsStopped) + return; + + if (this._grammarEpoch != expectedGrammarEpoch) + return; + } + ModelTokensChangedEvent e = eventBuilder.Build(); if (e != null) { - this.Emit(e); + Emit(e, expectedThread, expectedGrammarEpoch); } } private void Emit(ModelTokensChangedEvent e) { + if (e == null) + return; + + EmitCore(e, false, null, 0); + } + + private void Emit(ModelTokensChangedEvent e, TokenizerThread expectedThread, int expectedGrammarEpoch) + { + if (e == null) + return; + + EmitCore(e, true, expectedThread, expectedGrammarEpoch); + } + + private void EmitCore(ModelTokensChangedEvent e, bool requireEpochMatch, TokenizerThread expectedThread, int expectedGrammarEpoch) + { + // Avoid possible deadlocks by not invoking listeners under lock. Invoking listeners can cause + // arbitrary reentrant code to run, including code that tries to acquire locks that would + // deadlock with listeners lock (e.g. _lock or external locks in listener code). To avoid this, + // snapshot the listener list under lock, then invoke callbacks outside the lock + IModelTokensChangedListener[] listenerSnapshot; lock (listeners) { - foreach (IModelTokensChangedListener listener in listeners) + if (listeners.Count == 0) + { + return; + } + + listenerSnapshot = listeners.ToArray(); + } + + // Call the listeners outside the lock to avoid deadlocks. Listeners may synchronously call back into the model, + // but they will see a consistent state because we release the lock before invoking them, and we re-check + // IsDisposed and thread identity after the callback and before emitting. + for (int i = 0; i < listenerSnapshot.Length; i++) + { + IModelTokensChangedListener listener = listenerSnapshot[i]; + if (listener == null) { - try - { - listener.ModelTokensChanged(e); - } - catch (Exception ex) + continue; + } + + if (requireEpochMatch) + { + lock (_lock) { - System.Diagnostics.Debug.WriteLine(ex.Message); + if (IsDisposed) + return; + + if (!object.ReferenceEquals(this._thread, expectedThread)) + return; + + if (expectedThread != null && expectedThread.IsStopped) + return; + + if (this._grammarEpoch != expectedGrammarEpoch) + return; } } + else + { + if (IsDisposed) + return; + } + + try + { + listener.ModelTokensChanged(e); + } + catch (Exception ex) + { + System.Diagnostics.Debug.WriteLine(ex.Message); + } } } @@ -401,58 +1048,164 @@ public void ForceTokenization(int lineIndex) public void ForceTokenization(int startLineIndex, int endLineIndex) { - if (_grammar == null) + if (IsDisposed) return; - var tokenizerThread = this._thread; + TokenizerThread tokenizerThread; + int clampedStartLineIndex; + int clampedEndLineIndex; - if (tokenizerThread == null || tokenizerThread.IsStopped) - return; + lock (_lock) + { + if (IsDisposed) + return; - this.BuildEventWithCallback(eventBuilder => - tokenizerThread.UpdateTokensInRange(eventBuilder, startLineIndex, endLineIndex) - ); + if (_grammar == null) + return; + + tokenizerThread = this._thread; + if (tokenizerThread == null || tokenizerThread.IsStopped) + return; + + int lineCount = this._lines.GetNumberOfLines(); + if (lineCount <= 0) + return; + + clampedStartLineIndex = Math.Max(0, startLineIndex); + clampedEndLineIndex = Math.Min(endLineIndex, lineCount - 1); + if (clampedStartLineIndex > clampedEndLineIndex) + return; + } + + BuildEventWithCallback(eventBuilder => + { + tokenizerThread.UpdateTokensInRange(eventBuilder, clampedStartLineIndex, clampedEndLineIndex); + }); } + /// + /// Gets the tokens for the specified line, if available. + /// + /// Zero-based line index. + /// + /// The current token list for the line, or null if the model is disposed, the index is out of range, + /// or the line has not been tokenized yet. + /// + /// + /// + /// Ownership: The returned is owned by the model and must be treated as read-only. + /// Do not add, remove, or modify items in the returned list. + /// + /// + /// Stability: The model may replace the underlying list during subsequent tokenization updates for the same line. + /// Callers that need a stable view should cache the returned reference only for the duration of a single rendering pass. + /// + /// + /// Threading: This method synchronizes access to the underlying line state. + /// + /// public List GetLineTokens(int lineIndex) { - if (lineIndex < 0 || lineIndex > _lines.GetNumberOfLines() - 1) + if (IsDisposed) return null; - return _lines.Get(lineIndex).Tokens; + lock (_lock) + { + if (IsDisposed) + return null; + + ModelLine line = _lines.Get(lineIndex); + if (line == null) + return null; + + return line.Tokens; + } } + /// + /// Returns whether the specified line is currently marked as needing (re)tokenization. + /// + /// Zero-based line index. + /// + /// true if the line is invalid; otherwise false. Returns false if the model is disposed + /// or the index is out of range. + /// + /// + /// + /// This is a lightweight state query used by the tokenization pipeline. + /// + /// public bool IsLineInvalid(int lineIndex) { - return _lines.Get(lineIndex).IsInvalid; + if (IsDisposed) + return false; + + lock (_lock) + { + if (IsDisposed) + return false; + + ModelLine line = _lines.Get(lineIndex); + if (line == null) + return false; + + return line.IsInvalid; + } } public void InvalidateLine(int lineIndex) { - var line = this._lines.Get(lineIndex); - if (line == null) + if (IsDisposed) return; - line.IsInvalid = true; - + TokenizerThread thread; lock (_lock) { + if (IsDisposed) + return; + + ModelLine line = this._lines.Get(lineIndex); + if (line == null) + return; + + line.IsInvalid = true; this._invalidLines.Enqueue(lineIndex); - _resetEvent.Set(); + thread = _thread; } + + thread?.Signal(); } public void InvalidateLineRange(int iniLineIndex, int endLineIndex) { + if (IsDisposed) + return; + + TokenizerThread thread; lock (_lock) { - for (int i = iniLineIndex; i <= endLineIndex; i++) + int lineCount = this._lines.GetNumberOfLines(); + if (lineCount != 0) { - this._lines.Get(i).IsInvalid = true; - this._invalidLines.Enqueue(i); + // clamp to valid range + int clampedStart = Math.Max(0, iniLineIndex); + int clampedEnd = Math.Min(endLineIndex, lineCount - 1); + + for (int i = clampedStart; i <= clampedEnd; i++) + { + ModelLine line = this._lines.Get(i); + if (line != null) + { + line.IsInvalid = true; + this._invalidLines.Enqueue(i); + } + } } - _resetEvent.Set(); + + thread = _thread; } + + thread?.Signal(); } public IModelLines GetLines() @@ -460,4 +1213,4 @@ public IModelLines GetLines() return this._lines; } } -} \ No newline at end of file +}