From 56f4a58c3842f9bc541c7f5b99db2fca1f874c12 Mon Sep 17 00:00:00 2001 From: mofiaboss Date: Fri, 5 Dec 2025 12:32:27 -0500 Subject: [PATCH] fix: ensure writer lock is released on exception in RefreshAuthToken MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #24 - ReaderWriterLock deadlock when HTTP request fails Problem: When RefreshAuthToken() encounters an exception during the HTTP request (e.g., SSL failure, connection reset, timeout), the writer lock is never released. Since this is a static lock shared across all SDK instances, one failed refresh blocks ALL subsequent SDK calls until they timeout after 30 seconds. This caused production incidents where cascading lock timeouts amplified transient network failures into extended outages. Solution: Wrap the method body in try/finally to ensure ReleaseWriterLock() is always called, regardless of success or failure. Changes: - Add try/finally block to guarantee lock release - Remove redundant reader lock acquisition in success path - Add comprehensive unit tests proving lock is released after exceptions The tests verify: - Lock is released after connection failures - Multiple sequential calls complete quickly (not 30s each) - Lock can be immediately acquired after exception - Concurrent calls all resolve without permanent contention 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../RefreshAuthTokenLockReleaseTest.cs | 368 ++++++++++++++++++ SDK/Kount/Ris/Request.cs | 81 ++-- 2 files changed, 410 insertions(+), 39 deletions(-) create mode 100644 PaymentsFraudTest/RefreshAuthTokenLockReleaseTest.cs diff --git a/PaymentsFraudTest/RefreshAuthTokenLockReleaseTest.cs b/PaymentsFraudTest/RefreshAuthTokenLockReleaseTest.cs new file mode 100644 index 0000000..be65881 --- /dev/null +++ b/PaymentsFraudTest/RefreshAuthTokenLockReleaseTest.cs @@ -0,0 +1,368 @@ +namespace PaymentsFraudTest +{ + using System; + using System.Diagnostics; + using System.Linq; + using System.Net; + using System.Net.Sockets; + using System.Reflection; + using System.Threading; + using System.Threading.Tasks; + using Kount.Ris; + using Xunit; + + /// + /// Tests for GitHub Issue #24: RefreshAuthToken must release lock on exception + /// https://github.com/Kount/kount-ris-dotnet-sdk/issues/24 + /// + /// The bug: RefreshAuthToken() acquires a writer lock but doesn't use try/finally, + /// so if an HTTP exception occurs, the static lock is NEVER released. This causes + /// all subsequent SDK calls to timeout waiting for the lock (30-second timeout). + /// + /// The fix: Wrap the method body in try/finally to ensure ReleaseWriterLock() is + /// always called, even when exceptions occur. + /// + [Collection("RefreshAuthTokenTests")] // Prevents parallel execution - these tests share static state + public class RefreshAuthTokenLockReleaseTest : IDisposable + { + // SDK uses 30-second lock timeout. If we're waiting more than 2 seconds for a call + // that should fail immediately, something is wrong with lock release. + private const int QuickFailureThresholdMs = 2000; + + // Maximum acceptable time for multiple sequential operations + // Should be << 30 seconds (the lock timeout) to prove lock is being released + private const int MaxSequentialOperationsMs = 10000; + + // SDK's internal lock timeout + private const int SdkLockTimeoutSeconds = 30; + + public RefreshAuthTokenLockReleaseTest() + { + // Ensure clean state before each test + ResetStaticAuthState(); + EnsureLockIsReleased(); + } + + public void Dispose() + { + // Clean up after each test + ResetStaticAuthState(); + EnsureLockIsReleased(); + } + + /// + /// CORE TEST: Verifies that when RefreshAuthToken throws an exception, + /// the writer lock is properly released so subsequent calls don't timeout. + /// + /// This directly tests the try/finally fix for GitHub Issue #24. + /// + /// Expected behavior WITH the fix: Both calls fail quickly (connection refused) + /// Behavior WITHOUT the fix: Second call hangs for 30 seconds waiting for lock + /// + [Fact] + public void RefreshAuthToken_WhenConnectionRefused_ReleasesLockForNextCall() + { + // Arrange: Use localhost with a port that's definitely not listening + // This gives us instant connection refused - no DNS delays + var config = CreateConfigWithUnreachableLocalhost(); + + // Verify the lock is free before we start + AssertLockIsNotHeld("Lock was held before test started - test isolation failure"); + + // Act: First call - should fail with connection error but MUST release the lock + var stopwatch = Stopwatch.StartNew(); + var firstException = Record.Exception(() => + { + new Inquiry(true, config, null); + }); + var firstCallTime = stopwatch.ElapsedMilliseconds; + stopwatch.Restart(); + + // Assert: First call failed (expected - invalid URL) + Assert.NotNull(firstException); + + // Key assertion: Lock should be released after exception + AssertLockIsNotHeld("Lock was still held after first call exception - try/finally fix not working"); + + // Act: Second call - should also fail quickly, NOT wait 30 seconds for stuck lock + var secondException = Record.Exception(() => + { + new Inquiry(true, config, null); + }); + stopwatch.Stop(); + var secondCallTime = stopwatch.ElapsedMilliseconds; + + // Assert: Second call also failed (expected) + Assert.NotNull(secondException); + + // Assert: Second call completed quickly (proves lock was released) + Assert.True( + secondCallTime < QuickFailureThresholdMs, + $"Second call took {secondCallTime}ms. If this is close to {SdkLockTimeoutSeconds}s, " + + $"the lock was not released after the first exception. Expected <{QuickFailureThresholdMs}ms."); + + // Both calls should have similar timing (both hitting connection refused immediately) + var timeDifference = Math.Abs(firstCallTime - secondCallTime); + Assert.True( + timeDifference < 1000, + $"First call: {firstCallTime}ms, Second call: {secondCallTime}ms. " + + $"Large difference suggests lock contention."); + } + + /// + /// Verifies multiple sequential calls all complete quickly. + /// If the lock wasn't being released, each call would add 30 seconds. + /// 5 calls * 30 seconds = 150 seconds WITHOUT the fix + /// 5 calls * ~100ms = ~500ms WITH the fix + /// + [Fact] + public void RefreshAuthToken_MultipleSequentialFailures_AllCompleteQuickly() + { + // Arrange + var config = CreateConfigWithUnreachableLocalhost(); + const int callCount = 5; + + // Act + var stopwatch = Stopwatch.StartNew(); + for (int i = 0; i < callCount; i++) + { + ResetStaticAuthState(); // Force refresh on each call + + var exception = Record.Exception(() => new Inquiry(true, config, null)); + Assert.NotNull(exception); + } + stopwatch.Stop(); + + // Assert: Total time should be << 30 seconds + // Without the fix: 5 * 30s = 150s (first succeeds in getting lock, rest timeout) + // With the fix: 5 * ~100ms = ~500ms + Assert.True( + stopwatch.ElapsedMilliseconds < MaxSequentialOperationsMs, + $"{callCount} sequential calls took {stopwatch.ElapsedMilliseconds}ms. " + + $"Expected <{MaxSequentialOperationsMs}ms. If close to {callCount * SdkLockTimeoutSeconds}s, " + + $"lock is not being released between calls."); + } + + /// + /// Directly verifies the ReaderWriterLock state using reflection. + /// This is the most definitive test that the lock is released. + /// + [Fact] + public void RefreshAuthToken_AfterException_CanImmediatelyAcquireLock() + { + // Arrange + var config = CreateConfigWithUnreachableLocalhost(); + var lockField = GetLockField(); + var lockValue = lockField.GetValue(null); + Assert.NotNull(lockValue); + var readerWriterLock = (ReaderWriterLock)lockValue; + + // Act: Trigger a failed refresh + var exception = Record.Exception(() => new Inquiry(true, config, null)); + Assert.NotNull(exception); + + // Assert: We should be able to acquire the lock immediately + var stopwatch = Stopwatch.StartNew(); + bool acquiredLock = false; + try + { + // Try to acquire with a very short timeout + readerWriterLock.AcquireWriterLock(TimeSpan.FromMilliseconds(100)); + acquiredLock = true; + } + catch (ApplicationException) + { + // Lock acquisition timed out - this means lock is stuck + acquiredLock = false; + } + finally + { + stopwatch.Stop(); + if (acquiredLock) + { + readerWriterLock.ReleaseWriterLock(); + } + } + + Assert.True( + acquiredLock, + $"Could not acquire writer lock after {stopwatch.ElapsedMilliseconds}ms. " + + "The lock is stuck - try/finally fix not applied correctly."); + + Assert.True( + stopwatch.ElapsedMilliseconds < 200, + $"Lock acquisition took {stopwatch.ElapsedMilliseconds}ms, expected instant."); + } + + /// + /// Tests concurrent access to verify no permanent lock contention. + /// Multiple threads should all fail quickly, not queue up behind a stuck lock. + /// + [Fact] + public async Task RefreshAuthToken_ConcurrentCalls_AllResolveWithinReasonableTime() + { + // Arrange + var config = CreateConfigWithUnreachableLocalhost(); + const int threadCount = 3; + var exceptions = new Exception?[] { null, null, null }; + var completionTimes = new long[threadCount]; + + // Act: Launch concurrent requests + var tasks = new Task[threadCount]; + var startSignal = new ManualResetEventSlim(false); + + for (int i = 0; i < threadCount; i++) + { + int index = i; + tasks[i] = Task.Run(() => + { + startSignal.Wait(); // All threads start simultaneously + var sw = Stopwatch.StartNew(); + try + { + ResetStaticAuthState(); + new Inquiry(true, config, null); + } + catch (Exception ex) + { + exceptions[index] = ex; + } + sw.Stop(); + completionTimes[index] = sw.ElapsedMilliseconds; + }); + } + + startSignal.Set(); // Release all threads + + // Wait with timeout using async await + using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(90)); + try + { + await Task.WhenAll(tasks).WaitAsync(cts.Token); + } + catch (OperationCanceledException) + { + Assert.Fail("Some threads did not complete within 90 seconds - possible deadlock"); + } + + // Assert: All threads completed, all got exceptions + for (int i = 0; i < threadCount; i++) + { + Assert.NotNull(exceptions[i]); + } + + // Total time should be reasonable + // Worst case with proper lock release: threads serialize, each takes ~100ms + // Without fix: 30s * threadCount minimum + var maxTime = completionTimes.Max(); + Assert.True( + maxTime < SdkLockTimeoutSeconds * 1000, + $"Slowest thread took {maxTime}ms. If this is ~{SdkLockTimeoutSeconds}s, " + + "threads are waiting for stuck lock."); + } + + #region Helper Methods + + /// + /// Creates a Configuration pointing to localhost on a port that's not listening. + /// This ensures connection refused errors happen instantly (no DNS lookup delay). + /// + private Configuration CreateConfigWithUnreachableLocalhost() + { + // Port 1 is reserved and won't have anything listening + // Using 127.0.0.1 avoids any DNS resolution + return new Configuration + { + MerchantId = "999999", + URL = "https://127.0.0.1:1/api/v1/ris", + ConfigKey = "0123456789ABCDEF0123456789ABCDEF", // 32 chars for Base85 + ConnectTimeout = "1000", // 1 second - fail fast + EnableMigrationMode = "true", + PaymentsFraudClientId = "123456", // Must be numeric - SDK parses as Int64 + // Auth URL pointing to non-listening localhost port - instant connection refused + PaymentsFraudAuthUrl = "https://127.0.0.1:1/oauth2/token" + }; + } + + /// + /// Resets the static auth state to force a token refresh on next call. + /// + private void ResetStaticAuthState() + { + var expirationField = typeof(Request).GetField( + "_bearerAuthResponseExpiration", + BindingFlags.NonPublic | BindingFlags.Static); + + expirationField?.SetValue(null, DateTimeOffset.MinValue); + + var responseField = typeof(Request).GetField( + "_bearerAuthResponse", + BindingFlags.NonPublic | BindingFlags.Static); + + if (responseField != null) + { + var emptyResponse = Activator.CreateInstance(responseField.FieldType); + responseField.SetValue(null, emptyResponse); + } + } + + /// + /// Gets the private static ReaderWriterLock field via reflection. + /// + private FieldInfo GetLockField() + { + var field = typeof(Request).GetField( + "_bearerRefreshLock", + BindingFlags.NonPublic | BindingFlags.Static); + + Assert.NotNull(field); + return field; + } + + /// + /// Asserts that the writer lock is not currently held. + /// + private void AssertLockIsNotHeld(string message) + { + var lockField = GetLockField(); + var lockValue = lockField.GetValue(null); + if (lockValue == null) + { + Assert.Fail("Lock field is null - test cannot proceed"); + return; + } + var readerWriterLock = (ReaderWriterLock)lockValue; + + Assert.False(readerWriterLock.IsWriterLockHeld, message); + } + + /// + /// Ensures the lock is released. Called during test setup/teardown. + /// + private void EnsureLockIsReleased() + { + var lockField = GetLockField(); + var lockValue = lockField.GetValue(null); + if (lockValue == null) + { + return; // Nothing to release + } + var readerWriterLock = (ReaderWriterLock)lockValue; + + // If the current thread somehow holds the lock, release it + if (readerWriterLock.IsWriterLockHeld) + { + try + { + readerWriterLock.ReleaseWriterLock(); + } + catch + { + // Ignore - just best effort cleanup + } + } + } + + #endregion + } +} diff --git a/SDK/Kount/Ris/Request.cs b/SDK/Kount/Ris/Request.cs index 6a7ef06..90b72f4 100755 --- a/SDK/Kount/Ris/Request.cs +++ b/SDK/Kount/Ris/Request.cs @@ -1150,61 +1150,64 @@ private Hashtable FetchArrayParams(Hashtable data) } /// - /// Refresh the bearer token - /// - /// - /// - /// + /// Refresh the bearer token. + /// Uses try/finally to ensure the writer lock is always released, even if an exception + /// occurs during the HTTP request or JSON deserialization. This prevents deadlocks + /// when network errors occur (e.g., SSL failures, connection resets, timeouts). + /// See: https://github.com/Kount/kount-ris-dotnet-sdk/issues/24 + /// + /// The authentication URL + /// The API key for authentication + /// Thrown when token refresh fails private static void RefreshAuthToken(string authUrl, string apiKey) { _bearerRefreshLock.AcquireWriterLock(TimeSpan.FromSeconds(30)); - - // short circuit if another thread as already refreshed the token - if (_bearerAuthResponseExpiration > DateTimeOffset.Now) + try { - _bearerRefreshLock.ReleaseWriterLock(); - return; - } - - string tokenUrl = authUrl + "?grant_type=client_credentials&scope=k1_integration_api"; - - HttpWebRequest webReq = (HttpWebRequest)WebRequest.Create(tokenUrl); - webReq.Method = "POST"; - webReq.ContentType = "application/x-www-form-urlencoded"; - webReq.Headers[PF_AUTH_HEADER] = "Basic " + apiKey; + // Short circuit if another thread has already refreshed the token + if (_bearerAuthResponseExpiration > DateTimeOffset.Now) + { + return; + } + string tokenUrl = authUrl + "?grant_type=client_credentials&scope=k1_integration_api"; - string responseString = string.Empty; - using (HttpWebResponse webResp = (HttpWebResponse)webReq.GetResponse()) - { - // Read the token response string - using (Stream responseStream = webResp.GetResponseStream()) + HttpWebRequest webReq = (HttpWebRequest)WebRequest.Create(tokenUrl); + webReq.Method = "POST"; + webReq.ContentType = "application/x-www-form-urlencoded"; + webReq.Headers[PF_AUTH_HEADER] = "Basic " + apiKey; + + string responseString = string.Empty; + using (HttpWebResponse webResp = (HttpWebResponse)webReq.GetResponse()) { - if (responseStream != null) + // Read the token response string + using (Stream responseStream = webResp.GetResponseStream()) { - using (StreamReader tokenResponse = new StreamReader(responseStream)) + if (responseStream != null) { - responseString = tokenResponse.ReadToEnd(); + using (StreamReader tokenResponse = new StreamReader(responseStream)) + { + responseString = tokenResponse.ReadToEnd(); + } } } } + + BearerAuthResponse authResponse = JsonSerializer.Deserialize(responseString); + if (authResponse != null && authResponse.ExpiresIn != 0) + { + _bearerAuthResponseExpiration = DateTime.Now.AddSeconds(authResponse.ExpiresIn); + _bearerAuthResponse = authResponse; + } + else + { + throw new Kount.Ris.RequestException("Failed to update the bearer token invalid response"); + } } - - BearerAuthResponse authResponse = JsonSerializer.Deserialize(responseString); - if (authResponse != null && authResponse.ExpiresIn != 0) - { - _bearerRefreshLock.AcquireReaderLock(TimeSpan.FromSeconds(5)); - _bearerAuthResponseExpiration = DateTime.Now.AddSeconds(authResponse.ExpiresIn); - _bearerAuthResponse = authResponse; - _bearerRefreshLock.ReleaseReaderLock(); - } - else + finally { _bearerRefreshLock.ReleaseWriterLock(); - throw new Kount.Ris.RequestException("Failed to update the bearer token invalid response"); } - - _bearerRefreshLock.ReleaseWriterLock(); } } } \ No newline at end of file