From 8db8a3741e3645a5b5c8a97ba3011df2bda1b856 Mon Sep 17 00:00:00 2001 From: Justice Date: Sun, 31 May 2026 00:09:03 +0100 Subject: [PATCH] feat(stellar): add Horizon circuit breaker --- harvest-finance/backend/.env.example | 2 + .../stellar/services/stellar.service.spec.ts | 60 +++-- .../src/stellar/services/stellar.service.ts | 240 ++++++++++++++---- .../src/stellar/utils/circuit-breaker.spec.ts | 86 +++++++ .../src/stellar/utils/circuit-breaker.ts | 177 +++++++++++++ .../src/stellar/utils/stellar-retry.ts | 19 +- 6 files changed, 510 insertions(+), 74 deletions(-) create mode 100644 harvest-finance/backend/src/stellar/utils/circuit-breaker.spec.ts create mode 100644 harvest-finance/backend/src/stellar/utils/circuit-breaker.ts diff --git a/harvest-finance/backend/.env.example b/harvest-finance/backend/.env.example index ab67c968e..29f9a7204 100644 --- a/harvest-finance/backend/.env.example +++ b/harvest-finance/backend/.env.example @@ -45,6 +45,8 @@ PAYMENT_AUTO_RELEASE=true STELLAR_NETWORK=testnet STELLAR_PLATFORM_PUBLIC_KEY= STELLAR_PLATFORM_SECRET_KEY= +STELLAR_CIRCUIT_FAILURE_THRESHOLD=5 +STELLAR_CIRCUIT_RESET_TIMEOUT_MS=30000 # Stellar Authentication (SEP-10) STELLAR_SERVER_SECRET=SBX7SARQOFS6IM2HS2N5TVK54AEF55E3FHOXBTWA6IPEEJJ4W5WJWE6W diff --git a/harvest-finance/backend/src/stellar/services/stellar.service.spec.ts b/harvest-finance/backend/src/stellar/services/stellar.service.spec.ts index 55f2dc57a..b0ce606a2 100644 --- a/harvest-finance/backend/src/stellar/services/stellar.service.spec.ts +++ b/harvest-finance/backend/src/stellar/services/stellar.service.spec.ts @@ -7,7 +7,10 @@ import * as StellarSdk from 'stellar-sdk'; import { BadRequestException, InternalServerErrorException, + ServiceUnavailableException, } from '@nestjs/common'; +import { CircuitBreaker } from '../utils/circuit-breaker'; +import { isRetryableStellarError } from '../utils/stellar-retry'; describe('StellarService - Escrow Creation', () => { let service: StellarService; @@ -39,26 +42,17 @@ describe('StellarService - Escrow Creation', () => { get: jest.fn().mockImplementation((key, defaultValue) => { const config: Record = { STELLAR_NETWORK: 'testnet', - STELLAR_PLATFORM_PUBLIC_KEY: - 'GXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX', + STELLAR_PLATFORM_PUBLIC_KEY: platformKeypair.publicKey(), }; return config[key] ?? defaultValue; }), - getOrThrow: jest - .fn() - .mockReturnValue( - 'GXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX', - ), + getOrThrow: jest.fn().mockReturnValue(platformKeypair.publicKey()), }, }, { provide: SecretsService, useValue: { - getSecret: jest - .fn() - .mockResolvedValue( - 'SDXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX', - ), + getSecret: jest.fn().mockResolvedValue(platformKeypair.secret()), }, }, { @@ -247,14 +241,16 @@ describe('StellarService - Escrow Creation', () => { }); it('should throw error after max retries', async () => { - mockServer.submitTransaction.mockRejectedValue( - new Error('Persistent failure'), - ); + const persistentTimeout = Object.assign(new Error('Persistent timeout'), { + code: 'ETIMEDOUT', + }); + + mockServer.submitTransaction.mockRejectedValue(persistentTimeout); const mockTx = {} as any; await expect(service['submitWithRetry'](mockTx, 'test')).rejects.toThrow( - 'Persistent failure', + 'Persistent timeout', ); expect(mockServer.submitTransaction).toHaveBeenCalledTimes(3); }); @@ -282,6 +278,38 @@ describe('StellarService - Escrow Creation', () => { }); }); + describe('Horizon Circuit Breaker', () => { + beforeEach(() => { + (service as any).horizonCircuitBreaker = new CircuitBreaker({ + name: 'stellar-horizon-test', + failureThreshold: 2, + resetTimeoutMs: 30_000, + shouldTrip: isRetryableStellarError, + }); + }); + + it('opens after repeated transient Horizon failures and blocks the next call', async () => { + const transientFailure = { + response: { status: 503 }, + message: 'Horizon unavailable', + }; + + mockServer.loadAccount.mockRejectedValue(transientFailure); + + await expect( + service.getAccountInfo(farmerKeypair.publicKey()), + ).rejects.toThrow(InternalServerErrorException); + await expect( + service.getAccountInfo(farmerKeypair.publicKey()), + ).rejects.toThrow(InternalServerErrorException); + await expect( + service.getAccountInfo(farmerKeypair.publicKey()), + ).rejects.toThrow(ServiceUnavailableException); + + expect(mockServer.loadAccount).toHaveBeenCalledTimes(2); + }); + }); + describe('Fee Bump Transactions', () => { it('should submit fee bump with priority fee', async () => { const submitFeeBumpSpy = jest diff --git a/harvest-finance/backend/src/stellar/services/stellar.service.ts b/harvest-finance/backend/src/stellar/services/stellar.service.ts index d480cd072..74c7a3be3 100644 --- a/harvest-finance/backend/src/stellar/services/stellar.service.ts +++ b/harvest-finance/backend/src/stellar/services/stellar.service.ts @@ -4,11 +4,19 @@ import { BadRequestException, InternalServerErrorException, OnModuleInit, + ServiceUnavailableException, } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; import { SecretsService } from '../../common/secrets/secrets.service'; import * as StellarSdk from 'stellar-sdk'; import { CustomLoggerService } from '../../logger/custom-logger.service'; +import { retry } from '../../common/utils/retry'; +import { + CircuitBreaker, + CircuitBreakerOpenError, + CircuitBreakerStateChange, +} from '../utils/circuit-breaker'; +import { isRetryableStellarError } from '../utils/stellar-retry'; import { EscrowCreateParams, EscrowResult, @@ -30,6 +38,7 @@ export class StellarService implements OnModuleInit { private readonly server: StellarSdk.Horizon.Server; private readonly networkPassphrase: string; private readonly platformPublicKey: string; + private readonly horizonCircuitBreaker: CircuitBreaker; private platformSecretKey: string; private structuredLogger: CustomLoggerService; @@ -61,6 +70,7 @@ export class StellarService implements OnModuleInit { this.platformPublicKey = this.configService.getOrThrow( 'STELLAR_PLATFORM_PUBLIC_KEY', ); + this.horizonCircuitBreaker = this.createHorizonCircuitBreaker(); } async onModuleInit() { @@ -85,7 +95,10 @@ export class StellarService implements OnModuleInit { async getAccountInfo(publicKey: string): Promise { this.validatePublicKey(publicKey); try { - const account = await this.server.loadAccount(publicKey); + const account = await this.loadHorizonAccount( + publicKey, + `getAccountInfo(${publicKey})`, + ); const xlmBalance = account.balances.find( (b) => b.asset_type === 'native', ); @@ -118,7 +131,10 @@ export class StellarService implements OnModuleInit { > { this.validatePublicKey(publicKey); try { - const account = await this.server.loadAccount(publicKey); + const account = await this.loadHorizonAccount( + publicKey, + `getAccountBalances(${publicKey})`, + ); return account.balances.map((b: StellarBalance) => ({ assetCode: b.asset_type === 'native' ? 'XLM' : b.asset_code!, assetIssuer: @@ -170,7 +186,10 @@ export class StellarService implements OnModuleInit { .limit(pageSize); if (cursor) call.cursor(cursor); - const page = await call.call(); + const page = await this.callHorizon( + `getAccountTransactions(${publicKey})`, + () => call.call(), + ); if (page.records.length === 0) { return { total: null, @@ -207,7 +226,10 @@ export class StellarService implements OnModuleInit { .limit(finalPageSize); if (cursor) finalCall.cursor(cursor); - const finalPage = await finalCall.call(); + const finalPage = await this.callHorizon( + `getAccountTransactions(${publicKey})`, + () => finalCall.call(), + ); const slice = finalPage.records.slice( remainingToSkip, remainingToSkip + safeLimit, @@ -266,10 +288,10 @@ export class StellarService implements OnModuleInit { const decodedRecords = await Promise.all( history.records.map(async (txMeta: any) => { try { - const fullTx = await this.server - .transactions() - .transaction(txMeta.hash) - .call(); + const fullTx = await this.callHorizon( + `getDecodedAccountTransactions(${txMeta.hash})`, + () => this.server.transactions().transaction(txMeta.hash).call(), + ); const envelope = StellarSdk.TransactionBuilder.fromXDR( fullTx.envelope_xdr, this.networkPassphrase, @@ -323,7 +345,7 @@ export class StellarService implements OnModuleInit { async verifyConnection(): Promise { try { - await this.server.loadAccount(this.platformPublicKey); + await this.loadHorizonAccount(this.platformPublicKey, 'verifyConnection'); this.logger.log( `Stellar connection OK — platform account: ${this.platformPublicKey}`, ); @@ -357,8 +379,9 @@ export class StellarService implements OnModuleInit { const platformKeypair = StellarSdk.Keypair.fromSecret( this.platformSecretKey, ); - const platformAccount = await this.server.loadAccount( + const platformAccount = await this.loadHorizonAccount( this.platformPublicKey, + 'releaseUpfrontPayment.loadPlatformAccount', ); const transaction = new StellarSdk.TransactionBuilder(platformAccount, { @@ -442,8 +465,9 @@ export class StellarService implements OnModuleInit { } const asset = this.resolveAsset(assetCode, assetIssuer); - const platformAccount = await this.server.loadAccount( + const platformAccount = await this.loadHorizonAccount( this.platformPublicKey, + 'createEscrow.loadPlatformAccount', ); // Predicate: farmer can claim unconditionally @@ -490,10 +514,14 @@ export class StellarService implements OnModuleInit { this.platformSecretKey, params.priorityFeeStroops, ); - const response = await this.server - .transactions() - .transaction(bumpResult.feeBumpTransactionHash) - .call(); + const response = await this.callHorizon( + 'createEscrow.lookupFeeBumpTransaction', + () => + this.server + .transactions() + .transaction(bumpResult.feeBumpTransactionHash) + .call(), + ); const balanceId = this.extractBalanceId(response as any); return { @@ -554,7 +582,10 @@ export class StellarService implements OnModuleInit { ); } - const farmerAccount = await this.server.loadAccount(farmerPublicKey); + const farmerAccount = await this.loadHorizonAccount( + farmerPublicKey, + 'releasePayment.loadFarmerAccount', + ); const transaction = new StellarSdk.TransactionBuilder(farmerAccount, { fee: await this.getBaseFee(), @@ -626,7 +657,10 @@ export class StellarService implements OnModuleInit { ); } - const buyerAccount = await this.server.loadAccount(buyerPublicKey); + const buyerAccount = await this.loadHorizonAccount( + buyerPublicKey, + 'refundEscrow.loadBuyerAccount', + ); const transaction = new StellarSdk.TransactionBuilder(buyerAccount, { fee: await this.getBaseFee(), @@ -702,8 +736,9 @@ export class StellarService implements OnModuleInit { } const sourceKeypair = StellarSdk.Keypair.fromSecret(sourceSecretKey); - const sourceAccount = await this.server.loadAccount( + const sourceAccount = await this.loadHorizonAccount( sourceKeypair.publicKey(), + 'setupMultiSigAccount.loadSourceAccount', ); const txBuilder = new StellarSdk.TransactionBuilder(sourceAccount, { @@ -760,14 +795,14 @@ export class StellarService implements OnModuleInit { transactionHash: string, ): Promise { try { - const tx = await this.server - .transactions() - .transaction(transactionHash) - .call(); - const ops = await this.server - .operations() - .forTransaction(transactionHash) - .call(); + const tx = await this.callHorizon( + `getTransactionStatus(${transactionHash})`, + () => this.server.transactions().transaction(transactionHash).call(), + ); + const ops = await this.callHorizon( + `getTransactionStatus.operations(${transactionHash})`, + () => this.server.operations().forTransaction(transactionHash).call(), + ); const operations = ops.records.map((op: any) => ({ type: op.type, @@ -800,10 +835,10 @@ export class StellarService implements OnModuleInit { async getClaimableBalances(publicKey: string): Promise { this.validatePublicKey(publicKey); try { - const response = await this.server - .claimableBalances() - .claimant(publicKey) - .call(); + const response = await this.callHorizon( + `getClaimableBalances(${publicKey})`, + () => this.server.claimableBalances().claimant(publicKey).call(), + ); return response.records; } catch (err) { this.handleStellarError(err, 'getClaimableBalances'); @@ -842,7 +877,7 @@ export class StellarService implements OnModuleInit { async estimateFee(operationCount = 1): Promise { const safeOperationCount = Math.max(1, operationCount); try { - const feeStats = await this.server.feeStats(); + const feeStats = await this.getHorizonFeeStats('estimateFee'); const chargedStats = feeStats.fee_charged as any; const baseFeeStroops = parseInt(chargedStats.mode, 10); const totalStroops = baseFeeStroops * safeOperationCount; @@ -894,7 +929,7 @@ export class StellarService implements OnModuleInit { percentile: number = 90, ): Promise { try { - const stats = await this.server.feeStats(); + const stats = await this.getHorizonFeeStats('getRecommendedPriorityFee'); const pStats = stats.fee_charged as any; let recommendedStroops = parseInt(pStats.mode, 10); @@ -991,6 +1026,100 @@ export class StellarService implements OnModuleInit { // PRIVATE HELPERS // ───────────────────────────────────────────────────────────────────────────── + private createHorizonCircuitBreaker(): CircuitBreaker { + return new CircuitBreaker({ + name: 'stellar-horizon', + failureThreshold: this.getPositiveIntegerConfig( + 'STELLAR_CIRCUIT_FAILURE_THRESHOLD', + 5, + ), + resetTimeoutMs: this.getPositiveIntegerConfig( + 'STELLAR_CIRCUIT_RESET_TIMEOUT_MS', + 30_000, + ), + shouldTrip: isRetryableStellarError, + onStateChange: (change) => this.logCircuitBreakerStateChange(change), + }); + } + + private getPositiveIntegerConfig(key: string, defaultValue: number): number { + const raw = this.configService.get(key); + const parsed = Number(raw); + return Number.isFinite(parsed) && parsed > 0 + ? Math.floor(parsed) + : defaultValue; + } + + private logCircuitBreakerStateChange( + change: CircuitBreakerStateChange, + ): void { + const payload = { + event: 'stellar_horizon_circuit_state_changed', + circuit: change.name, + from: change.from, + to: change.to, + reason: change.reason, + failureCount: change.failureCount, + retryAfterMs: change.retryAfterMs, + }; + + if (change.to === 'open') { + this.structuredLogger?.warn(payload, StellarService.name); + this.logger.warn( + `Stellar Horizon circuit opened | retryAfterMs=${change.retryAfterMs} reason=${change.reason}`, + ); + return; + } + + this.structuredLogger?.log(payload, StellarService.name); + this.logger.log( + `Stellar Horizon circuit state changed | ${change.from} -> ${change.to} reason=${change.reason}`, + ); + } + + private async callHorizon( + context: string, + operation: () => Promise, + ): Promise { + try { + return await this.horizonCircuitBreaker.execute(operation, context); + } catch (err) { + if (err instanceof CircuitBreakerOpenError) { + throw this.createHorizonUnavailableException(err, context); + } + throw err; + } + } + + private async loadHorizonAccount( + publicKey: string, + context: string, + ): Promise { + return this.callHorizon(context, () => this.server.loadAccount(publicKey)); + } + + private async getHorizonFeeStats(context: string): Promise { + return this.callHorizon(context, () => this.server.feeStats()); + } + + private createHorizonUnavailableException( + err: CircuitBreakerOpenError, + context: string, + ): ServiceUnavailableException { + this.structuredLogger?.errorEvent?.( + 'stellar_horizon_circuit_open', + { + context, + retryAfterMs: err.retryAfterMs, + message: err.message, + }, + StellarService.name, + ); + return new ServiceUnavailableException( + `Stellar Horizon is temporarily unavailable (context: ${context}). Retry after ${err.retryAfterMs}ms`, + ); + } + private resolveAsset( assetCode?: string, assetIssuer?: string, @@ -1008,7 +1137,7 @@ export class StellarService implements OnModuleInit { private async getBaseFee(): Promise { try { - const stats = await this.server.feeStats(); + const stats = await this.getHorizonFeeStats('getBaseFee'); return stats.fee_charged.mode; } catch { return '100'; @@ -1067,11 +1196,15 @@ export class StellarService implements OnModuleInit { } private handleStellarError(err: any, context: string): never { + if (err instanceof CircuitBreakerOpenError) { + throw this.createHorizonUnavailableException(err, context); + } + const status = err?.response?.status; if (err?.response?.data?.extras?.result_codes) { const resultCodes = err.response.data.extras.result_codes; - this.structuredLogger?.errorEvent( + this.structuredLogger?.errorEvent?.( 'stellar_tx_failed', { context, @@ -1100,7 +1233,11 @@ export class StellarService implements OnModuleInit { throw err; } - this.structuredLogger?.errorEvent( + if (err instanceof ServiceUnavailableException) { + throw err; + } + + this.structuredLogger?.errorEvent?.( 'stellar_tx_failed', { context, @@ -1123,19 +1260,24 @@ export class StellarService implements OnModuleInit { transaction: StellarSdk.Transaction | StellarSdk.FeeBumpTransaction, context: string, ): Promise { - const maxRetries = 3; - let lastError: Error | null = null; - - for (let attempt = 1; attempt <= maxRetries; attempt++) { - try { - const response = await this.server.submitTransaction(transaction); - return response; - } catch (error) { - lastError = error as Error; - if (attempt === maxRetries) break; - await new Promise((resolve) => setTimeout(resolve, 1000 * attempt)); - } - } - throw lastError; + return retry( + () => + this.callHorizon(context, () => + this.server.submitTransaction(transaction), + ), + { + maxAttempts: 3, + baseDelayMs: 1000, + maxDelayMs: 2000, + factor: 2, + jitter: false, + isRetryable: isRetryableStellarError, + onRetry: (err, attempt, delayMs) => { + this.logger.warn( + `Retrying Stellar submit in ${delayMs}ms | context=${context} attempt=${attempt} error=${(err as Error)?.message ?? 'unknown'}`, + ); + }, + }, + ); } } diff --git a/harvest-finance/backend/src/stellar/utils/circuit-breaker.spec.ts b/harvest-finance/backend/src/stellar/utils/circuit-breaker.spec.ts new file mode 100644 index 000000000..b8624f39c --- /dev/null +++ b/harvest-finance/backend/src/stellar/utils/circuit-breaker.spec.ts @@ -0,0 +1,86 @@ +import { + CircuitBreaker, + CircuitBreakerOpenError, +} from './circuit-breaker'; + +describe('CircuitBreaker', () => { + let now: number; + + const createBreaker = (failureThreshold = 2) => + new CircuitBreaker({ + name: 'test-circuit', + failureThreshold, + resetTimeoutMs: 1000, + shouldTrip: (err) => Boolean((err as { transient?: boolean }).transient), + now: () => now, + }); + + beforeEach(() => { + now = 0; + }); + + it('opens after the configured number of transient failures', async () => { + const breaker = createBreaker(); + const transientError = { transient: true }; + + await expect( + breaker.execute(() => Promise.reject(transientError)), + ).rejects.toBe(transientError); + await expect( + breaker.execute(() => Promise.reject(transientError)), + ).rejects.toBe(transientError); + + expect(breaker.snapshot().state).toBe('open'); + await expect(breaker.execute(() => Promise.resolve('ok'))).rejects.toThrow( + CircuitBreakerOpenError, + ); + }); + + it('moves to half-open after reset timeout and closes after a successful probe', async () => { + const breaker = createBreaker(1); + + await expect( + breaker.execute(() => Promise.reject({ transient: true })), + ).rejects.toEqual({ transient: true }); + expect(breaker.snapshot().state).toBe('open'); + + now = 1000; + + await expect(breaker.execute(() => Promise.resolve('ok'))).resolves.toBe( + 'ok', + ); + expect(breaker.snapshot()).toMatchObject({ + state: 'closed', + failureCount: 0, + retryAfterMs: 0, + }); + }); + + it('does not trip on non-transient failures', async () => { + const breaker = createBreaker(1); + const validationError = { transient: false }; + + await expect( + breaker.execute(() => Promise.reject(validationError)), + ).rejects.toBe(validationError); + + expect(breaker.snapshot().state).toBe('closed'); + expect(breaker.snapshot().failureCount).toBe(0); + }); + + it('reopens when the half-open probe fails with a transient error', async () => { + const breaker = createBreaker(1); + + await expect( + breaker.execute(() => Promise.reject({ transient: true })), + ).rejects.toEqual({ transient: true }); + + now = 1000; + await expect( + breaker.execute(() => Promise.reject({ transient: true })), + ).rejects.toEqual({ transient: true }); + + expect(breaker.snapshot().state).toBe('open'); + expect(breaker.snapshot().retryAfterMs).toBe(1000); + }); +}); diff --git a/harvest-finance/backend/src/stellar/utils/circuit-breaker.ts b/harvest-finance/backend/src/stellar/utils/circuit-breaker.ts new file mode 100644 index 000000000..bd146e905 --- /dev/null +++ b/harvest-finance/backend/src/stellar/utils/circuit-breaker.ts @@ -0,0 +1,177 @@ +export type CircuitBreakerState = 'closed' | 'open' | 'half_open'; + +export interface CircuitBreakerStateChange { + name: string; + from: CircuitBreakerState; + to: CircuitBreakerState; + reason: string; + failureCount: number; + retryAfterMs: number; +} + +export interface CircuitBreakerSnapshot { + state: CircuitBreakerState; + failureCount: number; + retryAfterMs: number; +} + +export interface CircuitBreakerOptions { + name: string; + failureThreshold: number; + resetTimeoutMs: number; + shouldTrip: (err: unknown) => boolean; + now?: () => number; + onStateChange?: (change: CircuitBreakerStateChange) => void; +} + +export class CircuitBreakerOpenError extends Error { + constructor( + public readonly circuitName: string, + public readonly retryAfterMs: number, + context?: string, + ) { + super( + `${circuitName} circuit is open${context ? ` for ${context}` : ''}; retry after ${retryAfterMs}ms`, + ); + this.name = 'CircuitBreakerOpenError'; + } +} + +export class CircuitBreaker { + private state: CircuitBreakerState = 'closed'; + private failureCount = 0; + private openedAt = 0; + private halfOpenProbeInFlight = false; + private readonly failureThreshold: number; + private readonly resetTimeoutMs: number; + private readonly now: () => number; + + constructor(private readonly options: CircuitBreakerOptions) { + this.failureThreshold = Math.max( + 1, + Math.floor(options.failureThreshold), + ); + this.resetTimeoutMs = Math.max(1, Math.floor(options.resetTimeoutMs)); + this.now = options.now ?? Date.now; + } + + async execute(operation: () => Promise, context?: string): Promise { + const probeAcquired = this.beforeExecute(context); + + try { + const result = await operation(); + this.recordSuccess(); + return result; + } catch (err) { + this.recordFailure(err); + throw err; + } finally { + if (probeAcquired) { + this.halfOpenProbeInFlight = false; + } + } + } + + snapshot(): CircuitBreakerSnapshot { + return { + state: this.state, + failureCount: this.failureCount, + retryAfterMs: this.retryAfterMs(), + }; + } + + private beforeExecute(context?: string): boolean { + if (this.state === 'open') { + if (!this.openTimeoutElapsed()) { + throw new CircuitBreakerOpenError( + this.options.name, + this.retryAfterMs(), + context, + ); + } + this.transitionTo('half_open', 'reset_timeout_elapsed'); + } + + if (this.state === 'half_open') { + if (this.halfOpenProbeInFlight) { + throw new CircuitBreakerOpenError( + this.options.name, + this.retryAfterMs(), + context, + ); + } + this.halfOpenProbeInFlight = true; + return true; + } + + return false; + } + + private recordSuccess(): void { + if (this.state === 'half_open') { + this.transitionTo('closed', 'half_open_probe_succeeded'); + return; + } + this.failureCount = 0; + } + + private recordFailure(err: unknown): void { + if (!this.options.shouldTrip(err)) { + if (this.state === 'half_open') { + this.transitionTo('closed', 'half_open_probe_reached_horizon'); + } else { + this.failureCount = 0; + } + return; + } + + if (this.state === 'half_open') { + this.failureCount = 1; + this.transitionTo('open', 'half_open_probe_failed'); + return; + } + + this.failureCount += 1; + if (this.failureCount >= this.failureThreshold) { + this.transitionTo('open', 'failure_threshold_reached'); + } + } + + private transitionTo(to: CircuitBreakerState, reason: string): void { + const from = this.state; + if (from === to) return; + + this.state = to; + + if (to === 'open') { + this.openedAt = this.now(); + } + + if (to === 'closed') { + this.failureCount = 0; + this.openedAt = 0; + } + + if (to === 'half_open') { + this.failureCount = 0; + } + + this.options.onStateChange?.({ + name: this.options.name, + from, + to, + reason, + failureCount: this.failureCount, + retryAfterMs: this.retryAfterMs(), + }); + } + + private openTimeoutElapsed(): boolean { + return this.now() - this.openedAt >= this.resetTimeoutMs; + } + + private retryAfterMs(): number { + if (this.state !== 'open') return 0; + return Math.max(0, this.resetTimeoutMs - (this.now() - this.openedAt)); + } +} diff --git a/harvest-finance/backend/src/stellar/utils/stellar-retry.ts b/harvest-finance/backend/src/stellar/utils/stellar-retry.ts index 7c968be6f..f87bcc3fe 100644 --- a/harvest-finance/backend/src/stellar/utils/stellar-retry.ts +++ b/harvest-finance/backend/src/stellar/utils/stellar-retry.ts @@ -1,6 +1,6 @@ /** * Decides whether a failure from Horizon (or the underlying HTTP client) - * is worth retrying. We retry transient/network-level failures only — + * is worth retrying. We retry transient/network-level failures only: * deterministic Stellar transaction rejections (carrying `result_codes`) * will fail again on retry and only waste fee-bumps and ledger sequence. */ @@ -17,20 +17,21 @@ export function isRetryableStellarError(err: unknown): boolean { message?: string; }; - // Horizon embeds `result_codes` when the transaction was evaluated and - // rejected by the Stellar network (e.g. tx_bad_seq, op_no_trust). - // These are deterministic: the same transaction will be rejected again - // regardless of how many times we retry, so retrying only burns fees - // and consumes sequence numbers unnecessarily. + // Horizon embeds `result_codes` when the transaction was evaluated by + // Stellar Core and rejected by the network (e.g. tx_bad_seq, op_no_trust). + // These are ledger/transaction outcomes rather than Horizon availability + // failures, so they must not count toward the Horizon circuit breaker. + // Some result codes may require a rebuild, later poll, or caller-specific + // recovery flow; an immediate generic network retry is the wrong layer. if (e.response?.data?.extras?.result_codes) return false; const status = e.response?.status; if (typeof status === 'number') { - // 429 Too Many Requests — Horizon is rate-limiting this client. + // 429 Too Many Requests: Horizon is rate-limiting this client. // The request itself is valid; we just need to back off and try again. if (status === 429) return true; - // 5xx Server / Gateway errors — the failure is on Horizon's side + // 5xx Server / Gateway errors: the failure is on Horizon's side // (overload, restart, upstream timeout, etc.), not in our transaction. // These are transient by nature and safe to retry after a delay. if (status >= 500 && status < 600) return true; @@ -42,7 +43,7 @@ export function isRetryableStellarError(err: unknown): boolean { return false; } - // No HTTP response at all → the request never reached Horizon. + // No HTTP response at all means the request never reached Horizon. // These Node.js / OS-level error codes represent transient network // conditions (connection dropped, DNS hiccup, unreachable host) that // are safe to retry once connectivity is restored.