diff --git a/backend/src/index.ts b/backend/src/index.ts index 951a817..b6f6204 100644 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -264,6 +264,16 @@ app.get('/api/admin/metrics/failed-items', createAdminLimiter(), adminAuth, asyn } }); +app.get('/api/admin/metrics/api-latency', createAdminLimiter(), adminAuth, async (req, res) => { + try { + const metrics = await monitoringService.getApiLatencyMetrics(); + res.json(metrics); + } catch (error) { + logger.error('Error fetching API latency metrics:', error); + res.status(500).json({ error: 'Failed to fetch API latency metrics' }); + } +}); + app.get('/api/admin/metrics/ops-summary', createAdminLimiter(), adminAuth, async (req, res) => { try { const w = req.query.window as string; @@ -271,7 +281,7 @@ app.get('/api/admin/metrics/ops-summary', createAdminLimiter(), adminAuth, async if (isNaN(windowHours) || windowHours < 1 || windowHours > 720) { return res.status(400).json({ error: 'window must be between 1 and 720 hours' }); } - const [subscriptions, renewals, activity, trials, throughput, latency, retries] = + const [subscriptions, renewals, activity, trials, throughput, latency, retries, apiLatency] = await Promise.all([ monitoringService.getSubscriptionMetrics(), monitoringService.getRenewalMetrics(), @@ -280,6 +290,7 @@ app.get('/api/admin/metrics/ops-summary', createAdminLimiter(), adminAuth, async monitoringService.getThroughputMetrics(windowHours), monitoringService.getLatencyMetrics(windowHours), monitoringService.getRetryMetrics(windowHours), + monitoringService.getApiLatencyMetrics(), ]); res.json({ generated_at: new Date().toISOString(), @@ -291,6 +302,7 @@ app.get('/api/admin/metrics/ops-summary', createAdminLimiter(), adminAuth, async throughput, latency, retries, + api_latency: apiLatency, db_pool: monitoringService.getPoolMetrics(), }); } catch (error) { diff --git a/backend/src/middleware/requestLogger.ts b/backend/src/middleware/requestLogger.ts index 5587e4b..b525113 100644 --- a/backend/src/middleware/requestLogger.ts +++ b/backend/src/middleware/requestLogger.ts @@ -1,5 +1,6 @@ import { Request, Response, NextFunction } from 'express'; import logger from '../config/logger'; +import { apiLatencyService } from '../services/api-latency-service'; /** * Logs the start and end of every HTTP request including: @@ -23,7 +24,7 @@ export function requestLoggerMiddleware( }); // Hook into the response 'finish' event to log completion - res.on('finish', () => { + res.on('finish', async () => { const durationMs = Date.now() - startMs; const level = res.statusCode >= 500 ? 'error' : res.statusCode >= 400 ? 'warn' @@ -35,6 +36,14 @@ export function requestLoggerMiddleware( statusCode: res.statusCode, durationMs, }); + + // Record latency for endpoint family + try { + const family = apiLatencyService.getEndpointFamily(req.method, req.path); + await apiLatencyService.recordLatency(family, durationMs); + } catch (error) { + logger.error('Failed to record API latency metric:', error); + } }); next(); diff --git a/backend/src/services/api-latency-service.ts b/backend/src/services/api-latency-service.ts new file mode 100644 index 0000000..c352a3a --- /dev/null +++ b/backend/src/services/api-latency-service.ts @@ -0,0 +1,154 @@ + +import { RateLimitRedisStore } from '../lib/redis-store'; +import logger from '../config/logger'; + +export interface EndpointLatencyMetrics { + family: string; + p50_ms: number; + p95_ms: number; + p99_ms: number; + avg_ms: number; + sample_count: number; +} + +interface LatencyStoreEntry { + latencies: number[]; + timestamp: number; +} + +export class ApiLatencyService { + private static instance: ApiLatencyService | null = null; + private memoryStore: Map = new Map(); + private readonly WINDOW_MS = 24 * 60 * 60 * 1000; // 24 hour window + private readonly MAX_SAMPLES_PER_FAMILY = 10000; + private readonly REDIS_KEY_PREFIX = 'api_latency:'; + + private constructor() {} + + static getInstance(): ApiLatencyService { + if (!ApiLatencyService.instance) { + ApiLatencyService.instance = new ApiLatencyService(); + } + return ApiLatencyService.instance; + } + + /** + * Normalize a request path to an endpoint family. + * Replaces dynamic path segments like IDs with placeholders. + */ + getEndpointFamily(method: string, path: string): string { + let normalizedPath = path; + + // Remove leading /api/ prefix if present + if (normalizedPath.startsWith('/api/')) { + normalizedPath = normalizedPath.slice(4); + } + + // Split into segments and replace UUIDs/numbers with placeholders + const segments = normalizedPath.split('/').map(segment => { + if (/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(segment)) { + return ':id'; + } + if (/^\d+$/.test(segment)) { + return ':id'; + } + return segment; + }); + + return `${method.toUpperCase()} /${segments.join('/')}`; + } + + /** + * Record a latency measurement for an endpoint family. + */ + async recordLatency(family: string, latencyMs: number): Promise { + try { + const redisStore = RateLimitRedisStore.getInstance(); + if (redisStore.isAvailable()) { + // TODO: Implement Redis-based storage once we have access to raw Redis client + this.recordToMemory(family, latencyMs); + } else { + this.recordToMemory(family, latencyMs); + } + } catch (error) { + logger.error('Failed to record API latency:', error); + } + } + + private recordToMemory(family: string, latencyMs: number): void { + const now = Date.now(); + const entry = this.memoryStore.get(family) || { latencies: [], timestamp: now }; + + // Clean up old entries + if (now - entry.timestamp > this.WINDOW_MS) { + entry.latencies = []; + entry.timestamp = now; + } + + // Add new latency + entry.latencies.push(latencyMs); + + // Trim to max samples + if (entry.latencies.length > this.MAX_SAMPLES_PER_FAMILY) { + entry.latencies = entry.latencies.slice(-this.MAX_SAMPLES_PER_FAMILY); + } + + this.memoryStore.set(family, entry); + } + + /** + * Compute percentile metrics for all endpoint families. + */ + async getLatencyMetrics(): Promise { + const metrics: EndpointLatencyMetrics[] = []; + const now = Date.now(); + + for (const [family, entry] of this.memoryStore.entries()) { + // Skip if entry is too old + if (now - entry.timestamp > this.WINDOW_MS) { + continue; + } + + const sorted = [...entry.latencies].sort((a, b) => a - b); + const percentiles = this.computePercentiles(sorted); + + metrics.push({ + family, + ...percentiles, + }); + } + + // Sort by sample count descending + return metrics.sort((a, b) => b.sample_count - a.sample_count); + } + + private computePercentiles(sorted: number[]): Omit { + if (sorted.length === 0) { + return { + p50_ms: 0, + p95_ms: 0, + p99_ms: 0, + avg_ms: 0, + sample_count: 0, + }; + } + + const at = (pct: number) => { + const idx = Math.min(Math.ceil((pct / 100) * sorted.length) - 1, sorted.length - 1); + return sorted[Math.max(0, idx)]; + }; + + const avg = sorted.reduce((sum, val) => sum + val, 0) / sorted.length; + + return { + p50_ms: at(50), + p95_ms: at(95), + p99_ms: at(99), + avg_ms: Math.round(avg), + sample_count: sorted.length, + }; + } +} + +export const apiLatencyService = ApiLatencyService.getInstance(); + diff --git a/backend/src/services/monitoring-service.ts b/backend/src/services/monitoring-service.ts index 3ea60a1..46d0179 100644 --- a/backend/src/services/monitoring-service.ts +++ b/backend/src/services/monitoring-service.ts @@ -1,6 +1,7 @@ import { supabase, monitorPool, PoolMetrics } from '../config/database'; import logger from '../config/logger'; import { ExternalServiceClient, ServiceMetrics } from '../utils/external-service-client'; +import { apiLatencyService, EndpointLatencyMetrics } from './api-latency-service'; // ─── Existing interfaces ──────────────────────────────────────────────────── @@ -652,6 +653,13 @@ export class MonitoringService { } })(), contextId); } + + /** + * Get API latency percentiles per endpoint family. + */ + async getApiLatencyMetrics(): Promise { + return apiLatencyService.getLatencyMetrics(); + } } export const monitoringService = new MonitoringService(); diff --git a/backend/tests/monitoring-api.test.ts b/backend/tests/monitoring-api.test.ts index 1f7117d..10d37f1 100644 --- a/backend/tests/monitoring-api.test.ts +++ b/backend/tests/monitoring-api.test.ts @@ -15,6 +15,7 @@ jest.mock('../src/services/monitoring-service', () => ({ getRetryMetrics: jest.fn(), getFailedItems: jest.fn(), getPoolMetrics: jest.fn(), + getApiLatencyMetrics: jest.fn(), }, })); @@ -129,13 +130,22 @@ app.get('/api/admin/metrics/failed-items', adminAuth, async (req, res) => { } }); +app.get('/api/admin/metrics/api-latency', adminAuth, async (_req, res) => { + try { + const metrics = await monitoringService.getApiLatencyMetrics(); + res.json(metrics); + } catch { + res.status(500).json({ error: 'Failed to fetch API latency metrics' }); + } +}); + app.get('/api/admin/metrics/ops-summary', adminAuth, async (req, res) => { try { const windowHours = parseInt(req.query.window as string) || 24; if (windowHours < 1 || windowHours > 720) { return res.status(400).json({ error: 'window must be between 1 and 720 hours' }); } - const [subscriptions, renewals, activity, trials, throughput, latency, retries] = + const [subscriptions, renewals, activity, trials, throughput, latency, retries, apiLatency] = await Promise.all([ monitoringService.getSubscriptionMetrics(), monitoringService.getRenewalMetrics(), @@ -144,6 +154,7 @@ app.get('/api/admin/metrics/ops-summary', adminAuth, async (req, res) => { monitoringService.getThroughputMetrics(windowHours), monitoringService.getLatencyMetrics(windowHours), monitoringService.getRetryMetrics(windowHours), + monitoringService.getApiLatencyMetrics(), ]); res.json({ generated_at: new Date().toISOString(), @@ -155,6 +166,7 @@ app.get('/api/admin/metrics/ops-summary', adminAuth, async (req, res) => { throughput, latency, retries, + api_latency: apiLatency, db_pool: monitoringService.getPoolMetrics(), }); } catch { @@ -167,6 +179,60 @@ app.get('/api/admin/metrics/ops-summary', adminAuth, async (req, res) => { const AUTH = { 'x-admin-api-key': 'test-admin-key' }; const WRONG_AUTH = { 'x-admin-api-key': 'wrong-key' }; +// ── GET /api/admin/metrics/api-latency ─────────────────────────────────────── + +describe('GET /api/admin/metrics/api-latency', () => { + const API_LATENCY_FIXTURE = [ + { + family: 'GET /subscriptions', + p50_ms: 50, + p95_ms: 150, + p99_ms: 300, + avg_ms: 75, + sample_count: 100, + }, + { + family: 'POST /webhooks', + p50_ms: 100, + p95_ms: 200, + p99_ms: 400, + avg_ms: 120, + sample_count: 50, + }, + ]; + + it('returns 401 without auth key', async () => { + const res = await request(app).get('/api/admin/metrics/api-latency'); + expect(res.status).toBe(401); + }); + + it('returns 401 with wrong auth key', async () => { + const res = await request(app) + .get('/api/admin/metrics/api-latency') + .set(WRONG_AUTH); + expect(res.status).toBe(401); + }); + + it('returns 200 with correct shape', async () => { + (monitoringService.getApiLatencyMetrics as jest.Mock).mockResolvedValue(API_LATENCY_FIXTURE); + + const res = await request(app).get('/api/admin/metrics/api-latency').set(AUTH); + + expect(res.status).toBe(200); + expect(res.body).toHaveLength(2); + expect(res.body[0].family).toBe('GET /subscriptions'); + expect(res.body[0].p50_ms).toBe(50); + expect(res.body[0].sample_count).toBe(100); + }); + + it('returns 500 when service throws', async () => { + (monitoringService.getApiLatencyMetrics as jest.Mock).mockRejectedValue(new Error('fail')); + + const res = await request(app).get('/api/admin/metrics/api-latency').set(AUTH); + expect(res.status).toBe(500); + }); +}); + // ── Original access-control suite ──────────────────────────────────────────── describe('Monitoring API Access Control', () => { @@ -498,6 +564,9 @@ describe('GET /api/admin/metrics/ops-summary', () => { (monitoringService.getLatencyMetrics as jest.Mock).mockResolvedValue({ notification_delivery_latency: { p50_ms: 100 } }); (monitoringService.getRetryMetrics as jest.Mock).mockResolvedValue({ total_retried: 1 }); (monitoringService.getPoolMetrics as jest.Mock).mockReturnValue({ activeConnections: 0 }); + (monitoringService.getApiLatencyMetrics as jest.Mock).mockResolvedValue([ + { family: 'GET /subscriptions', p50_ms: 50, sample_count: 10 }, + ]); }; it('returns 401 without auth key', async () => { @@ -524,8 +593,10 @@ describe('GET /api/admin/metrics/ops-summary', () => { expect(res.body).toHaveProperty('throughput'); expect(res.body).toHaveProperty('latency'); expect(res.body).toHaveProperty('retries'); + expect(res.body).toHaveProperty('api_latency'); expect(res.body).toHaveProperty('db_pool'); expect(res.body.subscriptions.total_subscriptions).toBe(5); + expect(res.body.api_latency).toHaveLength(1); }); it('accepts custom window query param and passes it to all sub-metrics', async () => { diff --git a/contracts/Cargo.lock b/contracts/Cargo.lock index 7d28b28..1645608 100644 --- a/contracts/Cargo.lock +++ b/contracts/Cargo.lock @@ -2,6 +2,13 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "agent-registry" +version = "0.1.0" +dependencies = [ + "soroban-sdk", +] + [[package]] name = "ahash" version = "0.8.12"