Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion backend/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -264,14 +264,24 @@ app.get('/api/admin/metrics/failed-items', createAdminLimiter(), adminAuth, asyn
}
});

app.get('/api/admin/metrics/api-latency', createAdminLimiter(), adminAuth, async (req, res) => {
try {
const metrics = await monitoringService.getApiLatencyMetrics();
res.json(metrics);
} catch (error) {
logger.error('Error fetching API latency metrics:', error);
res.status(500).json({ error: 'Failed to fetch API latency metrics' });
}
});

app.get('/api/admin/metrics/ops-summary', createAdminLimiter(), adminAuth, async (req, res) => {
try {
const w = req.query.window as string;
const windowHours = w ? parseInt(w, 10) : 24;
if (isNaN(windowHours) || windowHours < 1 || windowHours > 720) {
return res.status(400).json({ error: 'window must be between 1 and 720 hours' });
}
const [subscriptions, renewals, activity, trials, throughput, latency, retries] =
const [subscriptions, renewals, activity, trials, throughput, latency, retries, apiLatency] =
await Promise.all([
monitoringService.getSubscriptionMetrics(),
monitoringService.getRenewalMetrics(),
Expand All @@ -280,6 +290,7 @@ app.get('/api/admin/metrics/ops-summary', createAdminLimiter(), adminAuth, async
monitoringService.getThroughputMetrics(windowHours),
monitoringService.getLatencyMetrics(windowHours),
monitoringService.getRetryMetrics(windowHours),
monitoringService.getApiLatencyMetrics(),
]);
res.json({
generated_at: new Date().toISOString(),
Expand All @@ -291,6 +302,7 @@ app.get('/api/admin/metrics/ops-summary', createAdminLimiter(), adminAuth, async
throughput,
latency,
retries,
api_latency: apiLatency,
db_pool: monitoringService.getPoolMetrics(),
});
} catch (error) {
Expand Down
11 changes: 10 additions & 1 deletion backend/src/middleware/requestLogger.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { Request, Response, NextFunction } from 'express';
import logger from '../config/logger';
import { apiLatencyService } from '../services/api-latency-service';

/**
* Logs the start and end of every HTTP request including:
Expand All @@ -23,7 +24,7 @@ export function requestLoggerMiddleware(
});

// Hook into the response 'finish' event to log completion
res.on('finish', () => {
res.on('finish', async () => {
const durationMs = Date.now() - startMs;
const level = res.statusCode >= 500 ? 'error'
: res.statusCode >= 400 ? 'warn'
Expand All @@ -35,6 +36,14 @@ export function requestLoggerMiddleware(
statusCode: res.statusCode,
durationMs,
});

// Record latency for endpoint family
try {
const family = apiLatencyService.getEndpointFamily(req.method, req.path);
await apiLatencyService.recordLatency(family, durationMs);
} catch (error) {
logger.error('Failed to record API latency metric:', error);
}
});

next();
Expand Down
154 changes: 154 additions & 0 deletions backend/src/services/api-latency-service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@

import { RateLimitRedisStore } from '../lib/redis-store';
import logger from '../config/logger';

export interface EndpointLatencyMetrics {
family: string;
p50_ms: number;
p95_ms: number;
p99_ms: number;
avg_ms: number;
sample_count: number;
}

interface LatencyStoreEntry {
latencies: number[];
timestamp: number;
}

export class ApiLatencyService {
private static instance: ApiLatencyService | null = null;
private memoryStore: Map<string, LatencyStoreEntry> = new Map();
private readonly WINDOW_MS = 24 * 60 * 60 * 1000; // 24 hour window
private readonly MAX_SAMPLES_PER_FAMILY = 10000;
private readonly REDIS_KEY_PREFIX = 'api_latency:';

private constructor() {}

static getInstance(): ApiLatencyService {
if (!ApiLatencyService.instance) {
ApiLatencyService.instance = new ApiLatencyService();
}
return ApiLatencyService.instance;
}

/**
* Normalize a request path to an endpoint family.
* Replaces dynamic path segments like IDs with placeholders.
*/
getEndpointFamily(method: string, path: string): string {
let normalizedPath = path;

// Remove leading /api/ prefix if present
if (normalizedPath.startsWith('/api/')) {
normalizedPath = normalizedPath.slice(4);
}

// Split into segments and replace UUIDs/numbers with placeholders
const segments = normalizedPath.split('/').map(segment => {
if (/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(segment)) {
return ':id';
}
if (/^\d+$/.test(segment)) {
return ':id';
}
return segment;
});

return `${method.toUpperCase()} /${segments.join('/')}`;
}

/**
* Record a latency measurement for an endpoint family.
*/
async recordLatency(family: string, latencyMs: number): Promise<void> {
try {
const redisStore = RateLimitRedisStore.getInstance();
if (redisStore.isAvailable()) {
// TODO: Implement Redis-based storage once we have access to raw Redis client
this.recordToMemory(family, latencyMs);
} else {
this.recordToMemory(family, latencyMs);
}
} catch (error) {
logger.error('Failed to record API latency:', error);
}
}

private recordToMemory(family: string, latencyMs: number): void {
const now = Date.now();
const entry = this.memoryStore.get(family) || { latencies: [], timestamp: now };

// Clean up old entries
if (now - entry.timestamp > this.WINDOW_MS) {
entry.latencies = [];
entry.timestamp = now;
}

// Add new latency
entry.latencies.push(latencyMs);

// Trim to max samples
if (entry.latencies.length > this.MAX_SAMPLES_PER_FAMILY) {
entry.latencies = entry.latencies.slice(-this.MAX_SAMPLES_PER_FAMILY);
}

this.memoryStore.set(family, entry);
}

/**
* Compute percentile metrics for all endpoint families.
*/
async getLatencyMetrics(): Promise<EndpointLatencyMetrics[]> {
const metrics: EndpointLatencyMetrics[] = [];
const now = Date.now();

for (const [family, entry] of this.memoryStore.entries()) {
// Skip if entry is too old
if (now - entry.timestamp > this.WINDOW_MS) {
continue;
}

const sorted = [...entry.latencies].sort((a, b) => a - b);
const percentiles = this.computePercentiles(sorted);

metrics.push({
family,
...percentiles,
});
}

// Sort by sample count descending
return metrics.sort((a, b) => b.sample_count - a.sample_count);
}

private computePercentiles(sorted: number[]): Omit<EndpointLatencyMetrics, 'family'> {
if (sorted.length === 0) {
return {
p50_ms: 0,
p95_ms: 0,
p99_ms: 0,
avg_ms: 0,
sample_count: 0,
};
}

const at = (pct: number) => {
const idx = Math.min(Math.ceil((pct / 100) * sorted.length) - 1, sorted.length - 1);
return sorted[Math.max(0, idx)];
};

const avg = sorted.reduce((sum, val) => sum + val, 0) / sorted.length;

return {
p50_ms: at(50),
p95_ms: at(95),
p99_ms: at(99),
avg_ms: Math.round(avg),
sample_count: sorted.length,
};
}
}

export const apiLatencyService = ApiLatencyService.getInstance();

8 changes: 8 additions & 0 deletions backend/src/services/monitoring-service.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { supabase, monitorPool, PoolMetrics } from '../config/database';
import logger from '../config/logger';
import { ExternalServiceClient, ServiceMetrics } from '../utils/external-service-client';
import { apiLatencyService, EndpointLatencyMetrics } from './api-latency-service';

// ─── Existing interfaces ────────────────────────────────────────────────────

Expand Down Expand Up @@ -652,6 +653,13 @@ export class MonitoringService {
}
})(), contextId);
}

/**
* Get API latency percentiles per endpoint family.
*/
async getApiLatencyMetrics(): Promise<EndpointLatencyMetrics[]> {
return apiLatencyService.getLatencyMetrics();
}
}

export const monitoringService = new MonitoringService();
73 changes: 72 additions & 1 deletion backend/tests/monitoring-api.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ jest.mock('../src/services/monitoring-service', () => ({
getRetryMetrics: jest.fn(),
getFailedItems: jest.fn(),
getPoolMetrics: jest.fn(),
getApiLatencyMetrics: jest.fn(),
},
}));

Expand Down Expand Up @@ -129,13 +130,22 @@ app.get('/api/admin/metrics/failed-items', adminAuth, async (req, res) => {
}
});

app.get('/api/admin/metrics/api-latency', adminAuth, async (_req, res) => {
try {
const metrics = await monitoringService.getApiLatencyMetrics();
res.json(metrics);
} catch {
res.status(500).json({ error: 'Failed to fetch API latency metrics' });
}
});

app.get('/api/admin/metrics/ops-summary', adminAuth, async (req, res) => {
try {
const windowHours = parseInt(req.query.window as string) || 24;
if (windowHours < 1 || windowHours > 720) {
return res.status(400).json({ error: 'window must be between 1 and 720 hours' });
}
const [subscriptions, renewals, activity, trials, throughput, latency, retries] =
const [subscriptions, renewals, activity, trials, throughput, latency, retries, apiLatency] =
await Promise.all([
monitoringService.getSubscriptionMetrics(),
monitoringService.getRenewalMetrics(),
Expand All @@ -144,6 +154,7 @@ app.get('/api/admin/metrics/ops-summary', adminAuth, async (req, res) => {
monitoringService.getThroughputMetrics(windowHours),
monitoringService.getLatencyMetrics(windowHours),
monitoringService.getRetryMetrics(windowHours),
monitoringService.getApiLatencyMetrics(),
]);
res.json({
generated_at: new Date().toISOString(),
Expand All @@ -155,6 +166,7 @@ app.get('/api/admin/metrics/ops-summary', adminAuth, async (req, res) => {
throughput,
latency,
retries,
api_latency: apiLatency,
db_pool: monitoringService.getPoolMetrics(),
});
} catch {
Expand All @@ -167,6 +179,60 @@ app.get('/api/admin/metrics/ops-summary', adminAuth, async (req, res) => {
const AUTH = { 'x-admin-api-key': 'test-admin-key' };
const WRONG_AUTH = { 'x-admin-api-key': 'wrong-key' };

// ── GET /api/admin/metrics/api-latency ───────────────────────────────────────

describe('GET /api/admin/metrics/api-latency', () => {
const API_LATENCY_FIXTURE = [
{
family: 'GET /subscriptions',
p50_ms: 50,
p95_ms: 150,
p99_ms: 300,
avg_ms: 75,
sample_count: 100,
},
{
family: 'POST /webhooks',
p50_ms: 100,
p95_ms: 200,
p99_ms: 400,
avg_ms: 120,
sample_count: 50,
},
];

it('returns 401 without auth key', async () => {
const res = await request(app).get('/api/admin/metrics/api-latency');
expect(res.status).toBe(401);
});

it('returns 401 with wrong auth key', async () => {
const res = await request(app)
.get('/api/admin/metrics/api-latency')
.set(WRONG_AUTH);
expect(res.status).toBe(401);
});

it('returns 200 with correct shape', async () => {
(monitoringService.getApiLatencyMetrics as jest.Mock).mockResolvedValue(API_LATENCY_FIXTURE);

const res = await request(app).get('/api/admin/metrics/api-latency').set(AUTH);

expect(res.status).toBe(200);
expect(res.body).toHaveLength(2);
expect(res.body[0].family).toBe('GET /subscriptions');
expect(res.body[0].p50_ms).toBe(50);
expect(res.body[0].sample_count).toBe(100);
});

it('returns 500 when service throws', async () => {
(monitoringService.getApiLatencyMetrics as jest.Mock).mockRejectedValue(new Error('fail'));

const res = await request(app).get('/api/admin/metrics/api-latency').set(AUTH);
expect(res.status).toBe(500);
});
});

// ── Original access-control suite ────────────────────────────────────────────

describe('Monitoring API Access Control', () => {
Expand Down Expand Up @@ -498,6 +564,9 @@ describe('GET /api/admin/metrics/ops-summary', () => {
(monitoringService.getLatencyMetrics as jest.Mock).mockResolvedValue({ notification_delivery_latency: { p50_ms: 100 } });
(monitoringService.getRetryMetrics as jest.Mock).mockResolvedValue({ total_retried: 1 });
(monitoringService.getPoolMetrics as jest.Mock).mockReturnValue({ activeConnections: 0 });
(monitoringService.getApiLatencyMetrics as jest.Mock).mockResolvedValue([
{ family: 'GET /subscriptions', p50_ms: 50, sample_count: 10 },
]);
};

it('returns 401 without auth key', async () => {
Expand All @@ -524,8 +593,10 @@ describe('GET /api/admin/metrics/ops-summary', () => {
expect(res.body).toHaveProperty('throughput');
expect(res.body).toHaveProperty('latency');
expect(res.body).toHaveProperty('retries');
expect(res.body).toHaveProperty('api_latency');
expect(res.body).toHaveProperty('db_pool');
expect(res.body.subscriptions.total_subscriptions).toBe(5);
expect(res.body.api_latency).toHaveLength(1);
});

it('accepts custom window query param and passes it to all sub-metrics', async () => {
Expand Down
7 changes: 7 additions & 0 deletions contracts/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading