diff --git a/docs/analytics.md b/docs/analytics.md index 36623dc6..fab40771 100644 --- a/docs/analytics.md +++ b/docs/analytics.md @@ -5,6 +5,7 @@ This document describes the lightweight analytics and cost-tracking additions. Endpoints: - POST /analytics/event - record a feature event (body: { category, action, label?, value? }) +- POST /metrics/cost - record an hourly infrastructure cost event (body: { amountUsd }) - GET /monitoring/cost/summary - returns last 24h estimated spend and avg hourly cost if enabled Metrics added (Prometheus): diff --git a/docs/monitoring-dashboard.md b/docs/monitoring-dashboard.md index 569e5843..b0a8c066 100644 --- a/docs/monitoring-dashboard.md +++ b/docs/monitoring-dashboard.md @@ -3,11 +3,11 @@ This document describes the Grafana monitoring dashboard for the teachLink backend, the panels it ships with, and the alerts that fire from it. -The backend exports metrics in Prometheus format from -`src/observability/observability.controller.ts` at: +The backend exports metrics in Prometheus format from the active backend +scrape endpoint at: ``` -GET /observability/metrics/export/prometheus +GET /metrics ``` Prometheus scrapes that endpoint, Grafana visualizes the metrics, and diff --git a/infra/monitoring/README.md b/infra/monitoring/README.md index 11e9eeac..7e70126a 100644 --- a/infra/monitoring/README.md +++ b/infra/monitoring/README.md @@ -2,7 +2,7 @@ Self-contained Prometheus + Alertmanager + Grafana stack for the teachLink backend. Scrapes the Prometheus exporter served from -`/observability/metrics/export/prometheus`. +`/metrics`. See [`docs/monitoring-dashboard.md`](../../docs/monitoring-dashboard.md) for the full guide and runbook. diff --git a/src/analytics/analytics.controller.spec.ts b/src/analytics/analytics.controller.spec.ts new file mode 100644 index 00000000..fcb73b1e --- /dev/null +++ b/src/analytics/analytics.controller.spec.ts @@ -0,0 +1,49 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { AnalyticsController } from './analytics.controller'; +import { AnalyticsService } from './analytics.service'; +import { CreateEventDto } from './dto/create-event.dto'; +import { EventType } from './entities/event.entity'; + +const mockAnalyticsService = { + trackEvent: jest.fn(), + getEvents: jest.fn(), + getAnalyticsSummary: jest.fn(), +}; + +describe('AnalyticsController', () => { + let controller: AnalyticsController; + + beforeEach(async () => { + jest.clearAllMocks(); + + const module: TestingModule = await Test.createTestingModule({ + controllers: [AnalyticsController], + providers: [ + { provide: AnalyticsService, useValue: mockAnalyticsService }, + ], + }).compile(); + + controller = module.get(AnalyticsController); + }); + + it('should record a compatibility analytics event on POST /analytics/event', async () => { + const dto: CreateEventDto = { + category: 'feature', + action: 'launch_button_clicked', + }; + + const req = { + ip: '127.0.0.1', + get: jest.fn().mockReturnValue('super-agent'), + } as any; + + await expect(controller.trackEventCompatibility(dto, req)).resolves.toEqual({ success: true }); + expect(mockAnalyticsService.trackEvent).toHaveBeenCalledWith({ + ...dto, + eventType: EventType.CUSTOM, + userId: undefined, + ipAddress: '127.0.0.1', + userAgent: 'super-agent', + }); + }); +}); diff --git a/src/analytics/analytics.controller.ts b/src/analytics/analytics.controller.ts index 3fa0466d..28476bed 100644 --- a/src/analytics/analytics.controller.ts +++ b/src/analytics/analytics.controller.ts @@ -30,6 +30,21 @@ export class AnalyticsController { return { success: true }; } + @Post('event') + @ApiOperation({ summary: 'Track a feature event (compatibility endpoint)' }) + @ApiResponse({ status: 201, description: 'Feature event tracked successfully' }) + async trackEventCompatibility(@Body() dto: CreateEventDto, @Request() req: any): Promise<{ success: boolean }> { + await this.analyticsService.trackEvent({ + ...dto, + eventType: EventType.CUSTOM, + userId: req.user?.id, + ipAddress: req.ip, + userAgent: req.get('user-agent'), + }); + + return { success: true }; + } + /** * Get analytics events with filtering */ diff --git a/src/analytics/analytics.service.ts b/src/analytics/analytics.service.ts index 15402b89..c0e311ce 100644 --- a/src/analytics/analytics.service.ts +++ b/src/analytics/analytics.service.ts @@ -1,12 +1,8 @@ -import { Logger } from '@nestjs/common'; +import { Injectable, Logger, BadRequestException, OnModuleInit } from '@nestjs/common'; import { Counter, Histogram } from 'prom-client'; -import { Logger, BadRequestException } from '@nestjs/common'; -import { Counter, Histogram } from 'prom-client'; -import { Injectable, Logger, BadRequestException } from '@nestjs/common'; import { InjectRepository } from '@nestjs/typeorm'; import { Repository } from 'typeorm'; import { AnalyticsEvent, EventType } from './entities/event.entity'; -import { Injectable, Logger, OnModuleInit } from '@nestjs/common'; import { MetricsCollectionService } from '../monitoring/metrics/metrics-collection.service'; import { EventBatchingService, ITrackEventDto } from './services/event-batching.service'; import { EventValidationService } from './services/event-validation.service'; @@ -14,120 +10,51 @@ import { EventValidationService } from './services/event-validation.service'; @Injectable() export class AnalyticsService implements OnModuleInit { private readonly logger = new Logger(AnalyticsService.name); + private featureEventsCounter: Counter<'category' | 'action' | 'eventType'> | null = null; + private assessmentDuration: Histogram<'status'> | null = null; - private readonly featureEvents: Counter<'category' | 'action' | 'label'> | null; - private readonly assessmentDuration: Histogram<'status'> | null; - - constructor(private readonly metrics: MetricsCollectionService) { - const registry = this.metrics.getRegistry(); - - this.featureEvents = this.registerMetric(() => - (registry.getSingleMetric('feature_events_total') as Counter<'category' | 'action' | 'label'>) ?? - new Counter({ - name: 'feature_events_total', - help: 'Feature analytics events', - labelNames: ['category', 'action', 'label'] as const, - registers: [registry], - }), - ); - - this.assessmentDuration = this.registerMetric(() => - (registry.getSingleMetric('assessment_duration_seconds') as Histogram<'status'>) ?? - new Histogram({ - name: 'assessment_duration_seconds', - help: 'Time from attempt start to submission or timeout, in seconds', - labelNames: ['status'] as const, - buckets: [30, 60, 120, 300, 600, 1200, 1800], - registers: [registry], - }), - ); - } - - // ── Generic event recording ──────────────────────────────────────────────── - - recordEvent(category: string, action: string, label = '', value = 1): void { - try { - this.featureEvents?.inc({ category, action, label }, value); - } catch (err) { - this.logger.error( - `Failed to record analytics event: ${category}.${action}`, - err as Error, - ); - } - } - - // ── Assessment-domain events ─────────────────────────────────────────────── - - recordAssessmentStarted(assessmentId: string): void { - this.recordEvent('assessment', 'started', assessmentId); - } - - recordAssessmentSubmitted(assessmentId: string, startedAt: Date): void { - this.recordEvent('assessment', 'submitted', assessmentId); - this.observeDuration(startedAt, 'submitted'); - } - - recordAssessmentTimedOut(assessmentId: string, startedAt: Date): void { - this.recordEvent('assessment', 'timed_out', assessmentId); - this.observeDuration(startedAt, 'timed_out'); - } - - recordAssessmentScore(score: number, maxScore: number): void { - const pct = maxScore > 0 ? Math.round((score / maxScore) * 100) : 0; - this.recordEvent('assessment', 'score_recorded', '', pct); - } - - // ── Private helpers ──────────────────────────────────────────────────────── - - private observeDuration(startedAt: Date, status: string): void { - try { - const seconds = (Date.now() - startedAt.getTime()) / 1000; - this.assessmentDuration?.observe({ status }, seconds); constructor( @InjectRepository(AnalyticsEvent) - private eventRepository: Repository, + private readonly eventRepository: Repository, private readonly metrics: MetricsCollectionService, private readonly batchingService: EventBatchingService, private readonly validationService: EventValidationService, - ) { - constructor(private readonly metrics: MetricsCollectionService) {} + ) {} - async onModuleInit() { + async onModuleInit(): Promise { try { const registry = this.metrics.getRegistry(); - // Lazy import prom-client to avoid import cycles const prom = await import('prom-client'); - // Create a shared counter for feature events with labels this.featureEventsCounter = - registry.getSingleMetric('feature_events_total') || + (registry.getSingleMetric('feature_events_total') as Counter<'category' | 'action' | 'eventType'>) ?? new prom.Counter({ name: 'feature_events_total', help: 'Feature analytics events', - labelNames: ['category', 'action', 'eventType'], + labelNames: ['category', 'action', 'eventType'] as const, + registers: [registry], + }); + + this.assessmentDuration = + (registry.getSingleMetric('assessment_duration_seconds') as Histogram<'status'>) ?? + new prom.Histogram({ + name: 'assessment_duration_seconds', + help: 'Time from attempt start to submission or timeout, in seconds', + labelNames: ['status'] as const, + buckets: [30, 60, 120, 300, 600, 1200, 1800], registers: [registry], }); } catch (err) { - this.logger.error('Failed to observe assessment duration', err as Error); + this.logger.error('Failed to initialize analytics metrics', err as Error); + this.featureEventsCounter = null; + this.assessmentDuration = null; } } - /** - * Wraps metric construction in a try/catch so a misconfigured registry - * (e.g. duplicate registration in tests) degrades to a null metric rather - * than crashing the service on startup. - */ - private registerMetric(factory: () => T): T | null { - try { - return factory(); - * Track an event with full validation and batching - */ async trackEvent(dto: ITrackEventDto): Promise { try { - // Validate event this.validationService.validateEventOrThrow(dto); - // Create event entity const event = new AnalyticsEvent(); event.eventType = dto.eventType; event.category = dto.category; @@ -142,16 +69,17 @@ export class AnalyticsService implements OnModuleInit { event.userAgent = dto.userAgent; event.timestamp = new Date(); - // Add to batch for processing this.batchingService.addEvent(event); - // Record Prometheus metrics if (this.featureEventsCounter) { - this.featureEventsCounter.inc({ - category: dto.category, - action: dto.action, - eventType: dto.eventType, - }); + this.featureEventsCounter.inc( + { + category: dto.category, + action: dto.action, + eventType: dto.eventType, + }, + dto.value ?? 1, + ); } this.logger.debug(`Event tracked: ${dto.eventType} - ${dto.category}.${dto.action}`); @@ -164,29 +92,41 @@ export class AnalyticsService implements OnModuleInit { } } - /** - * Legacy method for backward compatibility with Prometheus metrics only - */ - recordEvent(category: string, action: string, label?: string, value?: number): void { + recordEvent(category: string, action: string, label = '', value = 1): void { try { if (this.featureEventsCounter) { this.featureEventsCounter.inc( { category, action, eventType: EventType.CUSTOM }, - value ?? 1, + value, ); - } else { - this.logger.debug(`Analytics event (log only): ${category}.${action} value=${value}`); } } catch (err) { - this.logger.warn('Could not register metric; proceeding without it', err as Error); - return null; + this.logger.error( + `Failed to record analytics event: ${category}.${action}`, + err as Error, + ); } } -} - /** - * Query events with filters - */ + recordAssessmentStarted(assessmentId: string): void { + this.recordEvent('assessment', 'started', assessmentId); + } + + recordAssessmentSubmitted(assessmentId: string, startedAt: Date): void { + this.recordEvent('assessment', 'submitted', assessmentId); + this.observeDuration(startedAt, 'submitted'); + } + + recordAssessmentTimedOut(assessmentId: string, startedAt: Date): void { + this.recordEvent('assessment', 'timed_out', assessmentId); + this.observeDuration(startedAt, 'timed_out'); + } + + recordAssessmentScore(score: number, maxScore: number): void { + const pct = maxScore > 0 ? Math.round((score / maxScore) * 100) : 0; + this.recordEvent('assessment', 'score_recorded', '', pct); + } + async getEvents(filters: { eventType?: EventType; userId?: string; @@ -219,20 +159,12 @@ export class AnalyticsService implements OnModuleInit { } query.orderBy('event.timestamp', 'DESC'); - - const limit = filters.limit || 100; - const offset = filters.offset || 0; - - query.take(limit).skip(offset); + query.take(filters.limit ?? 100).skip(filters.offset ?? 0); const [events, total] = await query.getManyAndCount(); - return { events, total }; } - /** - * Get event analytics summary - */ async getAnalyticsSummary( startDate: Date, endDate: Date, @@ -242,12 +174,11 @@ export class AnalyticsService implements OnModuleInit { eventsByCategory: Record; topActions: Array<{ action: string; count: number }>; }> { - const query = this.eventRepository.createQueryBuilder('event'); - - query.where('event.timestamp >= :startDate', { startDate }); - query.andWhere('event.timestamp <= :endDate', { endDate }); - - const totalEvents = await query.getCount(); + const totalEvents = await this.eventRepository + .createQueryBuilder('event') + .where('event.timestamp >= :startDate', { startDate }) + .andWhere('event.timestamp <= :endDate', { endDate }) + .getCount(); const eventsByType = await this.eventRepository .createQueryBuilder('event') @@ -285,4 +216,13 @@ export class AnalyticsService implements OnModuleInit { topActions: topActions.map((e) => ({ action: e.action, count: e.count })), }; } + + private observeDuration(startedAt: Date, status: string): void { + try { + const seconds = (Date.now() - startedAt.getTime()) / 1000; + this.assessmentDuration?.observe({ status }, seconds); + } catch (err) { + this.logger.error('Failed to observe assessment duration', err as Error); + } + } } diff --git a/src/monitoring/cost.controller.spec.ts b/src/monitoring/cost.controller.spec.ts new file mode 100644 index 00000000..9b556e69 --- /dev/null +++ b/src/monitoring/cost.controller.spec.ts @@ -0,0 +1,40 @@ +import { Test, TestingModule } from '@nestjs/testing'; +import { CostController } from './cost.controller'; +import { CostTrackingService } from './cost-tracking.service'; + +const mockCostTrackingService = { + recordHourlyCost: jest.fn(), + getLast24hCost: jest.fn().mockReturnValue(12.5), + getAverageHourlyCost: jest.fn().mockReturnValue(0.52), +}; + +describe('CostController', () => { + let controller: CostController; + + beforeEach(async () => { + jest.clearAllMocks(); + + const module: TestingModule = await Test.createTestingModule({ + controllers: [CostController], + providers: [ + { provide: CostTrackingService, useValue: mockCostTrackingService }, + ], + }).compile(); + + controller = module.get(CostController); + }); + + it('should record a cost event', async () => { + await expect(controller.recordCost({ amountUsd: 5 })).resolves.toEqual({ success: true }); + expect(mockCostTrackingService.recordHourlyCost).toHaveBeenCalledWith(5); + }); + + it('should return cost summary', async () => { + await expect(controller.getCostSummary()).resolves.toEqual({ + last24hUsd: 12.5, + avgHourlyUsd: 0.52, + }); + expect(mockCostTrackingService.getLast24hCost).toHaveBeenCalled(); + expect(mockCostTrackingService.getAverageHourlyCost).toHaveBeenCalled(); + }); +}); diff --git a/src/monitoring/cost.controller.ts b/src/monitoring/cost.controller.ts new file mode 100644 index 00000000..820ba8d9 --- /dev/null +++ b/src/monitoring/cost.controller.ts @@ -0,0 +1,28 @@ +import { Body, Controller, Get, Post } from '@nestjs/common'; +import { ApiTags, ApiOperation, ApiResponse } from '@nestjs/swagger'; +import { CostTrackingService } from './cost-tracking.service'; +import { RecordCostDto } from './dto/record-cost.dto'; + +@ApiTags('Metrics') +@Controller() +export class CostController { + constructor(private readonly costTrackingService: CostTrackingService) {} + + @Post('metrics/cost') + @ApiOperation({ summary: 'Record an hourly infrastructure cost event' }) + @ApiResponse({ status: 201, description: 'Cost recorded successfully' }) + async recordCost(@Body() dto: RecordCostDto): Promise<{ success: boolean }> { + await this.costTrackingService.recordHourlyCost(dto.amountUsd); + return { success: true }; + } + + @Get('monitoring/cost/summary') + @ApiOperation({ summary: 'Get 24h infrastructure cost summary' }) + @ApiResponse({ status: 200, description: 'Returns last 24h spend and average hourly cost' }) + async getCostSummary(): Promise<{ last24hUsd: number; avgHourlyUsd: number }> { + return { + last24hUsd: this.costTrackingService.getLast24hCost(), + avgHourlyUsd: this.costTrackingService.getAverageHourlyCost(), + }; + } +} diff --git a/src/monitoring/dto/record-cost.dto.ts b/src/monitoring/dto/record-cost.dto.ts new file mode 100644 index 00000000..7372c4b0 --- /dev/null +++ b/src/monitoring/dto/record-cost.dto.ts @@ -0,0 +1,7 @@ +import { IsNumber, Min } from 'class-validator'; + +export class RecordCostDto { + @IsNumber() + @Min(0) + amountUsd: number; +} diff --git a/src/monitoring/metrics/prometheus.controller.spec.ts b/src/monitoring/metrics/prometheus.controller.spec.ts index d0f0859d..43dec958 100644 --- a/src/monitoring/metrics/prometheus.controller.spec.ts +++ b/src/monitoring/metrics/prometheus.controller.spec.ts @@ -84,6 +84,23 @@ describe('PrometheusController', () => { expect(res.statusSpy().send).toHaveBeenCalledWith(metricText); }); + it('returns 200 from the legacy observability alias endpoint', async () => { + const metricText = '# HELP process_memory_bytes\n# TYPE gauge\n'; + mockMetricsCollectionService.getMetrics.mockResolvedValue(metricText); + + const req = buildRequest({ path: '/observability/metrics/export/prometheus' }); + const res = buildResponse(); + + await controller.exportPrometheusMetrics(req, res); + + expect(res.setHeader).toHaveBeenCalledWith( + 'Content-Type', + 'text/plain; version=0.0.4; charset=utf-8', + ); + expect(res.status).toHaveBeenCalledWith(200); + expect(res.statusSpy().send).toHaveBeenCalledWith(metricText); + }); + it('returns 500 when getMetrics throws', async () => { mockMetricsCollectionService.getMetrics.mockRejectedValue(new Error('prom-client error')); diff --git a/src/monitoring/metrics/prometheus.controller.ts b/src/monitoring/metrics/prometheus.controller.ts index 77e0aaea..28ef2278 100644 --- a/src/monitoring/metrics/prometheus.controller.ts +++ b/src/monitoring/metrics/prometheus.controller.ts @@ -49,6 +49,27 @@ export class PrometheusController { async getMetrics(@Req() req: Request, @Res() res: Response): Promise { this.assertAuthorized(req); + return this.sendPrometheusMetrics(req, res); + } + + @Get('observability/metrics/export/prometheus') + @Header('Content-Type', 'text/plain; version=0.0.4; charset=utf-8') + @ApiOperation({ + summary: 'Legacy observability endpoint for Prometheus metrics export', + description: + 'Alias for the Prometheus scrape endpoint to support legacy observability integrations.', + }) + @ApiResponse({ + status: 200, + description: 'Prometheus metrics in text/plain exposition format', + }) + @ApiResponse({ status: 401, description: 'Unauthorized – invalid or missing bearer token' }) + async exportPrometheusMetrics(@Req() req: Request, @Res() res: Response): Promise { + this.assertAuthorized(req); + return this.sendPrometheusMetrics(req, res); + } + + private async sendPrometheusMetrics(req: Request, res: Response): Promise { try { const metrics = await this.metricsCollectionService.getMetrics(); res.setHeader('Content-Type', 'text/plain; version=0.0.4; charset=utf-8'); diff --git a/src/monitoring/monitoring.module.ts b/src/monitoring/monitoring.module.ts index 7058e364..6e7e2f96 100644 --- a/src/monitoring/monitoring.module.ts +++ b/src/monitoring/monitoring.module.ts @@ -6,9 +6,12 @@ import { AlertingService } from './alerting/alerting.service'; import { MetricsCollectionService } from './metrics/metrics-collection.service'; import { CustomMetricsService } from './custom-metrics.service'; import { PrometheusController } from './metrics/prometheus.controller'; +import { CostController } from './cost.controller'; import { HttpMetricsMiddleware } from './metrics/http-metrics.middleware'; import { DbMetricsSubscriber } from './metrics/db-metrics.subscriber'; import { DbPoolMetricsCollector } from './metrics/db-pool-metrics.collector'; +import { CostTrackingService } from './cost-tracking.service'; +import { CostSchedulerService } from './cost-scheduler.service'; import { CommonModule } from '../common/common.module'; /** @@ -31,13 +34,15 @@ import { CommonModule } from '../common/common.module'; TypeOrmModule, CommonModule, ], - controllers: [PrometheusController], + controllers: [PrometheusController, CostController], providers: [ AlertingService, MetricsCollectionService, CustomMetricsService, DbMetricsSubscriber, DbPoolMetricsCollector, + CostTrackingService, + CostSchedulerService, ], exports: [ AlertingService, @@ -45,6 +50,7 @@ import { CommonModule } from '../common/common.module'; CustomMetricsService, DbMetricsSubscriber, DbPoolMetricsCollector, + CostTrackingService, ], }) export class MonitoringModule implements NestModule { diff --git a/src/observability/README.md b/src/observability/README.md index 2ba34a50..fe2de101 100644 --- a/src/observability/README.md +++ b/src/observability/README.md @@ -448,7 +448,13 @@ export class DatabaseLogger implements Logger { ## Prometheus Integration -Export metrics in Prometheus format: +Export metrics in Prometheus format using the main scrape endpoint: + +```http +GET /metrics +``` + +A legacy observability alias is also supported for compatibility: ```http GET /observability/metrics/export/prometheus