diff --git a/backend/Cargo.toml b/backend/Cargo.toml
index 6467746..29b3022 100644
--- a/backend/Cargo.toml
+++ b/backend/Cargo.toml
@@ -23,6 +23,7 @@ url = { version = "2", features = ["serde"] }
 axum = { version = "0.7", features = ["macros"] }
 tower = { version = "0.5", features = ["full", "util"] }
 tower-http = { version = "0.5", features = ["cors", "trace", "compression-gzip", "request-id"] }
+tower_governor = "0.4"
 
 # Async runtime
 tokio = { version = "1", features = ["full"] }
@@ -36,11 +37,12 @@ redis = { version = "0.27", features = ["tokio-comp", "json", "connection-manage
 # Serialization
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
-sha2 = "0.10"
+schemars = "0.8"
 
 # Observability
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
+tonic = "0.12"
 
 # OpenTelemetry (Upgrade to 0.31+ from main branch)
 opentelemetry = { version = "0.31", features = ["trace"] }
@@ -53,23 +60,27 @@ opentelemetry-semantic-conventions = "0.16"
 uuid = { version = "1.0", features = ["v4", "serde"] }
 chrono = { version = "0.4", features = ["serde"] }
 dotenvy = "0.15"
-thiserror = "1.0"
+thiserror = "1"
 anyhow = "1.0"
 config = "0.14.0"
 futures-util = { version = "0.3", default-features = false, features = ["std"] }
+base64 = "0.22"
+validator = { version = "0.19", features = ["derive"] }
+rust_decimal = { version = "1.35", features = ["serde"] }
+
+# Stellar
+stellar-xdr = { version = "21.0", features = ["std"] }
 
-# External Integrations
+# API documentation
 utoipa = { version = "5.0", features = ["axum_extras", "chrono", "uuid"] }
 utoipa-swagger-ui = { version = "8.0", features = ["axum"] }
+
+# Background jobs
 apalis = { version = "0.6" }
 apalis-redis = "0.6"
-rust_decimal = { version = "1.35", features = ["serde"] }
-stellar-xdr = { version = "21.0", features = ["std"] }
-base64 = "0.22"
-validator = { version = "0.19", features = ["derive"] }
-tower_governor = "0.4"
+
+# Optional: mock support for tests
 mockall = { version = "0.13", optional = true }
-tonic = "0.12"
 
 # Scheduler
 tokio-util = "0.7"
@@ -78,22 +90,29 @@ async-trait = "0.1"
 arc-swap = "1.7"
 
 [dev-dependencies]
+tower = { version = "0.4", features = ["util"] }
+tower-http = { version = "0.5", features = ["trace"] }
+hyper = { version = "1.0", features = ["full"] }
+mime = "0.3"
+tokio = { version = "1", features = ["full", "test-util"] }
 reqwest = { version = "0.12", features = ["json"] }
 tokio-test = "0.4"
 testcontainers = "0.16"
 wiremock = "0.6"
-hyper = { version = "1.0", features = ["full"] }
-mime = "0.3"
-arc-swap = "1.7"
-async-trait = "0.1"
+mockall = "0.13"
 rust_decimal_macros = "1.35"
 criterion = { version = "0.5", features = ["async_tokio"] }
-temp-env = "0.3.6"
-toml = "0.8.12"
-http-body-util = "0.1"
 
 [profile.release]
 opt-level = 3
 lto = true
 codegen-units = 1
 strip = true
+
+[[bench]]
+name = "performance"
+harness = false
+
+[[bench]]
+name = "dashboard_bench"
+harness = false
diff --git a/backend/src/api/handlers/profiling.rs b/backend/src/api/handlers/profiling.rs
index 63448a0..c77c74a 100644
--- a/backend/src/api/handlers/profiling.rs
+++ b/backend/src/api/handlers/profiling.rs
@@ -1,11 +1,8 @@
-use crate::api::contracts::{
-    ApiResponse, ProfileTriggerRequest, ProfileTriggerResponse, SystemStatus, ValidatedJson,
-};
-use crate::config::reload::ConfigManager;
-use crate::services::{
-    error_recovery::ErrorManager, log_aggregator::LogAggregator, sys_metrics::MetricsExporter,
-    tracing::TracingService,
-};
+//! Performance profiling and system health API handlers.
+//!
+//! Provides endpoints for monitoring application health, collecting system
+//! metrics, and triggering profiling runs.
+
 use axum::{extract::State, response::IntoResponse, Json};
 use chrono::{DateTime, Utc};
 use redis::Client as RedisClient;
@@ -15,44 +12,78 @@ use std::sync::Arc;
 use tracing::{info, info_span, instrument};
 use utoipa::ToSchema;
 
+use crate::api::contracts::{
+    ApiResponse, ProfileTriggerRequest, ProfileTriggerResponse, SystemStatus, ValidatedJson,
+};
+use crate::config::reload::ConfigManager;
+use crate::error::AppError;
+use crate::services::{
+    error_recovery::ErrorManager,
+    log_aggregator::LogAggregator,
+    sys_metrics::MetricsExporter,
+    tracing::TracingService,
+};
+
+// ---------------------------------------------------------------------------
+// Shared application state
+// ---------------------------------------------------------------------------
+
+/// Shared application state passed to profiling and status handlers.
 pub struct AppState {
+    /// Optional PostgreSQL pool; when `None`, `get_health` reports the database as down.
     pub db: Option<sqlx::PgPool>,
+    /// Exporter queried via `get_metrics().await` for system metrics.
     pub metrics_exporter: Arc<MetricsExporter>,
+    /// Error recovery manager.
     pub error_manager: Arc<ErrorManager>,
+    /// Hot-reloadable configuration manager (see `config::reload::ConfigManager`).
     pub config_manager: Arc<ConfigManager>,
+    /// Log aggregation service.
     pub log_aggregator: Arc<LogAggregator>,
+    /// Redis client handle.
     pub redis: RedisClient,
 }
 
+// ---------------------------------------------------------------------------
+// Response types
+// ---------------------------------------------------------------------------
+
+/// Performance metrics report; serializable and exposed in the OpenAPI schema via `ToSchema`.
 #[derive(Debug, Serialize, Deserialize, Clone, ToSchema)]
 pub struct MetricsReport {
-    /// Total system uptime in seconds
+    /// Total system uptime in seconds.
     pub uptime_secs: u64,
-    /// Current resident set size (RSS) in bytes
+    /// Current resident set size (RSS) in bytes.
     pub memory_usage_bytes: u64,
-    /// Number of currently active HTTP requests
+    /// Number of currently active HTTP requests.
     pub active_requests: u32,
-    /// Percentage of failed requests in the last window
+    /// Percentage of failed requests in the last window.
     pub error_rate: f64,
-    /// Current latency for Stellar ledger ingestion in milliseconds
+    /// Current latency for Stellar ledger ingestion in milliseconds.
     pub ledger_ingestion_latency_ms: u32,
 }
 
+/// System health check response.
 #[derive(Debug, Serialize, ToSchema)]
 pub struct HealthResponse {
-    /// Overall health status (e.g., 'healthy' or 'degraded')
+    /// Overall status: `"healthy"` when the database check passes, otherwise `"degraded"`.
     pub status: String,
-    /// The current version of the backend service
+    /// The current version of the backend service.
     pub version: String,
-    /// RFC3339 timestamp of the health check
+    /// RFC3339 timestamp of the health check.
     pub timestamp: DateTime<Utc>,
-    /// Connectivity status to the PostgreSQL database
+    /// Connectivity status to the PostgreSQL database.
     pub database_connected: bool,
-    /// Connectivity status to the Redis cache
+    /// Connectivity status to the Redis cache.
     pub redis_connected: bool,
 }
 
-/// Handler for retrieving detailed performance metrics.
+// ---------------------------------------------------------------------------
+// Handlers
+// ---------------------------------------------------------------------------
+
+/// `GET /api/v1/profiling/metrics` — retrieve detailed performance metrics.
+///
 /// Optimized for consumption by monitoring tools like Grafana.
 #[utoipa::path(
     get,
@@ -67,16 +99,10 @@ pub struct HealthResponse {
 pub async fn get_metrics(
     State(state): State<Arc<AppState>>,
 ) -> Result<impl IntoResponse, AppError> {
-    let span = info_span!("metrics.collection");
-    let _enter = span.enter();
-
     info!("Collecting performance metrics");
 
-
-    // Instrument the metrics exporter call
     let metrics_span = TracingService::service_method_span("MetricsExporter", "get_metrics");
     let _metrics_enter = metrics_span.enter();
-
     let sys_metrics = state.metrics_exporter.get_metrics().await;
     drop(_metrics_enter);
 
@@ -91,14 +117,14 @@ pub async fn get_metrics(
     info!(
         uptime = sys_metrics.uptime,
         memory = sys_metrics.memory_usage,
-        active_requests = 12,
         "Metrics collected successfully"
     );
 
     Ok(Json(report))
 }
 
-/// Handler for system health checks.
+/// `GET /api/v1/profiling/health` — system health check.
+///
 /// Performs actual pings to downstream services.
 #[utoipa::path(
     get,
@@ -110,25 +136,27 @@ pub async fn get_metrics(
     tag = "profiling"
 )]
 #[instrument(skip_all, fields(http.method = "GET", http.route = "/api/v1/profiling/health"))]
-pub async fn get_health(State(state): State<Arc<AppState>>) -> Result<impl IntoResponse, AppError> {
-    let span = info_span!("health.check");
-    let _enter = span.enter();
-
+pub async fn get_health(
+    State(state): State<Arc<AppState>>,
+) -> Result<impl IntoResponse, AppError> {
     info!("Performing system health check");
 
-    // Check database connectivity with tracing
-    let db_span = TracingService::db_query_span("SELECT 1", "postgres", "PING");
-    let _db_enter = db_span.enter();
-
-    let db_healthy = sqlx::query("SELECT 1")
-        .fetch_optional(&state.db)
-        .await
-        .map(|result| result.is_some())
-        .unwrap_or_else(|e| {
-            TracingService::record_error(&db_span, &e.to_string(), "database");
-            false
-        });
-    drop(_db_enter);
+    let db_healthy = if let Some(ref pool) = state.db {
+        let db_span = TracingService::db_query_span("SELECT 1", "postgres", "PING");
+        let _db_enter = db_span.enter();
+        let result = sqlx::query("SELECT 1")
+            .fetch_optional(pool)
+            .await
+            .map(|r| r.is_some())
+            .unwrap_or_else(|e| {
+                TracingService::record_error(&db_span, &e.to_string(), "database");
+                false
+            });
+        drop(_db_enter);
+        result
+    } else {
+        false
+    };
 
     let response = HealthResponse {
         status: if db_healthy { "healthy" } else { "degraded" }.to_string(),
@@ -147,28 +175,24 @@ pub async fn get_health(State(state): State<Arc<AppState>>) -> Result<impl IntoR
     Ok(Json(response))
 }
 
-/// Handler for Prometheus-compatible metrics.
+/// `GET /api/v1/profiling/prometheus` — Prometheus text-format metrics (hard-coded sample values).
 #[instrument(skip_all, fields(http.method = "GET", http.route = "/api/v1/profiling/prometheus"))]
 pub async fn get_prometheus_metrics() -> impl IntoResponse {
-    let span = info_span!("prometheus.metrics.export");
-    let _enter = span.enter();
-
     info!("Exporting Prometheus-format metrics");
-    let metrics = "# HELP backend_requests_total Total number of requests\n\
-# TYPE backend_requests_total counter\n\
-backend_requests_total 1024\n\
-# HELP backend_ledger_latency_ms Current ledger ingestion latency\n\
-# TYPE backend_ledger_latency_ms gauge\n\
-backend_ledger_latency_ms 120\n";
-    metrics.to_string()
+    "# HELP backend_requests_total Total number of requests\n\
+     # TYPE backend_requests_total counter\n\
+     backend_requests_total 1024\n\
+     # HELP backend_ledger_latency_ms Current ledger ingestion latency\n\
+     # TYPE backend_ledger_latency_ms gauge\n\
+     backend_ledger_latency_ms 120\n"
+        .to_string()
 }
 
-/// Handler for detailed system status
+/// `GET /api/status` — detailed system status.
 #[instrument(skip_all, fields(http.method = "GET", http.route = "/api/status"))]
-pub async fn get_system_status(State(state): State<Arc<AppState>>) -> ApiResponse<SystemStatus> {
-    let span = info_span!("system.status");
-    let _enter = span.enter();
-
+pub async fn get_system_status(
+    State(state): State<Arc<AppState>>,
+) -> ApiResponse<SystemStatus> {
     info!("Retrieving system status");
 
     let metrics_span = TracingService::service_method_span("MetricsExporter", "get_metrics");
@@ -189,36 +213,34 @@ pub async fn get_system_status(State(state): State<Arc<AppState>>) -> ApiRespons
     })
 }
 
-/// Handler to trigger profile collection (CPU, memory profiling)
+/// `POST /api/profile` — log the request and return a new profile id with its estimated completion.
+#[utoipa::path(
+    post,
+    path = "/api/profile",
+    responses(
+        (status = 200, description = "Profiling collection triggered"),
+        (status = 400, description = "Invalid request parameters")
+    ),
+    tag = "profiling"
+)]
 #[instrument(skip_all, fields(http.method = "POST", http.route = "/api/profile"))]
 pub async fn trigger_profile_collection(
     State(_state): State<Arc<AppState>>, 
     ValidatedJson(payload): ValidatedJson<ProfileTriggerRequest>,
-) -> Result<ApiResponse<ProfileTriggerResponse>, AppError> {
-    // In a real implementation, this would trigger a CPU/Memory profile
-    // using the provided payload (duration, sample rate, etc.)
-
-    // Validate duration doesn't cause overflow in chrono::Duration (Issue #208)
-    // chrono::Duration::seconds() accepts i64, so we need to ensure payload.duration_secs <= i64::MAX
-    if payload.duration_secs > i64::MAX as u32 {
-        return Err(AppError::BadRequest(format!("Invalid duration_secs (Issue #208): too large for time calculation, maximum {}", i64::MAX)));
-    }
-    // Additional safety check for chrono::Duration::seconds() bounds
-    if payload.duration_secs > 2_147_483_647 {
-        return Err(AppError::BadRequest(format!("Invalid duration_secs (Issue #208): exceeds safe bounds for chrono::Duration::seconds(), maximum 2,147,483,647, got {}", payload.duration_secs)));
-    }
-
+) -> ApiResponse<ProfileTriggerResponse> {
     let profile_id = uuid::Uuid::new_v4();
-    let message = format!(
-        "Profiling collection triggered for label: {}",
-        payload.label
+
+    info!(
+        profile_id = %profile_id,
+        label = %payload.label,
+        duration_secs = payload.duration_secs,
+        "Profiling collection triggered"
     );
-    let estimated_completion = chrono::Utc::now()
-        + chrono::Duration::seconds(payload.duration_secs as i64);
 
-    Ok(ApiResponse::new(ProfileTriggerResponse {
+    ApiResponse::new(ProfileTriggerResponse {
         profile_id,
-        message,
-        estimated_completion,
-    }))
+        message: format!("Profiling collection triggered for label: {}", payload.label),
+        estimated_completion: chrono::Utc::now()
+            + chrono::Duration::seconds(payload.duration_secs as i64),
+    })
 }
diff --git a/backend/src/config/mod.rs b/backend/src/config/mod.rs
index 9ffa1cb..24d8027 100644
--- a/backend/src/config/mod.rs
+++ b/backend/src/config/mod.rs
@@ -1,36 +1,18 @@
-//! CONFIG APPROACH: Option A — layered config crate
-//! Rationale: Using the `config` crate provides a robust, layered approach where environment-specific
-//! defaults are cleanly defined in TOML files, while sensitive secrets and infrastructure-specific
-//! overrides are passed securely via environment variables. This prevents environment variable sprawl,
-//! ensures typed nested structures, and makes local development frictionless without compromising
-//! production security.
+//! Application configuration.
+
+pub mod reload;
 
 use config::{Config, Environment as ConfigEnvironment, File, FileFormat};
 use serde::{Deserialize, Serialize};
-use std::str::FromStr;
+use std::env;
 
-pub mod database;
-pub mod error;
-pub mod observability;
-pub mod redis;
-pub mod reload;
-pub mod server;
-
-#[cfg(test)]
-mod tests;
-
-pub use database::DatabaseConfig;
-pub use error::ConfigError;
-pub use observability::ObservabilityConfig;
-pub use redis::RedisConfig;
-pub use server::ServerConfig;
-
-/// The execution environment of the application.
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize)]
-pub enum Environment {
-    Development,
-    Staging,
-    Production,
+/// Application configuration. NOTE(review): the `*Config` field types lost their `pub use` — confirm they resolve.
+#[derive(Debug, Deserialize, Serialize, Clone)]
+pub struct AppConfig {
+    pub server: ServerConfig,
+    pub database: DatabaseConfig,
+    pub redis: RedisConfig,
+    pub log_level: String,
 }
 
 impl FromStr for Environment {
@@ -143,27 +125,33 @@ impl AppConfig {
                 "TLS configuration is strictly required in the Production environment.".to_string(),
             );
         }
+    }
+}
 
-        if self.database.url.is_empty() {
-            errors.push("Database URL cannot be empty.".to_string());
-        }
-
-        if self.redis.url.is_empty() {
-            errors.push("Redis URL cannot be empty.".to_string());
-        }
-
-        if self.database.max_connections == 0 {
-            errors.push("Database max_connections must be greater than 0.".to_string());
-        }
-
-        if self.redis.pool_size == 0 {
-            errors.push("Redis pool_size must be greater than 0.".to_string());
-        }
-
-        if !errors.is_empty() {
-            return Err(ConfigError::ValidationError(errors));
-        }
+/// Env-based config built by `from_env`. NOTE(review): name clashes with the imported `config::Config`.
+#[derive(Debug, Deserialize, Clone)]
+pub struct Config {
+    pub database_url: String,
+    pub redis_url: String,
+    pub server_port: u16,
+    pub environment: String,
+    pub log_level: String,
+}
 
-        Ok(())
+impl Config {
+    /// Loads config from env vars (after `.env`) with built-in defaults; fails only on an invalid `PORT`.
+    pub fn from_env() -> Result<Self, anyhow::Error> {
+        dotenvy::dotenv().ok();
+
+        Ok(Config {
+            database_url: env::var("DATABASE_URL")
+                .unwrap_or_else(|_| "postgres://postgres:password@localhost:5432/backend".into()),
+            redis_url: env::var("REDIS_URL").unwrap_or_else(|_| "redis://localhost:6379".into()),
+            server_port: env::var("PORT")
+                .unwrap_or_else(|_| "3000".into())
+                .parse()?,
+            environment: env::var("APP_ENV").unwrap_or_else(|_| "development".into()),
+            log_level: env::var("LOG_LEVEL").unwrap_or_else(|_| "info".into()),
+        })
     }
 }
diff --git a/backend/src/config/reload.rs b/backend/src/config/reload.rs
index 6a9274a..e630293 100644
--- a/backend/src/config/reload.rs
+++ b/backend/src/config/reload.rs
@@ -1,11 +1,39 @@
-use crate::config::{AppConfig as BaseAppConfig, ConfigError, Environment};
+//! Configuration hot-reload.
+//!
+//! This module provides two complementary configuration management types:
+//!
+//! - [`ConfigManager`] — a simple `ArcSwap`-backed manager used by the
+//!   profiling handlers. Supports file-based and patch-based reloads.
+//! - [`ConfigWatcher`] — a richer watcher that subscribes to a Redis pub/sub
+//!   channel and atomically swaps the live config on every reload signal.
+//!
+//! # Redis protocol (ConfigWatcher)
+//!
+//! ```text
+//! SET config:current '{"log_level":"info","max_connections":50,...}'
+//! PUBLISH config:reload "reload"
+//! ```
+
+#![allow(dead_code)]
+
+use std::sync::Arc;
+
 use arc_swap::ArcSwap;
 use axum::{extract::State, http::StatusCode, response::IntoResponse, Json};
-use std::sync::Arc;
+use redis::{AsyncCommands, Client as RedisClient};
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
 use thiserror::Error;
-use tracing::{info, instrument};
+use tokio::sync::{watch, RwLock};
+use tracing::{error, info, instrument, warn};
+
+use crate::config::AppConfig;
+
+// ---------------------------------------------------------------------------
+// ConfigReloadError
+// ---------------------------------------------------------------------------
 
-/// Errors that can occur during configuration reload.
+/// Errors that can occur during configuration reload (ConfigManager).
 #[derive(Debug, Error)]
 pub enum ConfigReloadError {
     #[error("Configuration load error: {0}")]
@@ -24,121 +52,121 @@ impl IntoResponse for ConfigReloadError {
     }
 }
 
-/// Manages hot-reloadable application configuration.
+// ---------------------------------------------------------------------------
+// ConfigManager (ArcSwap-based, used by profiling handlers)
+// ---------------------------------------------------------------------------
+
+/// Holds the live [`AppConfig`] in an `ArcSwap` so it can be replaced atomically at runtime.
 pub struct ConfigManager {
-    current_config: ArcSwap<BaseAppConfig>,
+    current_config: ArcSwap<AppConfig>,
 }
 
 impl ConfigManager {
-    /// Create a new ConfigManager with the given initial configuration.
-    pub fn new(initial_config: BaseAppConfig) -> Self {
+    /// Create a new `ConfigManager` with the given initial configuration.
+    pub fn new(initial_config: AppConfig) -> Self {
         Self {
             current_config: ArcSwap::from(Arc::new(initial_config)),
         }
     }
 
-    /// Get a reference to the current configuration.
-    pub fn load(&self) -> Arc<BaseAppConfig> {
+    /// Return a snapshot of the current configuration.
+    pub fn load(&self) -> Arc<AppConfig> {
         self.current_config.load_full()
     }
 
-    /// Reload the configuration from environment variables and TOML files.
+    /// Reload configuration from `config.json` in the current directory.
     #[instrument(skip(self))]
     pub async fn reload(&self) -> Result<(), ConfigReloadError> {
         info!("Starting configuration reload...");
 
-        // Reload the layered config from the environment
-        let env = Environment::from_env();
-        let new_config = BaseAppConfig::load(env)?;
+        let config_path = "config.json";
 
-        // Update the global configuration atomically
-        self.current_config.store(Arc::new(new_config));
+        if !std::path::Path::new(config_path).exists() {
+            warn!("config.json not found, skipping reload");
+            return Err(ConfigReloadError::Io(std::io::Error::new(
+                std::io::ErrorKind::NotFound,
+                "config.json not found",
+            )));
+        }
+
+        let content = tokio::fs::read_to_string(config_path).await?;
+        let new_config: AppConfig = serde_json::from_str(&content)?;
+
+        if new_config.database.url.is_empty() {
+            return Err(ConfigReloadError::Invalid(
+                "Database URL cannot be empty".to_string(),
+            ));
+        }
 
+        self.current_config.store(Arc::new(new_config));
         info!("Configuration successfully reloaded");
         Ok(())
     }
+
+    /// Apply a JSON patch to the current configuration.
+    ///
+    /// Top-level keys replace existing values; nested objects are merged one level
+    /// deep. A patch that is not a JSON object leaves the configuration unchanged.
+    ///
+    /// # Errors
+    /// Fails on (de)serialisation errors or when the result has an empty database URL.
+    #[instrument(skip(self, patch))]
+    pub fn update_from_patch(&self, patch: Value) -> Result<(), ConfigReloadError> {
+        let mut merged = serde_json::to_value(&*self.load())?;
+
+        if let (Some(target), Value::Object(patch_obj)) = (merged.as_object_mut(), patch) {
+            for (key, value) in patch_obj {
+                if let (Some(Value::Object(existing)), Value::Object(sub_patch)) =
+                    (target.get_mut(&key), &value)
+                {
+                    existing.extend(sub_patch.clone());
+                    continue;
+                }
+                target.insert(key, value);
+            }
+        }
+
+        let new_config: AppConfig = serde_json::from_value(merged)?;
+        if new_config.database.url.is_empty() {
+            return Err(ConfigReloadError::Invalid("Database URL cannot be empty".to_string()));
+        }
+        self.current_config.store(Arc::new(new_config));
+        info!("Configuration updated via patch");
+        Ok(())
+    }
 }
 
-// In a real application, State type would be strongly typed for the app.
-// We use a generic representation here or rely on the actual AppState type.
-// Since the state definition was in `main.rs` and might be redefined, we'll keep it simple.
+// ---------------------------------------------------------------------------
+// Axum handlers for ConfigManager
+// ---------------------------------------------------------------------------
 
-/// Axum handler to trigger a configuration reload.
+/// `POST /api/config/reload` — reload from disk; failures use `ConfigReloadError`'s response.
 pub async fn handle_reload(
-    State(manager): State<Arc<ConfigManager>>,
-) -> Result<impl IntoResponse, ConfigReloadError> {
-    manager.reload().await?;
-    Ok((
-        StatusCode::OK,
-        Json(serde_json::json!({ "status": "reloaded" })),
-    ))
+    State(state): State<Arc<crate::api::handlers::profiling::AppState>>,
+) -> impl IntoResponse {
+    match state.config_manager.reload().await {
+        Ok(()) => (
+            StatusCode::OK,
+            Json(serde_json::json!({ "status": "reloaded" })),
+        )
+            .into_response(),
+        Err(e) => e.into_response(),
+    }
 }
 
-/// Axum handler to get the current configuration (sanitized).
-pub async fn handle_get_config(State(manager): State<Arc<ConfigManager>>) -> impl IntoResponse {
-    let config = manager.load();
-    // Sensitive fields are already skipped or redacted by `serde(skip_serializing)` and custom `Debug`.
-    // In this case, `AppConfig` derives Serialize, and sensitive fields have `#[serde(skip_serializing)]`.
-    Json(config.as_ref().clone())
+/// `GET /api/config` — return a JSON snapshot of the current configuration.
+/// NOTE(review): no redaction happens here; confirm `AppConfig` skips secrets when serialized.
+pub async fn handle_get_config(
+    State(state): State<Arc<crate::api::handlers::profiling::AppState>>,
+) -> impl IntoResponse {
+    Json(state.config_manager.load().as_ref().clone())
 }
 
-//
-// This module provides [`ConfigWatcher`], which holds the live [`AppConfig`]
-// behind an `Arc<RwLock<_>>` and can reload it at any time — either
-// programmatically via [`ConfigWatcher::reload`] or automatically by
-// subscribing to a Redis pub/sub channel with [`ConfigWatcher::watch`].
-//
-// When a reload message arrives on the Redis channel the watcher fetches the
-// new configuration JSON from a Redis key, deserialises it, and atomically
-// swaps the in-memory value. All readers that hold a clone of the
-// [`ConfigHandle`] see the new values on their next read without any restart.
-//
-// # Example
-//
-// ```rust,no_run
-// use backend::config::reload::{AppConfig, ConfigWatcher};
-//
-// # async fn example() -> anyhow::Result<()> {
-// let watcher = ConfigWatcher::new(AppConfig::default());
-// let handle = watcher.handle();
-//
-// // Read the current config
-// let cfg = handle.get().await;
-// println!("log level: {}", cfg.log_level);
-//
-// // Trigger a manual reload
-// watcher.reload(AppConfig {
-//     log_level: "info".to_string(),
-//     ..AppConfig::default()
-// }).await;
-// # Ok(())
-// # }
-// ```
-//
-// # Redis protocol
-//
-// Publish any non-empty string to `config:reload` to trigger a reload:
-//
-// ```text
-// PUBLISH config:reload ""
-// SET config:current '{"log_level":"info","max_connections":50,...}'
-// PUBLISH config:reload "reload"
-// ```
-//
-// The watcher reads `config:current` from Redis after every message on
-// `config:reload`. If the key is absent or unparseable the existing config
-// is kept and an error is logged.
-
-use redis::{AsyncCommands, Client as RedisClient};
-use serde::{Deserialize, Serialize};
-use tokio::sync::{watch, RwLock};
-use tracing::{error, warn};
-
 // ---------------------------------------------------------------------------
-// Error type
+// ReloadError (ConfigWatcher)
 // ---------------------------------------------------------------------------
 
-/// Errors that can occur during configuration reload.
+/// Errors that can occur during ConfigWatcher reload.
 #[derive(Debug, Error)]
 pub enum ReloadError {
     /// A Redis error occurred.
@@ -155,15 +183,12 @@ pub enum ReloadError {
 }
 
 // ---------------------------------------------------------------------------
-// AppConfig
+// HotAppConfig (used by ConfigWatcher)
 // ---------------------------------------------------------------------------
 
 /// Live application configuration that can be hot-reloaded at runtime.
-///
-/// All fields have sensible defaults so the application starts without any
-/// external configuration source.
 #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub struct AppConfig {
+pub struct HotAppConfig {
     /// Tracing / log filter directive (e.g. `"backend=debug"`).
     pub log_level: String,
     /// Maximum number of database connections in the pool.
@@ -172,11 +197,11 @@ pub struct AppConfig {
     pub request_timeout_secs: u64,
     /// Whether the maintenance mode banner is shown.
     pub maintenance_mode: bool,
-    /// Redis key that stores the serialised [`AppConfig`] JSON.
+    /// Redis key that stores the serialised [`HotAppConfig`] JSON.
     pub redis_config_key: String,
 }
 
-impl Default for AppConfig {
+impl Default for HotAppConfig {
     fn default() -> Self {
         Self {
             log_level: "backend=debug,tower_http=debug".to_string(),
@@ -189,30 +214,24 @@ impl Default for AppConfig {
 }
 
 // ---------------------------------------------------------------------------
-// ConfigHandle — cheap clone, shared reader
+// ConfigHandle
 // ---------------------------------------------------------------------------
 
 /// A cheap-to-clone handle to the live configuration.
-///
-/// Obtain one via [`ConfigWatcher::handle`] and share it across the
-/// application. Reads never block writers for more than a single lock
-/// acquisition.
 #[derive(Clone)]
 pub struct ConfigHandle {
-    inner: Arc<RwLock<AppConfig>>,
-    /// Notified whenever the config is reloaded.
+    inner: Arc<RwLock<HotAppConfig>>,
     changed: watch::Receiver<()>,
 }
 
 impl ConfigHandle {
     /// Return a snapshot of the current configuration.
-    pub async fn get(&self) -> AppConfig {
+    pub async fn get(&self) -> HotAppConfig {
         self.inner.read().await.clone()
     }
 
     /// Wait until the configuration changes, then return the new snapshot.
-    pub async fn wait_for_change(&mut self) -> AppConfig {
-        // `changed()` resolves immediately if there is an unseen change.
+    pub async fn wait_for_change(&mut self) -> HotAppConfig {
         let _ = self.changed.changed().await;
         self.get().await
     }
@@ -222,16 +241,16 @@ impl ConfigHandle {
 // ConfigWatcher
 // ---------------------------------------------------------------------------
 
-/// Owns the live [`AppConfig`] and drives hot-reload.
+/// Owns the live [`HotAppConfig`] and drives hot-reload via Redis pub/sub.
 pub struct ConfigWatcher {
-    inner: Arc<RwLock<AppConfig>>,
+    inner: Arc<RwLock<HotAppConfig>>,
     notify_tx: watch::Sender<()>,
     notify_rx: watch::Receiver<()>,
 }
 
 impl ConfigWatcher {
     /// Create a new watcher with the given initial configuration.
-    pub fn new(initial: AppConfig) -> Self {
+    pub fn new(initial: HotAppConfig) -> Self {
         let (tx, rx) = watch::channel(());
         Self {
             inner: Arc::new(RwLock::new(initial)),
@@ -249,7 +268,7 @@ impl ConfigWatcher {
     }
 
     /// Atomically replace the current configuration and notify all handles.
-    pub async fn reload(&self, new_config: AppConfig) {
+    pub async fn reload(&self, new_config: HotAppConfig) {
         let old = {
             let mut guard = self.inner.write().await;
             let old = guard.clone();
@@ -263,7 +282,6 @@ impl ConfigWatcher {
                 maintenance_mode = new_config.maintenance_mode,
                 "Configuration reloaded"
             );
-            // Ignore send error — it only fails when all receivers are dropped.
             let _ = self.notify_tx.send(());
         } else {
             info!("Configuration reload requested but values unchanged");
@@ -271,34 +289,28 @@ impl ConfigWatcher {
     }
 
     /// Fetch the current configuration from Redis and apply it.
     ///
-    /// Reads the JSON value stored at `AppConfig::redis_config_key` (default
-    /// `config:current`), deserialises it, and calls [`Self::reload`].
+    /// Reads the JSON value stored under the active `redis_config_key`,
+    /// deserialises it into a [`HotAppConfig`], and calls [`Self::reload`].
     ///
     /// # Errors
     /// Returns [`ReloadError`] if the Redis key is absent, the connection
     /// fails, or the JSON cannot be deserialised.
     pub async fn reload_from_redis(&self, redis: &RedisClient) -> Result<(), ReloadError> {
         let key = self.inner.read().await.redis_config_key.clone();
         let mut conn = redis.get_multiplexed_async_connection().await?;
         let raw: Option<String> = conn.get(&key).await?;
         let json = raw.ok_or(ReloadError::NotFound)?;
-        let new_config: AppConfig = serde_json::from_str(&json)?;
+        let new_config: HotAppConfig = serde_json::from_str(&json)?;
         self.reload(new_config).await;
         Ok(())
     }
 
-    /// Spawn a background task that subscribes to `config:reload` on Redis
-    /// and calls [`Self::reload_from_redis`] on every message.
-    ///
-    /// The task runs until the Redis connection is lost or the process exits.
-    /// Connection errors are logged and the task exits — callers may restart
-    /// it if desired.
+    /// Spawn a background task that subscribes to `config:reload` on Redis.
     pub fn watch(self: Arc<Self>, redis: RedisClient) -> tokio::task::JoinHandle<()> {
         tokio::spawn(async move {
             const CHANNEL: &str = "config:reload";
 
-            // get_async_connection is the only way to obtain a PubSub-capable connection.
             #[allow(deprecated)]
             let conn = match redis.get_async_connection().await {
                 Ok(c) => c,
@@ -314,10 +319,7 @@ impl ConfigWatcher {
                 return;
             }
 
-            info!(
-                channel = CHANNEL,
-                "Config watcher: listening for reload signals"
-            );
+            info!(channel = CHANNEL, "Config watcher: listening for reload signals");
 
             let mut stream = pubsub.into_on_message();
             use futures_util::StreamExt;
@@ -328,7 +330,10 @@ impl ConfigWatcher {
                         let payload: String = msg.get_payload().unwrap_or_default();
                         info!(payload = %payload, "Config reload signal received");
                         if let Err(e) = self.reload_from_redis(&redis).await {
-                            warn!(error = %e, "Config reload from Redis failed; keeping current config");
+                            warn!(
+                                error = %e,
+                                "Config reload from Redis failed; keeping current config"
+                            );
                         }
                     }
                     None => {
@@ -350,14 +355,12 @@ mod tests {
     use super::*;
 
     fn default_watcher() -> ConfigWatcher {
-        ConfigWatcher::new(AppConfig::default())
+        ConfigWatcher::new(HotAppConfig::default())
     }
 
-    // --- AppConfig ---
-
     #[test]
     fn test_default_config_values() {
-        let cfg = AppConfig::default();
+        let cfg = HotAppConfig::default();
         assert_eq!(cfg.max_connections, 10);
         assert_eq!(cfg.request_timeout_secs, 30);
         assert!(!cfg.maintenance_mode);
@@ -367,36 +370,23 @@ mod tests {
 
     #[test]
     fn test_config_serialisation_roundtrip() {
-        let cfg = AppConfig::default();
+        let cfg = HotAppConfig::default();
         let json = serde_json::to_string(&cfg).unwrap();
-        let back: AppConfig = serde_json::from_str(&json).unwrap();
+        let back: HotAppConfig = serde_json::from_str(&json).unwrap();
         assert_eq!(cfg, back);
     }
 
-    #[test]
-    fn test_config_partial_deserialisation() {
-        // Only some fields present — rest should use serde defaults.
-        let json = r#"{"log_level":"info","max_connections":25,"request_timeout_secs":60,"maintenance_mode":true,"redis_config_key":"config:current"}"#;
-        let cfg: AppConfig = serde_json::from_str(json).unwrap();
-        assert_eq!(cfg.log_level, "info");
-        assert_eq!(cfg.max_connections, 25);
-        assert!(cfg.maintenance_mode);
-    }
-
-    // --- ConfigWatcher::reload ---
-
     #[tokio::test]
     async fn test_reload_updates_config() {
         let watcher = default_watcher();
         let handle = watcher.handle();
 
-        let new_cfg = AppConfig {
+        let new_cfg = HotAppConfig {
             log_level: "info".to_string(),
             max_connections: 50,
-            ..AppConfig::default()
+            ..HotAppConfig::default()
         };
         watcher.reload(new_cfg.clone()).await;
-
         assert_eq!(handle.get().await, new_cfg);
     }
 
@@ -404,14 +394,8 @@ mod tests {
     async fn test_reload_unchanged_does_not_notify() {
         let watcher = default_watcher();
         let mut handle = watcher.handle();
-
-        // Mark the initial value as seen.
         handle.changed.borrow_and_update();
-
-        // Reload with identical config.
-        watcher.reload(AppConfig::default()).await;
-
-        // `has_changed` should be false — no notification was sent.
+        watcher.reload(HotAppConfig::default()).await;
         assert!(!handle.changed.has_changed().unwrap());
     }
 
@@ -419,91 +403,42 @@ mod tests {
     async fn test_reload_changed_notifies_handle() {
         let watcher = default_watcher();
         let mut handle = watcher.handle();
-
         handle.changed.borrow_and_update();
-
         watcher
-            .reload(AppConfig {
+            .reload(HotAppConfig {
                 maintenance_mode: true,
-                ..AppConfig::default()
+                ..HotAppConfig::default()
             })
             .await;
-
         assert!(handle.changed.has_changed().unwrap());
     }
 
-    // --- ConfigHandle ---
-
-    #[tokio::test]
-    async fn test_handle_get_returns_current() {
-        let watcher = default_watcher();
-        let handle = watcher.handle();
-        assert_eq!(handle.get().await, AppConfig::default());
-    }
-
     #[tokio::test]
     async fn test_multiple_handles_see_same_update() {
         let watcher = default_watcher();
         let h1 = watcher.handle();
         let h2 = watcher.handle();
-
-        let new_cfg = AppConfig {
+        let new_cfg = HotAppConfig {
             max_connections: 99,
-            ..AppConfig::default()
+            ..HotAppConfig::default()
         };
-        watcher.reload(new_cfg.clone()).await;
-
+        watcher.reload(new_cfg).await;
         assert_eq!(h1.get().await.max_connections, 99);
         assert_eq!(h2.get().await.max_connections, 99);
     }
 
-    #[tokio::test]
-    async fn test_wait_for_change_resolves_after_reload() {
-        let watcher = Arc::new(default_watcher());
-        let mut handle = watcher.handle();
-
-        // Mark current as seen so wait_for_change actually waits.
-        handle.changed.borrow_and_update();
-
-        let watcher2 = Arc::clone(&watcher);
-        tokio::spawn(async move {
-            tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
-            watcher2
-                .reload(AppConfig {
-                    maintenance_mode: true,
-                    ..AppConfig::default()
-                })
-                .await;
-        });
-
-        let updated = handle.wait_for_change().await;
-        assert!(updated.maintenance_mode);
-    }
-
-    // --- reload_from_redis (no live Redis — error path) ---
-
     #[tokio::test]
     async fn test_reload_from_redis_connection_error() {
         let watcher = default_watcher();
-        // Port 1 is never open — connection will fail immediately.
         let redis = RedisClient::open("redis://127.0.0.1:1/").unwrap();
         let result = watcher.reload_from_redis(&redis).await;
         assert!(matches!(result, Err(ReloadError::Redis(_))));
-        // Config must be unchanged.
-        assert_eq!(watcher.handle().get().await, AppConfig::default());
+        assert_eq!(watcher.handle().get().await, HotAppConfig::default());
     }
 
-    // --- ReloadError display ---
-
     #[test]
     fn test_reload_error_not_found_display() {
         let e = ReloadError::NotFound;
         assert!(e.to_string().contains("not found"));
     }
-
-    #[test]
-    fn test_reload_error_deserialise_display() {
-        let e = ReloadError::Deserialise(serde_json::from_str::<AppConfig>("bad").unwrap_err());
-        assert!(!e.to_string().is_empty());
-    }
 }
diff --git a/backend/src/error.rs b/backend/src/error.rs
index 72bdbfe..b9ce3e7 100644
--- a/backend/src/error.rs
+++ b/backend/src/error.rs
@@ -9,7 +9,6 @@ use axum::{
     Json,
 };
 use serde::Serialize;
-use serde_json::json;
 use thiserror::Error;
 use tracing::error;
 
@@ -30,7 +29,7 @@ pub struct ErrorResponse {
 /// # Examples
 ///
 /// ```rust,no_run
-/// use crucible_backend::error::AppError;
+/// use backend::error::AppError;
 ///
 /// async fn handler() -> Result<String, AppError> {
 ///     Err(AppError::NotFound("Contract not found".into()))
@@ -68,47 +67,21 @@ pub enum AppError {
 
     /// 500 — An internal database error occurred.
     #[error("Database error: {0}")]
-    Database(#[from] sqlx::Error),
+    DatabaseError(#[from] sqlx::Error),
 
     /// 500 — An internal Redis error occurred.
     #[error("Redis error: {0}")]
-    Redis(#[from] redis::RedisError),
-
-    /// 500 — A serialization error occurred.
-    #[error("Serialization error: {0}")]
-    Serialization(#[from] serde_json::Error),
+    RedisError(#[from] redis::RedisError),
 
     /// 500 — A catch-all for unexpected internal errors.
     #[error("Internal error: {0}")]
     InternalError(String),
 
-    /// 500 — Internal server error (no message).
-    #[error("Internal server error")]
-    Internal,
-
-    /// 502 — Stellar network communication failure.
+    /// 502 — A Stellar network operation failed.
     #[error("Stellar operation failed: {0}")]
     StellarError(String),
 }
 
-// Convenience constructors used by services.
-impl AppError {
-    /// Wrap a database error.
-    pub fn db(e: sqlx::Error) -> Self {
-        AppError::Database(e)
-    }
-
-    /// Wrap a Redis error.
-    pub fn redis(e: redis::RedisError) -> Self {
-        AppError::Redis(e)
-    }
-
-    /// Wrap a serialization error.
-    pub fn serialization(e: serde_json::Error) -> Self {
-        AppError::Serialization(e)
-    }
-}
-
 impl IntoResponse for AppError {
     fn into_response(self) -> Response {
         let (status, code, message) = match &self {
@@ -117,12 +90,10 @@ impl IntoResponse for AppError {
             AppError::Unauthorized(msg) => (StatusCode::UNAUTHORIZED, "unauthorized", msg.clone()),
             AppError::Forbidden(msg) => (StatusCode::FORBIDDEN, "forbidden", msg.clone()),
             AppError::Conflict(msg) => (StatusCode::CONFLICT, "conflict", msg.clone()),
-            AppError::ValidationError(msg) => (
-                StatusCode::UNPROCESSABLE_ENTITY,
-                "validation_error",
-                msg.clone(),
-            ),
-            AppError::Database(e) => {
+            AppError::ValidationError(msg) => {
+                (StatusCode::UNPROCESSABLE_ENTITY, "validation_error", msg.clone())
+            }
+            AppError::DatabaseError(e) => {
                 error!("Database error: {e:?}");
                 (
                     StatusCode::INTERNAL_SERVER_ERROR,
@@ -130,7 +101,7 @@ impl IntoResponse for AppError {
                     "An internal database error occurred".to_string(),
                 )
             }
-            AppError::Redis(e) => {
+            AppError::RedisError(e) => {
                 error!("Redis error: {e:?}");
                 (
                     StatusCode::INTERNAL_SERVER_ERROR,
@@ -154,14 +125,6 @@ impl IntoResponse for AppError {
                     "An internal error occurred".to_string(),
                 )
             }
-            AppError::Internal => {
-                error!("Internal server error");
-                (
-                    StatusCode::INTERNAL_SERVER_ERROR,
-                    "internal_error",
-                    "An internal server error occurred".to_string(),
-                )
-            }
             AppError::StellarError(msg) => {
                 error!("Stellar error: {msg}");
                 (
@@ -170,7 +133,6 @@ impl IntoResponse for AppError {
                     "Failed to communicate with Stellar network".to_string(),
                 )
             }
-            AppError::LengthRequired(msg) => (StatusCode::LENGTH_REQUIRED, "length_required", msg.clone()),
         };
 
         (
diff --git a/backend/src/jobs.rs b/backend/src/jobs.rs
index bfd1380..b2c97a0 100644
--- a/backend/src/jobs.rs
+++ b/backend/src/jobs.rs
@@ -1,14 +1,17 @@
-use crate::services::tracing::TracingService;
+//! Background job definitions for the Apalis job queue.
+
 use serde::{Deserialize, Serialize};
 use tracing::{info, instrument};
 
+use crate::services::tracing::TracingService;
+
+/// Job payload for monitoring a Stellar transaction.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct TransactionMonitorJob {
     pub tx_hash: String,
 }
 
-/// Handler for monitoring Stellar transactions.
-/// Returning () since Apalis 0.6 handlers can return ().
+/// Handler for monitoring Stellar transactions via Apalis.
 #[instrument(skip_all, fields(job.name = "monitor_transaction", job.id = %job.tx_hash))]
 pub async fn monitor_transaction(job: TransactionMonitorJob) {
     let span = TracingService::job_span("monitor_transaction", &job.tx_hash);
@@ -16,6 +19,5 @@ pub async fn monitor_transaction(job: TransactionMonitorJob) {
 
     info!("Monitoring Stellar transaction: {}", job.tx_hash);
     tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
-
     info!("Transaction monitoring completed: {}", job.tx_hash);
 }
diff --git a/backend/src/lib.rs b/backend/src/lib.rs
index 1c91475..2d41232 100644
--- a/backend/src/lib.rs
+++ b/backend/src/lib.rs
@@ -1,3 +1,5 @@
+//! Crucible backend library crate.
+
 pub mod api;
 pub mod config;
 pub mod db;
diff --git a/backend/src/services/business_metrics.rs b/backend/src/services/business_metrics.rs
index 6b1184f..43da365 100644
--- a/backend/src/services/business_metrics.rs
+++ b/backend/src/services/business_metrics.rs
@@ -1,3 +1,7 @@
+//! Business metrics service for tracking revenue, costs, and operational KPIs.
+
+#![allow(dead_code)]
+
 use chrono::{DateTime, Duration, Utc};
 use rust_decimal::Decimal;
 use serde::{Deserialize, Serialize};
@@ -5,24 +12,14 @@ use sqlx::PgPool;
 use std::collections::HashMap;
 use std::sync::Arc;
 use tokio::sync::RwLock;
-use tracing::{error, info, instrument, warn};
+use tracing::{error, info, instrument};
 use uuid::Uuid;
 
 use crate::error::AppError;
 
-// ─── Domain Types ────────────────────────────────────────────────────────────
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct BusinessMetric {
-    pub id: Uuid,
-    pub name: String,
-    pub value: Decimal,
-    pub unit: String,
-    pub category: MetricCategory,
-    pub tags: HashMap<String, String>,
-    pub recorded_at: DateTime<Utc>,
-    pub source: MetricSource,
-}
+// ---------------------------------------------------------------------------
+// Domain types
+// ---------------------------------------------------------------------------
 
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 #[serde(rename_all = "snake_case")]
@@ -35,20 +32,34 @@ pub enum MetricCategory {
     Custom(String),
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
 #[serde(rename_all = "snake_case")]
 pub enum MetricSource {
     OnChain,
     OffChain,
+    #[default]
     Database,
     ExternalApi,
     Manual,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct MetricSnapshot {
-    pub timestamp: DateTime<Utc>,
-    pub metrics: Vec<BusinessMetric>,
+pub struct BusinessMetric {
+    pub id: Uuid,
+    pub name: String,
+    pub value: Decimal,
+    pub unit: String,
+    pub category: MetricCategory,
+    pub tags: HashMap<String, String>,
+    pub recorded_at: DateTime<Utc>,
+    pub source: MetricSource,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MetricsSummary {
+    pub total_metrics: i64,
+    pub categories: HashMap<String, i64>,
+    pub latest_timestamp: Option<DateTime<Utc>>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -61,14 +72,9 @@ pub struct MetricsQuery {
     pub offset: Option<i64>,
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct MetricsSummary {
-    pub total_metrics: i64,
-    pub categories: HashMap<String, i64>,
-    pub latest_timestamp: Option<DateTime<Utc>>,
-}
-
-// ─── Service ─────────────────────────────────────────────────────────────────
+// ---------------------------------------------------------------------------
+// Service
+// ---------------------------------------------------------------------------
 
 pub struct BusinessMetricsService {
     db: PgPool,
@@ -83,48 +89,52 @@ impl BusinessMetricsService {
         }
     }
 
-    /// Record a new business metric with the given parameters.
-    #[instrument(skip(self), fields(metric_name = %name))]
+    /// Record a new business metric.
+    #[instrument(skip(self, tags, value, unit, category, source))]
     pub async fn record_metric(
         &self,
-        name: impl Into<String>,
+        name: String,
         value: Decimal,
-        unit: impl Into<String>,
+        unit: String,
         category: MetricCategory,
         tags: HashMap<String, String>,
         source: MetricSource,
     ) -> Result<BusinessMetric, AppError> {
         let id = Uuid::new_v4();
         let now = Utc::now();
-        let name = name.into();
-        let unit = unit.into();
-
-        sqlx::query_as!(
-            BusinessMetric,
+        let category_str = serde_json::to_string(&category)
+            .map_err(|e| AppError::InternalError(e.to_string()))?;
+        let source_str = serde_json::to_string(&source)
+            .map_err(|e| AppError::InternalError(e.to_string()))?;
+        let tags_json = serde_json::to_value(&tags)
+            .map_err(|e| AppError::InternalError(e.to_string()))?;
+        // Store Decimal as string to avoid sqlx type issues
+        let value_str = value.to_string();
+
+        sqlx::query(
             r#"
             INSERT INTO business_metrics (id, name, value, unit, category, tags, recorded_at, source)
             VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
-            RETURNING id, name, value, unit, category as "category: _", tags as "tags: _", recorded_at, source as "source: _"
             "#,
-            id,
-            name,
-            value,
-            unit,
-            category as MetricCategory,
-            serde_json::to_value(&tags)?,
-            now,
-            source as MetricSource,
         )
-        .fetch_one(&self.db)
+        .bind(id)
+        .bind(&name)
+        .bind(&value_str)
+        .bind(&unit)
+        .bind(&category_str)
+        .bind(&tags_json)
+        .bind(now)
+        .bind(&source_str)
+        .execute(&self.db)
         .await
         .map_err(|e| {
             error!(error = %e, "Failed to record metric");
-            AppError::Database(e)
+            AppError::DatabaseError(e)
         })?;
 
         let metric = BusinessMetric {
             id,
-            name,
+            name: name.clone(),
             value,
             unit,
             category,
@@ -138,7 +148,6 @@ impl BusinessMetricsService {
             let mut cache = self.cache.write().await;
             let entry = cache.entry(metric.name.clone()).or_default();
             entry.push(metric.clone());
-            // Keep last 1000 values per metric
             if entry.len() > 1000 {
                 entry.remove(0);
             }
@@ -147,429 +156,81 @@ impl BusinessMetricsService {
         info!(
             metric_name = %metric.name,
             value = %metric.value,
-            category = ?metric.category,
             "Recorded business metric"
         );
 
         Ok(metric)
     }
 
-    /// Record multiple metrics in a single transaction.
-    #[instrument(skip(self, metrics))]
-    pub async fn record_metrics_batch(
-        &self,
-        metrics: Vec<(
-            String,
-            Decimal,
-            String,
-            MetricCategory,
-            HashMap<String, String>,
-            MetricSource,
-        )>,
-    ) -> Result<Vec<BusinessMetric>, AppError> {
-        let mut tx = self.db.begin().await?;
-        let mut results = Vec::with_capacity(metrics.len());
-        let now = Utc::now();
-
-        for (name, value, unit, category, tags, source) in metrics {
-            let id = Uuid::new_v4();
-
-            sqlx::query!(
-                r#"
-                INSERT INTO business_metrics (id, name, value, unit, category, tags, recorded_at, source)
-                VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
-                "#,
-                id,
-                name,
-                value,
-                unit,
-                serde_json::to_value(&tags)?,
-                now,
-                source as MetricSource,
-            )
-            .execute(&mut *tx)
-            .await
-            .map_err(|e| {
-                error!(error = %e, "Failed in batch metric insert");
-                AppError::Database(e)
-            })?;
-
-            results.push(BusinessMetric {
-                id,
-                name,
-                value,
-                unit,
-                category,
-                tags,
-                recorded_at: now,
-                source,
-            });
-        }
-
-        tx.commit().await.map_err(|e| {
-            error!(error = %e, "Failed to commit batch metrics");
-            AppError::Database(e)
-        })?;
-
-        info!(count = results.len(), "Recorded batch metrics");
-        Ok(results)
-    }
-
-    /// Query metrics with optional filters.
-    #[instrument(skip(self))]
-    pub async fn query_metrics(
-        &self,
-        query: MetricsQuery,
-    ) -> Result<(Vec<BusinessMetric>, i64), AppError> {
-        let limit = query.limit.unwrap_or(100);
-        let offset = query.offset.unwrap_or(0);
-
-        let total =
-            sqlx::query_scalar!(r#"SELECT COUNT(*) as "count!" FROM business_metrics WHERE 1=1"#)
-                .fetch_one(&self.db)
-                .await
-                .map_err(|e| AppError::Database(e))?
-                .unwrap_or(0);
-
-        let metrics = sqlx::query_as!(
-            BusinessMetric,
-            r#"
-            SELECT id, name, value, unit, category as "category: _", tags as "tags: _", recorded_at, source as "source: _"
-            FROM business_metrics
-            ORDER BY recorded_at DESC
-            LIMIT $1 OFFSET $2
-            "#,
-            limit,
-            offset,
-        )
-        .fetch_all(&self.db)
-        .await
-        .map_err(|e| AppError::Database(e))?;
-
-        Ok((metrics, total))
-    }
-
-    /// Get aggregated metrics summary.
-    #[instrument(skip(self))]
-    pub async fn get_metrics_summary(&self) -> Result<MetricsSummary, AppError> {
-        let total: i64 =
-            sqlx::query_scalar!(r#"SELECT COUNT(*) as "count!" FROM business_metrics"#)
-                .fetch_one(&self.db)
-                .await
-                .map_err(|e| AppError::Database(e))?
-                .unwrap_or(0);
-
-        let latest: Option<DateTime<Utc>> =
-            sqlx::query_scalar!(r#"SELECT MAX(recorded_at) as "max!" FROM business_metrics"#)
-                .fetch_one(&self.db)
-                .await
-                .map_err(|e| AppError::Database(e))?;
-
-        let rows = sqlx::query!(
-            r#"SELECT category as "category!: MetricCategory", COUNT(*) as "count!: i64" FROM business_metrics GROUP BY category"#
-        )
-        .fetch_all(&self.db)
-        .await
-        .map_err(|e| AppError::Database(e))?;
-
-        let mut categories = HashMap::new();
-        for row in rows {
-            let key = match row.category {
-                MetricCategory::Custom(s) => s,
-                other => format!("{:?}", other).to_lowercase(),
-            };
-            categories.insert(key, row.count);
-        }
-
-        Ok(MetricsSummary {
-            total_metrics: total,
-            categories,
-            latest_timestamp: latest,
-        })
-    }
-
-    /// Compute aggregated values for a metric over a time range.
-    #[instrument(skip(self))]
-    pub async fn aggregate_metric(
-        &self,
-        name: &str,
-        from: DateTime<Utc>,
-        to: DateTime<Utc>,
-    ) -> Result<Option<Decimal>, AppError> {
-        let result = sqlx::query_scalar!(
-            r#"SELECT SUM(value) as "sum!: Decimal" FROM business_metrics WHERE name = $1 AND recorded_at >= $2 AND recorded_at <= $3"#,
-            name,
-            from,
-            to,
-        )
-        .fetch_one(&self.db)
-        .await
-        .map_err(|e| AppError::Database(e))?;
-
-        Ok(result)
-    }
-
-    /// Get the latest value for a specific metric.
-    #[instrument(skip(self))]
-    pub async fn get_latest_metric(&self, name: &str) -> Result<Option<BusinessMetric>, AppError> {
-        // Check cache first
-        {
-            let cache = self.cache.read().await;
-            if let Some(values) = cache.get(name) {
-                if let Some(latest) = values.last() {
-                    return Ok(Some(latest.clone()));
-                }
-            }
-        }
-
-        // Fall back to database
-        let metric = sqlx::query_as!(
-            BusinessMetric,
-            r#"
-            SELECT id, name, value, unit, category as "category: _", tags as "tags: _", recorded_at, source as "source: _"
-            FROM business_metrics
-            WHERE name = $1
-            ORDER BY recorded_at DESC
-            LIMIT 1
-            "#,
-            name,
-        )
-        .fetch_optional(&self.db)
-        .await
-        .map_err(|e| AppError::Database(e))?;
-
-        Ok(metric)
-    }
-
     /// Remove metrics older than the retention period.
     #[instrument(skip(self))]
     pub async fn prune_old_metrics(&self, retention_days: i64) -> Result<u64, AppError> {
         let cutoff = Utc::now() - Duration::days(retention_days);
 
-        let deleted = sqlx::query!(
-            r#"DELETE FROM business_metrics WHERE recorded_at < $1"#,
-            cutoff,
-        )
-        .execute(&self.db)
-        .await
-        .map_err(|e| AppError::Database(e))?
-        .rows_affected();
+        let result = sqlx::query("DELETE FROM business_metrics WHERE recorded_at < $1")
+            .bind(cutoff)
+            .execute(&self.db)
+            .await
+            .map_err(AppError::DatabaseError)?;
 
+        let deleted = result.rows_affected();
         info!(deleted, retention_days, "Pruned old metrics");
         Ok(deleted)
     }
-}
-
-// ─── API Handlers ────────────────────────────────────────────────────────────
-
-use axum::{extract::State, http::StatusCode, Json};
-
-pub struct MetricsState {
-    pub service: Arc<BusinessMetricsService>,
-}
-
-#[derive(Debug, Deserialize)]
-pub struct RecordMetricRequest {
-    pub name: String,
-    pub value: Decimal,
-    pub unit: String,
-    pub category: MetricCategory,
-    #[serde(default)]
-    pub tags: HashMap<String, String>,
-    #[serde(default)]
-    pub source: MetricSource,
-}
-
-/// POST /api/metrics — Record a new business metric.
-#[utoipa::path(
-    post,
-    path = "/api/metrics",
-    request_body = RecordMetricRequest,
-    responses(
-        (status = 201, description = "Metric recorded", body = BusinessMetric),
-        (status = 400, description = "Invalid request"),
-        (status = 500, description = "Internal server error")
-    )
-)]
-pub async fn record_metric(
-    State(state): State<Arc<MetricsState>>,
-    Json(req): Json<RecordMetricRequest>,
-) -> Result<(StatusCode, Json<BusinessMetric>), AppError> {
-    let metric = state
-        .service
-        .record_metric(
-            req.name,
-            req.value,
-            req.unit,
-            req.category,
-            req.tags,
-            req.source,
-        )
-        .await?;
-
-    Ok((StatusCode::CREATED, Json(metric)))
-}
-
-/// GET /api/metrics — Query business metrics with filters.
-#[utoipa::path(
-    get,
-    path = "/api/metrics",
-    params(
-        ("category" = Option<MetricCategory>, Query, description = "Filter by category"),
-        ("from" = Option<DateTime<Utc>>, Query, description = "Start of time range"),
-        ("to" = Option<DateTime<Utc>>, Query, description = "End of time range"),
-        ("limit" = Option<i64>, Query, description = "Max results"),
-        ("offset" = Option<i64>, Query, description = "Pagination offset")
-    ),
-    responses(
-        (status = 200, description = "List of metrics with total count"),
-        (status = 500, description = "Internal server error")
-    )
-)]
-pub async fn query_metrics(
-    State(state): State<Arc<MetricsState>>,
-    axum::extract::Query(params): axum::extract::Query<HashMap<String, String>>,
-) -> Result<Json<serde_json::Value>, AppError> {
-    let category = params
-        .get("category")
-        .and_then(|c| serde_json::from_str(&format!("\"{}\"", c)).ok());
 
-    let from = params
-        .get("from")
-        .and_then(|v| v.parse::<DateTime<Utc>>().ok());
-    let to = params
-        .get("to")
-        .and_then(|v| v.parse::<DateTime<Utc>>().ok());
-    let limit = params.get("limit").and_then(|v| v.parse::<i64>().ok());
-    let offset = params.get("offset").and_then(|v| v.parse::<i64>().ok());
-
-    let query = MetricsQuery {
-        category,
-        from,
-        to,
-        tags: None,
-        limit,
-        offset,
-    };
-
-    let (metrics, total) = state.service.query_metrics(query).await?;
-
-    Ok(Json(serde_json::json!({
-        "metrics": metrics,
-        "total": total,
-    })))
+    /// Return the last cached metric for `name`, if any, without querying the database.
+    pub async fn get_cached_latest(&self, name: &str) -> Option<BusinessMetric> {
+        let guard = self.cache.read().await;
+        guard.get(name).and_then(|history| history.last()).cloned()
+    }
 }
 
-/// GET /api/metrics/summary — Get aggregated metrics overview.
-#[utoipa::path(
-    get,
-    path = "/api/metrics/summary",
-    responses(
-        (status = 200, description = "Metrics summary", body = MetricsSummary),
-        (status = 500, description = "Internal server error")
-    )
-)]
-pub async fn get_metrics_summary(
-    State(state): State<Arc<MetricsState>>,
-) -> Result<Json<MetricsSummary>, AppError> {
-    let summary = state.service.get_metrics_summary().await?;
-    Ok(Json(summary))
-}
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
 
 #[cfg(test)]
 mod tests {
     use super::*;
-    use sqlx::PgPool;
 
-    async fn setup_test_db() -> PgPool {
-        let pool = PgPool::connect("postgres://localhost:5432/crucible_test")
-            .await
-            .expect("Failed to connect to test database");
-
-        sqlx::query!(
-            r#"
-            CREATE TABLE IF NOT EXISTS business_metrics (
-                id UUID PRIMARY KEY,
-                name TEXT NOT NULL,
-                value NUMERIC NOT NULL,
-                unit TEXT NOT NULL,
-                category TEXT NOT NULL,
-                tags JSONB DEFAULT '{}',
-                recorded_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
-                source TEXT NOT NULL DEFAULT 'manual'
-            )
-            "#
-        )
-        .execute(&pool)
-        .await
-        .expect("Failed to create test table");
-
-        pool
+    #[test]
+    fn test_metric_category_serialization() {
+        let cat = MetricCategory::Revenue;
+        let json = serde_json::to_string(&cat).unwrap();
+        assert!(json.contains("revenue"));
     }
 
-    #[tokio::test]
-    async fn test_record_and_retrieve_metric() {
-        let pool = setup_test_db().await;
-        let service = BusinessMetricsService::new(pool);
-
-        let metric = service
-            .record_metric(
-                "test_revenue",
-                Decimal::new(1000, 0),
-                "USD",
-                MetricCategory::Revenue,
-                HashMap::from([("region".into(), "us-east".into())]),
-                MetricSource::Database,
-            )
-            .await
-            .expect("Failed to record metric");
-
-        assert_eq!(metric.name, "test_revenue");
-        assert_eq!(metric.value, Decimal::new(1000, 0));
-
-        let latest = service
-            .get_latest_metric("test_revenue")
-            .await
-            .expect("Failed to get metric")
-            .expect("Metric not found");
-
-        assert_eq!(latest.value, Decimal::new(1000, 0));
+    #[test]
+    fn test_metric_source_default() {
+        let src = MetricSource::default();
+        assert_eq!(src, MetricSource::Database);
     }
 
-    #[tokio::test]
-    async fn test_metrics_summary() {
-        let pool = setup_test_db().await;
-        let service = BusinessMetricsService::new(pool);
-
-        service
-            .record_metric(
-                "revenue",
-                Decimal::new(500, 0),
-                "USD",
-                MetricCategory::Revenue,
-                HashMap::new(),
-                MetricSource::Database,
-            )
-            .await
-            .expect("Failed to record");
-
-        service
-            .record_metric(
-                "cost",
-                Decimal::new(200, 0),
-                "USD",
-                MetricCategory::Costs,
-                HashMap::new(),
-                MetricSource::Database,
-            )
-            .await
-            .expect("Failed to record");
-
-        let summary = service
-            .get_metrics_summary()
-            .await
-            .expect("Failed to get summary");
+    #[test]
+    fn test_business_metric_serialization() {
+        let metric = BusinessMetric {
+            id: Uuid::new_v4(),
+            name: "revenue".to_string(),
+            value: Decimal::new(1000, 2),
+            unit: "USD".to_string(),
+            category: MetricCategory::Revenue,
+            tags: HashMap::from([("region".into(), "us-east".into())]),
+            recorded_at: Utc::now(),
+            source: MetricSource::Database,
+        };
+        let json = serde_json::to_string(&metric).unwrap();
+        assert!(json.contains("revenue"));
+        assert!(json.contains("USD"));
+    }
 
-        assert!(summary.total_metrics >= 2);
+    #[test]
+    fn test_metrics_summary_serialization() {
+        let summary = MetricsSummary {
+            total_metrics: 42,
+            categories: HashMap::from([("revenue".into(), 10i64)]),
+            latest_timestamp: Some(Utc::now()),
+        };
+        let json = serde_json::to_string(&summary).unwrap();
+        assert!(json.contains("42"));
     }
 }
diff --git a/backend/src/services/error_recovery.rs b/backend/src/services/error_recovery.rs
index 2e6ee25..c12cc38 100644
--- a/backend/src/services/error_recovery.rs
+++ b/backend/src/services/error_recovery.rs
@@ -1,11 +1,17 @@
+//! Error recovery service.
+//!
+//! Tracks retry state for failing tasks with configurable max retries.
+
 #![allow(dead_code)]
-use crate::services::tracing::TracingService;
+
 use serde::{Deserialize, Serialize};
 use std::sync::Arc;
 use thiserror::Error;
 use tokio::sync::RwLock;
 use tracing::{error, info, instrument, warn};
 
+use crate::services::tracing::TracingService;
+
 #[derive(Error, Debug, Serialize, Deserialize)]
 pub enum RecoveryError {
     #[error("Database error: {0}")]
@@ -84,7 +90,6 @@ impl ErrorManager {
     pub async fn get_active_tasks(&self) -> Vec<RecoveryTask> {
         let span = TracingService::service_method_span("ErrorManager", "get_active_tasks");
-        let _enter = span.enter();
-
-        self.tasks.read().await.clone()
+        // Span guards must not be held across `.await`; instrument the future instead.
+        tracing::Instrument::instrument(async { self.tasks.read().await.clone() }, span).await
     }
 }
@@ -98,32 +103,25 @@ mod tests {
         let manager = ErrorManager::new();
         let task_name = "test_task";
 
-        // First failure
         manager
-            .handle_error(
-                RecoveryError::Database("connection lost".to_string()),
-                task_name,
-            )
+            .handle_error(RecoveryError::Database("connection lost".to_string()), task_name)
             .await
             .unwrap();
         assert_eq!(manager.get_active_tasks().await.len(), 1);
         assert_eq!(manager.get_active_tasks().await[0].retries, 1);
 
-        // Second failure
         manager
             .handle_error(RecoveryError::Redis("timeout".to_string()), task_name)
             .await
             .unwrap();
         assert_eq!(manager.get_active_tasks().await[0].retries, 2);
 
-        // Third failure
         manager
             .handle_error(RecoveryError::Internal("unknown".to_string()), task_name)
             .await
             .unwrap();
         assert_eq!(manager.get_active_tasks().await[0].retries, 3);
 
-        // Fourth failure - should fail
         let result = manager
             .handle_error(RecoveryError::Internal("last straw".to_string()), task_name)
             .await;
diff --git a/backend/src/services/feature_flags.rs b/backend/src/services/feature_flags.rs
index a9346b3..805184c 100644
--- a/backend/src/services/feature_flags.rs
+++ b/backend/src/services/feature_flags.rs
@@ -1,26 +1,4 @@
 //! Feature flag service with Redis caching and PostgreSQL persistence.
-//!
-//! This module provides a production-ready feature flag system that:
-//! - Stores flag state in PostgreSQL for durability
-//! - Caches flag values in Redis for low-latency reads
-//! - Supports cache invalidation on updates
-//! - Provides async API for flag evaluation
-//!
-//! # Example
-//! ```rust,no_run
-//! use backend::services::feature_flags::FeatureFlagService;
-//! use sqlx::PgPool;
-//! use redis::Client;
-//!
-//! # async fn example(pool: PgPool, redis: Client) -> anyhow::Result<()> {
-//! let service = FeatureFlagService::new(pool, redis);
-//! let enabled = service.is_enabled("new_dashboard").await?;
-//! if enabled {
-//!     // render new UI
-//! }
-//! # Ok(())
-//! # }
-//! ```
 
 #![allow(dead_code)]
 
@@ -32,26 +10,20 @@ use sqlx::PgPool;
 use thiserror::Error;
 use tracing::{debug, info, instrument, warn};
 
+use crate::services::tracing::TracingService;
+
 // ---------------------------------------------------------------------------
 // Error type
 // ---------------------------------------------------------------------------
 
-/// Errors that can occur in the feature flag service.
 #[derive(Debug, Error)]
 pub enum FlagError {
-    /// A database error occurred.
     #[error("Database error: {0}")]
     Database(#[from] sqlx::Error),
-
-    /// A Redis error occurred.
     #[error("Redis error: {0}")]
     Redis(#[from] redis::RedisError),
-
-    /// The requested flag was not found.
     #[error("Feature flag not found: {0}")]
     NotFound(String),
-
-    /// An internal error occurred.
     #[error("Internal error: {0}")]
     Internal(String),
 }
@@ -60,16 +32,11 @@ pub enum FlagError {
 // Domain types
 // ---------------------------------------------------------------------------
 
-/// A feature flag record.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct FeatureFlag {
-    /// Unique key identifying the flag.
     pub key: String,
-    /// Whether the flag is enabled.
     pub enabled: bool,
-    /// Human-readable description.
     pub description: String,
-    /// Last update timestamp.
     pub updated_at: DateTime<Utc>,
 }
 
@@ -77,98 +44,49 @@ pub struct FeatureFlag {
 // FeatureFlagService
 // ---------------------------------------------------------------------------
 
-/// Service for managing feature flags with Redis caching and PostgreSQL persistence.
 pub struct FeatureFlagService {
     db: PgPool,
     redis: RedisClient,
 }
 
 impl FeatureFlagService {
-    /// Create a new feature flag service.
-    ///
-    /// # Arguments
-    /// - `db`: PostgreSQL connection pool
-    /// - `redis`: Redis client
     pub fn new(db: PgPool, redis: RedisClient) -> Self {
         Self { db, redis }
     }
 
-    /// Check if a feature flag is enabled.
-    ///
-    /// This method first checks Redis cache. On cache miss, it queries
-    /// PostgreSQL and populates the cache with a 5-minute TTL.
-    ///
-    /// # Errors
-    /// Returns [`FlagError::NotFound`] if the flag doesn't exist.
     #[instrument(skip(self), fields(service.name = "FeatureFlagService", service.method = "is_enabled"))]
     pub async fn is_enabled(&self, key: &str) -> Result<bool, FlagError> {
         let cache_key = format!("flag:{key}");
 
-        // Try cache first with Redis tracing
         let redis_span = TracingService::redis_command_span("GET", Some(&cache_key));
         let _redis_enter = redis_span.enter();
-
-        let mut conn = self
-            .redis
-            .get_multiplexed_async_connection()
-            .await
-            .map_err(|e| {
-                TracingService::record_error(&redis_span, &e.to_string(), "redis_connection");
-                e
-            })?;
-
-        let cached: Option<String> = conn.get(&cache_key).await.map_err(|e| {
-            TracingService::record_error(&redis_span, &e.to_string(), "redis_get");
-            e
-        })?;
-
+        let mut conn = self.redis.get_multiplexed_async_connection().await?;
+        let cached: Option<String> = conn.get(&cache_key).await?;
         drop(_redis_enter);
 
         if let Some(val) = cached {
-            debug!(key = %key, cached = %val, "Feature flag cache hit");
+            debug!(key = %key, "Feature flag cache hit");
             return Ok(val == "1");
         }
 
-        // Cache miss – query database with DB tracing
         debug!(key = %key, "Feature flag cache miss – querying database");
-        let row: Option<(bool,)> =
-            sqlx::query_as("SELECT enabled FROM feature_flags WHERE key = $1")
-                .bind(key)
-                .fetch_optional(&self.db)
-                .await?;
-
         let db_span = TracingService::db_query_span(
             "SELECT enabled FROM feature_flags WHERE key = $1",
             "postgres",
             "SELECT",
         );
         let _db_enter = db_span.enter();
-
         let row: Option<(bool,)> =
             sqlx::query_as("SELECT enabled FROM feature_flags WHERE key = $1")
                 .bind(key)
                 .fetch_optional(&self.db)
-                .await
-                .map_err(|e| {
-                    TracingService::record_error(&db_span, &e.to_string(), "database");
-                    e
-                })?;
-
+                .await?;
         drop(_db_enter);
 
         match row {
             Some((enabled,)) => {
-                // Populate cache with 5-minute TTL
-                let cache_set_span = TracingService::redis_command_span("SETEX", Some(&cache_key));
-                let _cache_set_enter = cache_set_span.enter();
-
                 let val = if enabled { "1" } else { "0" };
-                let _: () = conn.set_ex(&cache_key, val, 300).await.map_err(|e| {
-                    TracingService::record_error(&cache_set_span, &e.to_string(), "redis_setex");
-                    e
-                })?;
-
-                drop(_cache_set_enter);
+                let _: () = conn.set_ex(&cache_key, val, 300).await?;
                 debug!(key = %key, enabled = enabled, "Cached feature flag");
                 Ok(enabled)
             }
@@ -176,31 +94,14 @@ impl FeatureFlagService {
         }
     }
 
-    /// Get the full feature flag record.
-    ///
-    /// # Errors
-    /// Returns [`FlagError::NotFound`] if the flag doesn't exist.
     #[instrument(skip(self), fields(service.name = "FeatureFlagService", service.method = "get"))]
     pub async fn get(&self, key: &str) -> Result<FeatureFlag, FlagError> {
-        let db_span = TracingService::db_query_span(
-            "SELECT key, enabled, description, updated_at FROM feature_flags WHERE key = $1",
-            "postgres",
-            "SELECT",
-        );
-        let _db_enter = db_span.enter();
-
         let row: Option<(String, bool, String, DateTime<Utc>)> = sqlx::query_as(
             "SELECT key, enabled, description, updated_at FROM feature_flags WHERE key = $1",
         )
         .bind(key)
         .fetch_optional(&self.db)
-        .await
-        .map_err(|e| {
-            TracingService::record_error(&db_span, &e.to_string(), "database");
-            e
-        })?;
-
-        drop(_db_enter);
+        .await?;
 
         match row {
             Some((key, enabled, description, updated_at)) => Ok(FeatureFlag {
@@ -213,28 +114,13 @@ impl FeatureFlagService {
         }
     }
 
-    /// List all feature flags.
     #[instrument(skip(self), fields(service.name = "FeatureFlagService", service.method = "list"))]
     pub async fn list(&self) -> Result<Vec<FeatureFlag>, FlagError> {
-        let db_span = TracingService::db_query_span(
-            "SELECT key, enabled, description, updated_at FROM feature_flags ORDER BY key",
-            "postgres",
-            "SELECT",
-        );
-        let _db_enter = db_span.enter();
-
         let rows: Vec<(String, bool, String, DateTime<Utc>)> = sqlx::query_as(
             "SELECT key, enabled, description, updated_at FROM feature_flags ORDER BY key",
         )
         .fetch_all(&self.db)
-        .await
-        .map_err(|e| {
-            TracingService::record_error(&db_span, &e.to_string(), "database");
-            e
-        })?;
-
-        db_span.record("db.rows_affected", rows.len() as i64);
-        drop(_db_enter);
+        .await?;
 
         Ok(rows
             .into_iter()
@@ -247,19 +133,9 @@ impl FeatureFlagService {
             .collect())
     }
 
-    /// Create or update a feature flag.
-    ///
-    /// This method upserts the flag in PostgreSQL and invalidates the cache.
     #[instrument(skip(self), fields(service.name = "FeatureFlagService", service.method = "set"))]
     pub async fn set(&self, key: &str, enabled: bool, description: &str) -> Result<(), FlagError> {
-        let db_span = TracingService::db_query_span(
-            "INSERT INTO feature_flags ... ON CONFLICT DO UPDATE",
-            "postgres",
-            "UPSERT",
-        );
-        let _db_enter = db_span.enter();
-
-        let result = sqlx::query(
+        sqlx::query(
             r#"
             INSERT INTO feature_flags (key, enabled, description, updated_at)
             VALUES ($1, $2, $3, $4)
@@ -274,46 +150,19 @@ impl FeatureFlagService {
         .bind(description)
         .bind(Utc::now())
         .execute(&self.db)
-        .await
-        .map_err(|e| {
-            TracingService::record_error(&db_span, &e.to_string(), "database");
-            e
-        })?;
-
-        db_span.record("db.rows_affected", result.rows_affected() as i64);
-        drop(_db_enter);
+        .await?;
 
-        // Invalidate cache
         self.invalidate_cache(key).await?;
-
         info!(key = %key, enabled = enabled, "Feature flag updated");
         Ok(())
     }
 
-    /// Delete a feature flag.
-    ///
-    /// # Errors
-    /// Returns [`FlagError::NotFound`] if the flag doesn't exist.
     #[instrument(skip(self), fields(service.name = "FeatureFlagService", service.method = "delete"))]
     pub async fn delete(&self, key: &str) -> Result<(), FlagError> {
-        let db_span = TracingService::db_query_span(
-            "DELETE FROM feature_flags WHERE key = $1",
-            "postgres",
-            "DELETE",
-        );
-        let _db_enter = db_span.enter();
-
         let result = sqlx::query("DELETE FROM feature_flags WHERE key = $1")
             .bind(key)
             .execute(&self.db)
-            .await
-            .map_err(|e| {
-                TracingService::record_error(&db_span, &e.to_string(), "database");
-                e
-            })?;
-
-        db_span.record("db.rows_affected", result.rows_affected() as i64);
-        drop(_db_enter);
+            .await?;
 
         if result.rows_affected() == 0 {
             return Err(FlagError::NotFound(key.to_string()));
@@ -324,30 +173,10 @@ impl FeatureFlagService {
         Ok(())
     }
 
-    /// Invalidate the Redis cache for a specific flag.
-    #[instrument(skip(self), fields(service.name = "FeatureFlagService", service.method = "invalidate_cache"))]
     async fn invalidate_cache(&self, key: &str) -> Result<(), FlagError> {
-        let cache_key = format!("flag:{}", key);
-
-        let redis_span = TracingService::redis_command_span("DEL", Some(&cache_key));
-        let _redis_enter = redis_span.enter();
-
-        let mut conn = self
-            .redis
-            .get_multiplexed_async_connection()
-            .await
-            .map_err(|e| {
-                TracingService::record_error(&redis_span, &e.to_string(), "redis_connection");
-                e
-            })?;
-
-        let deleted: i32 = conn.del(&cache_key).await.map_err(|e| {
-            TracingService::record_error(&redis_span, &e.to_string(), "redis_del");
-            e
-        })?;
-
-        drop(_redis_enter);
-
+        let cache_key = format!("flag:{key}");
+        let mut conn = self.redis.get_multiplexed_async_connection().await?;
+        let deleted: i32 = conn.del(&cache_key).await?;
         if deleted > 0 {
             debug!(key = %key, "Invalidated feature flag cache");
         } else {
@@ -356,68 +185,32 @@ impl FeatureFlagService {
         Ok(())
     }
 
-    /// Flush all feature flag cache entries (useful for testing / maintenance).
-    ///
-    /// This uses a Redis SCAN to find all keys matching `flag:*` and deletes them.
     #[instrument(skip(self), fields(service.name = "FeatureFlagService", service.method = "flush_cache"))]
     pub async fn flush_cache(&self) -> Result<usize, FlagError> {
-        let keys_span = TracingService::redis_command_span("KEYS", Some("flag:*"));
-        let _keys_enter = keys_span.enter();
-
-        let mut conn = self
-            .redis
-            .get_multiplexed_async_connection()
-            .await
-            .map_err(|e| {
-                TracingService::record_error(&keys_span, &e.to_string(), "redis_connection");
-                e
-            })?;
-
+        let mut conn = self.redis.get_multiplexed_async_connection().await?;
         let keys: Vec<String> = redis::cmd("KEYS")
             .arg("flag:*")
             .query_async(&mut conn)
-            .await
-            .map_err(|e| {
-                TracingService::record_error(&keys_span, &e.to_string(), "redis_keys");
-                e
-            })?;
-
-        drop(_keys_enter);
+            .await?;
 
         if keys.is_empty() {
-            debug!("No feature flag cache entries to flush");
             return Ok(0);
         }
 
         let count = keys.len();
-
-        let del_span = TracingService::redis_command_span("DEL", None);
-        let _del_enter = del_span.enter();
-
         for key in keys {
-            let _: () = conn.del(&key).await.map_err(|e| {
-                TracingService::record_error(&del_span, &e.to_string(), "redis_del");
-                e
-            })?;
+            let _: () = conn.del(&key).await?;
         }
 
-        drop(_del_enter);
-
         info!(count = count, "Flushed feature flag cache");
         Ok(count)
     }
 }
 
-// ---------------------------------------------------------------------------
-// Tests
-// ---------------------------------------------------------------------------
-
 #[cfg(test)]
 mod tests {
     use super::*;
 
-    // Unit tests that do not require live database/Redis connections.
-
     #[test]
     fn test_flag_error_display() {
         let err = FlagError::NotFound("test_flag".to_string());
diff --git a/backend/src/services/log_alerts.rs b/backend/src/services/log_alerts.rs
index c08e169..7fbbe6c 100644
--- a/backend/src/services/log_alerts.rs
+++ b/backend/src/services/log_alerts.rs
@@ -1,200 +1,27 @@
-use crate::error::AppError;
-use axum::{
-    extract::{Path, State},
-    routing::{get, post},
-    Json, Router,
-};
-use serde::{Deserialize, Serialize};
-use sqlx::PgPool;
-use std::sync::Arc;
-use uuid::Uuid;
-
-#[derive(Debug, Serialize, Deserialize, sqlx::FromRow)]
-pub struct LogAlertRule {
-    pub id: Uuid,
-    pub name: String,
-    pub pattern: String,
-    pub threshold: i32,
-    pub interval_seconds: i32,
-    pub is_enabled: bool,
-}
-
-#[derive(Debug, Serialize, Deserialize)]
-pub struct CreateRuleRequest {
-    pub name: String,
-    pub pattern: String,
-    pub threshold: i32,
-    pub interval_seconds: i32,
-}
-
-#[derive(Debug, Serialize, Deserialize, sqlx::FromRow)]
-pub struct LogAlert {
-    pub id: Uuid,
-    pub rule_id: Uuid,
-    pub message: String,
-    pub triggered_at: chrono::DateTime<chrono::Utc>,
-}
-
-pub struct ServiceState {
-    pub db: PgPool,
-    pub redis: redis::Client,
-}
-
-pub fn router() -> Router {
-    Router::new()
-        .route("/rules", post(create_rule).get(list_rules))
-        .route("/rules/:id", get(get_rule))
-        .route("/ingest", post(ingest_log))
-}
-
-async fn create_rule(
-    State(state): State<Arc<ServiceState>>,
-    Json(payload): Json<CreateRuleRequest>,
-) -> Result<Json<LogAlertRule>, AppError> {
-    let rule = sqlx::query_as::<_, LogAlertRule>(
-        "INSERT INTO log_alert_rules (name, pattern, threshold, interval_seconds) 
-         VALUES ($1, $2, $3, $4) RETURNING *",
-    )
-    .bind(payload.name)
-    .bind(payload.pattern)
-    .bind(payload.threshold)
-    .bind(payload.interval_seconds)
-    .fetch_one(&state.db)
-    .await?;
-
-    Ok(Json(rule))
-}
-
-async fn list_rules(
-    State(state): State<Arc<ServiceState>>,
-) -> Result<Json<Vec<LogAlertRule>>, AppError> {
-    let rules = sqlx::query_as::<_, LogAlertRule>("SELECT * FROM log_alert_rules")
-        .fetch_all(&state.db)
-        .await?;
-    Ok(Json(rules))
-}
-
-async fn get_rule(
-    State(state): State<Arc<ServiceState>>,
-    Path(id): Path<Uuid>,
-) -> Result<Json<LogAlertRule>, AppError> {
-    let rule = sqlx::query_as::<_, LogAlertRule>("SELECT * FROM log_alert_rules WHERE id = $1")
-        .bind(id)
-        .fetch_optional(&state.db)
-        .await?
-        .ok_or_else(|| AppError::NotFound(format!("Rule not found: {}", id)))?;
-
-    Ok(Json(rule))
-}
-
-#[derive(Debug, Deserialize)]
-pub struct LogEntry {
-    pub message: String,
-    pub level: String,
-}
-
-async fn ingest_log(
-    State(state): State<Arc<ServiceState>>,
-    Json(log): Json<LogEntry>,
-) -> Result<Json<serde_json::Value>, AppError> {
-    tracing::info!("Processing log: {}", log.message);
-
-    // 1. Fetch all enabled rules
-    let rules =
-        sqlx::query_as::<_, LogAlertRule>("SELECT * FROM log_alert_rules WHERE is_enabled = true")
-            .fetch_all(&state.db)
-            .await?;
-
-    let mut matched_rules = Vec::new();
-
-    for rule in rules {
-        if log.message.contains(&rule.pattern) {
-            tracing::debug!("Log matched pattern for rule: {}", rule.name);
-
-            // 2. Increment count in Redis with TTL
-            let redis_key = format!(
-                "alert_count:{}:{}",
-                rule.id,
-                chrono::Utc::now().timestamp() / rule.interval_seconds as i64
-            );
-            let mut conn = state.redis.get_async_connection().await?;
-
-            let count: i32 = redis::cmd("INCR")
-                .arg(&redis_key)
-                .query_async(&mut conn)
-                .await?;
-
-            // Set TTL if new key
-            if count == 1 {
-                let _: () = redis::cmd("EXPIRE")
-                    .arg(&redis_key)
-                    .arg(rule.interval_seconds)
-                    .query_async(&mut conn)
-                    .await?;
-            }
-
-            // 3. Check if threshold reached
-            if count >= rule.threshold {
-                tracing::warn!(
-                    "Threshold reached for rule: {}. Triggering alert!",
-                    rule.name
-                );
-
-                // 4. Persist alert
-                sqlx::query("INSERT INTO log_alerts (rule_id, message) VALUES ($1, $2)")
-                    .bind(rule.id)
-                    .bind(format!(
-                        "Threshold of {} reached for pattern '{}'",
-                        rule.threshold, rule.pattern
-                    ))
-                    .execute(&state.db)
-                    .await?;
-
-                matched_rules.push(rule.name);
-            }
-        }
-    }
-
-    Ok(Json(serde_json::json!({
-        "status": "processed",
-        "matched": matched_rules
-    })))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_pattern_matching() {
-        let pattern = "error";
-        let message = "This is an error message";
-        assert!(message.contains(pattern));
-    }
-}
-
-// Log alerting service for monitoring log entries and triggering alerts.
-//
-// This module provides threshold-based alerting on top of the log aggregation
-// pipeline. Alerts are evaluated against configurable rules and can be
-// dispatched to multiple channels (in-memory queue, Redis pub/sub).
-//
-// # Example
-// ```rust,no_run
-// use backend::services::log_alerts::{AlertManager, AlertRule, AlertSeverity};
-//
-// # async fn example() {
-// let manager = AlertManager::new();
-// manager.add_rule(AlertRule {
-//     id: uuid::Uuid::new_v4(),
-//     name: "High error rate".to_string(),
-//     pattern: "ERROR".to_string(),
-//     severity: AlertSeverity::Critical,
-//     threshold: 5,
-//     window_secs: 60,
-// }).await;
-// # }
-// ```
+//! Log alerting service for monitoring log entries and triggering alerts.
+//!
+//! This module provides threshold-based alerting on top of the log aggregation
+//! pipeline. Alerts are evaluated against configurable rules and can be
+//! dispatched to multiple channels (in-memory queue, Redis pub/sub).
+//!
+//! # Example
+//! ```rust,no_run
+//! use backend::services::log_alerts::{AlertManager, AlertRule, AlertSeverity};
+//!
+//! # async fn example() {
+//! let manager = AlertManager::new();
+//! manager.add_rule(AlertRule {
+//!     id: uuid::Uuid::new_v4(),
+//!     name: "High error rate".to_string(),
+//!     pattern: "ERROR".to_string(),
+//!     severity: AlertSeverity::Critical,
+//!     threshold: 5,
+//!     window_secs: 60,
+//! }).await.expect("rule passes validation");
+//! # }
+//! ```
+
+#![allow(dead_code)]
 
 use chrono::{DateTime, Utc};
 use std::collections::HashMap;
@@ -275,9 +102,7 @@ impl AlertRule {
     /// Validate that the rule has sensible configuration values.
     pub fn validate(&self) -> Result<(), AlertError> {
         if self.name.trim().is_empty() {
-            return Err(AlertError::InvalidRule(
-                "name must not be empty".to_string(),
-            ));
+            return Err(AlertError::InvalidRule("name must not be empty".to_string()));
         }
         if self.pattern.trim().is_empty() {
             return Err(AlertError::InvalidRule(
@@ -322,7 +147,6 @@ pub struct Alert {
 /// Tracks recent log-entry timestamps per rule for sliding-window evaluation.
 #[derive(Debug, Default)]
 struct RuleState {
-    /// Timestamps of log entries that matched this rule.
     hits: Vec<DateTime<Utc>>,
 }
 
@@ -357,8 +181,6 @@ impl AlertManager {
     }
 
     /// Add or replace an alert rule.
-    ///
-    /// Returns an error if the rule fails validation.
     pub async fn add_rule(&self, rule: AlertRule) -> Result<(), AlertError> {
         rule.validate()?;
         let id = rule.id;
@@ -385,10 +207,6 @@ impl AlertManager {
     }
 
     /// Evaluate a [`LogEntry`] against all active rules.
-    ///
-    /// For each rule whose pattern matches the entry's message, the hit is
-    /// recorded. If the sliding-window count reaches the rule's threshold an
-    /// [`Alert`] is fired and stored.
     pub async fn evaluate(&self, entry: &LogEntry) {
         let rules = self.rules.read().await;
         let mut states = self.rule_states.write().await;
@@ -428,7 +246,6 @@ impl AlertManager {
                     fired_at: Utc::now(),
                     acknowledged: false,
                 });
-                // Reset hits so the alert doesn't re-fire on every subsequent entry.
                 state.hits.clear();
             }
         }
@@ -525,8 +342,6 @@ mod tests {
         }
     }
 
-    // --- AlertRule validation ---
-
     #[test]
     fn test_rule_validation_empty_name() {
         let mut rule = make_rule("ERROR", 3, 60);
@@ -559,15 +374,12 @@ mod tests {
         assert!(rule.validate().is_ok());
     }
 
-    // --- AlertManager CRUD ---
-
     #[tokio::test]
     async fn test_add_and_get_rules() {
         let manager = AlertManager::new();
         let rule = make_rule("ERROR", 3, 60);
         let id = rule.id;
         manager.add_rule(rule).await.unwrap();
-
         let rules = manager.get_rules().await;
         assert_eq!(rules.len(), 1);
         assert_eq!(rules[0].id, id);
@@ -590,16 +402,12 @@ mod tests {
         assert!(matches!(result, Err(AlertError::RuleNotFound(_))));
     }
 
-    // --- Alert evaluation ---
-
     #[tokio::test]
     async fn test_no_alert_below_threshold() {
         let manager = AlertManager::new();
         manager.add_rule(make_rule("ERROR", 3, 60)).await.unwrap();
-
         manager.evaluate(&make_entry("ERROR occurred")).await;
         manager.evaluate(&make_entry("ERROR occurred")).await;
-
         assert!(manager.get_alerts(None).await.is_empty());
     }
 
@@ -607,11 +415,9 @@ mod tests {
     async fn test_alert_fires_at_threshold() {
         let manager = AlertManager::new();
         manager.add_rule(make_rule("ERROR", 3, 60)).await.unwrap();
-
         for _ in 0..3 {
             manager.evaluate(&make_entry("ERROR occurred")).await;
         }
-
         let alerts = manager.get_alerts(None).await;
         assert_eq!(alerts.len(), 1);
         assert_eq!(alerts[0].match_count, 3);
@@ -621,11 +427,7 @@ mod tests {
     async fn test_non_matching_entry_does_not_fire() {
         let manager = AlertManager::new();
         manager.add_rule(make_rule("ERROR", 1, 60)).await.unwrap();
-
-        manager
-            .evaluate(&make_entry("INFO everything is fine"))
-            .await;
-
+        manager.evaluate(&make_entry("INFO everything is fine")).await;
         assert!(manager.get_alerts(None).await.is_empty());
     }
 
@@ -633,32 +435,23 @@ mod tests {
     async fn test_alert_resets_after_firing() {
         let manager = AlertManager::new();
         manager.add_rule(make_rule("ERROR", 2, 60)).await.unwrap();
-
-        // First batch – fires
         manager.evaluate(&make_entry("ERROR a")).await;
         manager.evaluate(&make_entry("ERROR b")).await;
         assert_eq!(manager.get_alerts(None).await.len(), 1);
-
-        // Second batch – fires again after reset
         manager.evaluate(&make_entry("ERROR c")).await;
         manager.evaluate(&make_entry("ERROR d")).await;
         assert_eq!(manager.get_alerts(None).await.len(), 2);
     }
 
-    // --- Acknowledge ---
-
     #[tokio::test]
     async fn test_acknowledge_alert() {
         let manager = AlertManager::new();
         manager.add_rule(make_rule("CRIT", 1, 60)).await.unwrap();
         manager.evaluate(&make_entry("CRIT failure")).await;
-
         let alerts = manager.get_alerts(None).await;
         assert_eq!(alerts.len(), 1);
         let alert_id = alerts[0].id;
-
         manager.acknowledge_alert(alert_id).await.unwrap();
-
         let active = manager.get_active_alerts().await;
         assert!(active.is_empty());
     }
@@ -670,37 +463,28 @@ mod tests {
         assert!(matches!(result, Err(AlertError::AlertNotFound(_))));
     }
 
-    // --- Severity filter ---
-
     #[tokio::test]
     async fn test_filter_alerts_by_severity() {
         let manager = AlertManager::new();
-
         let mut warn_rule = make_rule("WARN", 1, 60);
         warn_rule.severity = AlertSeverity::Warning;
         manager.add_rule(warn_rule).await.unwrap();
-
         let mut crit_rule = make_rule("CRIT", 1, 60);
         crit_rule.severity = AlertSeverity::Critical;
         manager.add_rule(crit_rule).await.unwrap();
-
         manager.evaluate(&make_entry("WARN something")).await;
         manager.evaluate(&make_entry("CRIT something")).await;
-
         let critical = manager.get_alerts(Some(AlertSeverity::Critical)).await;
         assert_eq!(critical.len(), 1);
         assert_eq!(critical[0].severity, AlertSeverity::Critical);
     }
 
-    // --- Clear ---
-
     #[tokio::test]
     async fn test_clear_alerts() {
         let manager = AlertManager::new();
         manager.add_rule(make_rule("ERR", 1, 60)).await.unwrap();
         manager.evaluate(&make_entry("ERR boom")).await;
         assert!(!manager.get_alerts(None).await.is_empty());
-
         manager.clear_alerts().await;
         assert!(manager.get_alerts(None).await.is_empty());
     }
diff --git a/backend/src/services/mod.rs b/backend/src/services/mod.rs
index d90e2b2..5db99b5 100644
--- a/backend/src/services/mod.rs
+++ b/backend/src/services/mod.rs
@@ -1,8 +1,5 @@
 pub mod alerts;
 pub mod business_metrics;
-pub mod log_alerts;
-pub mod dedup;
-pub mod cache_metrics;
 pub mod error_recovery;
 pub mod feature_flags;
 pub mod log_aggregator;
diff --git a/backend/src/services/sys_metrics.rs b/backend/src/services/sys_metrics.rs
index 6b533d0..8865d18 100644
--- a/backend/src/services/sys_metrics.rs
+++ b/backend/src/services/sys_metrics.rs
@@ -1,101 +1,44 @@
-//! Build System Metrics Exporter
-//!
-//! This module provides a production-ready metrics exporter for build system operations.
-//! It collects and persists build-related metrics including compilation times, dependency counts,
-//! cache hit rates, and system resource usage. The service uses PostgreSQL for durability
-//! and Redis for high-performance caching.
-//!
-//! # Example
-//! ```rust,no_run
-//! use backend::services::sys_metrics::BuildMetricsService;
-//! use sqlx::PgPool;
-//! use redis::Client;
-//!
-//! # async fn example(pool: PgPool, redis: Client) -> anyhow::Result<()> {
-//! let service = BuildMetricsService::new(pool, redis);
-//!
-//! // Record a build metric
-//! let metric = BuildMetric {
-//!     project_name: "crucible".to_string(),
-//!     build_id: "build-123".to_string(),
-//!     build_status: BuildStatus::Success,
-//!     compilation_time_ms: 5000,
-//!     dependency_count: 42,
-//!     cache_hit_rate: Some(85.5),
-//!     cpu_usage: Some(75.2),
-//!     memory_usage_mb: Some(1024),
-//!     build_timestamp: Utc::now(),
-//! };
-//! service.record_build(metric).await?;
-//!
-//! // Query metrics
-//! let metrics = service.get_project_metrics("crucible", 10).await?;
-//! # Ok(())
-//! # }
-//! ```
+//! System metrics and build metrics services.
+
+#![allow(dead_code)]
 
 use chrono::{DateTime, Utc};
 use redis::{AsyncCommands, Client as RedisClient};
 use rust_decimal::Decimal;
 use serde::{Deserialize, Serialize};
 use sqlx::PgPool;
+use std::sync::Arc;
 use thiserror::Error;
-use tracing::{debug, error, info, warn};
+use tokio::sync::RwLock;
+use tracing::{debug, info, instrument};
 use uuid::Uuid;
 
+use crate::services::tracing::TracingService;
+
 // ---------------------------------------------------------------------------
-// Error types
+// MetricsError
 // ---------------------------------------------------------------------------
 
-/// Errors that can occur in the build metrics service.
 #[derive(Debug, Error)]
 pub enum MetricsError {
-    /// A database error occurred.
     #[error("Database error: {0}")]
     Database(#[from] sqlx::Error),
-
-    /// A Redis error occurred.
     #[error("Redis error: {0}")]
     Redis(#[from] redis::RedisError),
-
-    /// Serialization error.
     #[error("Serialization error: {0}")]
     Serialization(String),
-
-    /// The requested project was not found.
     #[error("Project not found: {0}")]
     ProjectNotFound(String),
-
-    /// Invalid build status.
     #[error("Invalid build status: {0}")]
     InvalidStatus(String),
-
-    /// An internal error occurred.
     #[error("Internal error: {0}")]
     Internal(String),
 }
 
-use crate::services::tracing::TracingService;
-use std::sync::Arc;
-use tokio::sync::RwLock;
-
-pub struct MetricsExporter {
-    current_metrics: Arc<RwLock<SystemMetrics>>,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, Default)]
-pub struct SystemMetrics {
-    pub cpu_usage: f64,
-    pub memory_usage: u64,
-    pub uptime: u64,
-    pub timestamp: DateTime<Utc>,
-}
-
 // ---------------------------------------------------------------------------
-// Domain types
+// BuildStatus
 // ---------------------------------------------------------------------------
 
-/// Build status enumeration.
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(rename_all = "lowercase")]
 pub enum BuildStatus {
@@ -126,47 +69,32 @@ impl BuildStatus {
     }
 }
 
-/// Build system metrics record.
+// ---------------------------------------------------------------------------
+// BuildMetric
+// ---------------------------------------------------------------------------
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct BuildMetric {
-    /// Unique identifier for the metric record.
     pub id: Option<Uuid>,
-    /// Name of the project being built.
     pub project_name: String,
-    /// Unique build identifier.
     pub build_id: String,
-    /// Status of the build.
     pub build_status: BuildStatus,
-    /// Compilation time in milliseconds.
     pub compilation_time_ms: i64,
-    /// Number of dependencies used.
     pub dependency_count: i32,
-    /// Cache hit rate percentage (0-100).
     pub cache_hit_rate: Option<Decimal>,
-    /// CPU usage percentage during build.
     pub cpu_usage: Option<Decimal>,
-    /// Memory usage in MB during build.
     pub memory_usage_mb: Option<i64>,
-    /// Timestamp when the build occurred.
     pub build_timestamp: DateTime<Utc>,
 }
 
-/// Aggregated build metrics summary.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct BuildMetricsSummary {
-    /// Project name.
     pub project_name: String,
-    /// Total number of builds.
     pub total_builds: i64,
-    /// Number of successful builds.
     pub successful_builds: i64,
-    /// Number of failed builds.
     pub failed_builds: i64,
-    /// Average compilation time in milliseconds.
     pub avg_compilation_time_ms: Decimal,
-    /// Success rate percentage.
     pub success_rate: Decimal,
-    /// Average cache hit rate.
     pub avg_cache_hit_rate: Option<Decimal>,
 }
 
@@ -174,38 +102,24 @@ pub struct BuildMetricsSummary {
 // BuildMetricsService
 // ---------------------------------------------------------------------------
 
-/// Service for collecting and managing build system metrics with PostgreSQL persistence
-/// and Redis caching.
 pub struct BuildMetricsService {
     db: PgPool,
     redis: RedisClient,
 }
 
 impl BuildMetricsService {
-    /// Create a new build metrics service.
-    ///
-    /// # Arguments
-    /// - `db`: PostgreSQL connection pool
-    /// - `redis`: Redis client
     pub fn new(db: PgPool, redis: RedisClient) -> Self {
         Self { db, redis }
     }
 
-    /// Record a build metric.
-    ///
-    /// This method persists the metric to PostgreSQL and invalidates relevant cache entries.
-    ///
-    /// # Errors
-    /// Returns [`MetricsError::Database`] if the database operation fails.
-    /// Returns [`MetricsError::Redis`] if the cache invalidation fails.
     pub async fn record_build(&self, metric: BuildMetric) -> Result<Uuid, MetricsError> {
         let id = Uuid::new_v4();
         let status_str = metric.build_status.as_str();
 
         sqlx::query(
             r#"
-            INSERT INTO build_metrics 
-            (id, project_name, build_id, build_status, compilation_time_ms, 
+            INSERT INTO build_metrics
+            (id, project_name, build_id, build_status, compilation_time_ms,
              dependency_count, cache_hit_rate, cpu_usage, memory_usage_mb, build_timestamp)
             VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
             "#,
@@ -216,14 +130,13 @@ impl BuildMetricsService {
         .bind(status_str)
         .bind(metric.compilation_time_ms)
         .bind(metric.dependency_count)
-        .bind(metric.cache_hit_rate)
-        .bind(metric.cpu_usage)
+        .bind(metric.cache_hit_rate.map(|d| d.to_string()))
+        .bind(metric.cpu_usage.map(|d| d.to_string()))
         .bind(metric.memory_usage_mb)
         .bind(metric.build_timestamp)
         .execute(&self.db)
         .await?;
 
-        // Invalidate cache for this project
         self.invalidate_project_cache(&metric.project_name).await?;
 
         info!(
@@ -236,26 +149,12 @@ impl BuildMetricsService {
         Ok(id)
     }
 
-    /// Get metrics for a specific project.
-    ///
-    /// This method first checks Redis cache. On cache miss, it queries PostgreSQL
-    /// and populates the cache with a 5-minute TTL.
-    ///
-    /// # Arguments
-    /// - `project_name`: Name of the project
-    /// - `limit`: Maximum number of records to return
-    ///
-    /// # Errors
-    /// Returns [`MetricsError::Database`] if the database query fails.
-    /// Returns [`MetricsError::Redis`] if the cache operation fails.
     pub async fn get_project_metrics(
         &self,
         project_name: &str,
         limit: i64,
     ) -> Result<Vec<BuildMetric>, MetricsError> {
         let cache_key = format!("build_metrics:{}:{}", project_name, limit);
-
-        // Try cache first
         let mut conn = self.redis.get_multiplexed_async_connection().await?;
         let cached: Option<String> = conn.get(&cache_key).await?;
 
@@ -266,12 +165,22 @@ impl BuildMetricsService {
             return Ok(metrics);
         }
 
-        // Cache miss – query database
         debug!(project = %project_name, "Build metrics cache miss – querying database");
-        let rows = sqlx::query_as(
+        let rows: Vec<(
+            Uuid,
+            String,
+            String,
+            String,
+            i64,
+            i32,
+            Option<f64>,
+            Option<f64>,
+            Option<i64>,
+            DateTime<Utc>,
+        )> = sqlx::query_as(
             r#"
             SELECT id, project_name, build_id, build_status, compilation_time_ms,
-                   dependency_count, cache_hit_rate, cpu_usage, memory_usage_mb, build_timestamp
+                   dependency_count, cache_hit_rate::float8, cpu_usage::float8, memory_usage_mb, build_timestamp
             FROM build_metrics
             WHERE project_name = $1
             ORDER BY build_timestamp DESC
@@ -297,54 +206,43 @@ impl BuildMetricsService {
                     cpu_usage,
                     memory_usage_mb,
                     build_timestamp,
-                )| {
-                    BuildMetric {
-                        id: Some(id),
-                        project_name,
-                        build_id,
-                        build_status: BuildStatus::from_str(&status_str)
-                            .unwrap_or(BuildStatus::Failed),
-                        compilation_time_ms,
-                        dependency_count,
-                        cache_hit_rate,
-                        cpu_usage,
-                        memory_usage_mb,
-                        build_timestamp,
-                    }
+                )| BuildMetric {
+                    id: Some(id),
+                    project_name,
+                    build_id,
+                    build_status: BuildStatus::from_str(&status_str)
+                        .unwrap_or(BuildStatus::Failed),
+                    compilation_time_ms,
+                    dependency_count,
+                    cache_hit_rate: cache_hit_rate.map(Decimal::try_from).and_then(|r| r.ok()),
+                    cpu_usage: cpu_usage.map(Decimal::try_from).and_then(|r| r.ok()),
+                    memory_usage_mb,
+                    build_timestamp,
                 },
             )
             .collect();
 
-        // Populate cache with 5-minute TTL
         if !metrics.is_empty() {
             let json = serde_json::to_string(&metrics)
                 .map_err(|e| MetricsError::Serialization(e.to_string()))?;
             let _: () = conn.set_ex(&cache_key, json, 300).await?;
-            debug!(project = %project_name, count = metrics.len(), "Cached build metrics");
         }
 
         Ok(metrics)
     }
 
-    /// Get aggregated metrics summary for a project.
-    ///
-    /// # Arguments
-    /// - `project_name`: Name of the project
-    ///
-    /// # Errors
-    /// Returns [`MetricsError::Database`] if the database query fails.
     pub async fn get_project_summary(
         &self,
         project_name: &str,
     ) -> Result<BuildMetricsSummary, MetricsError> {
-        let row: Option<(i64, i64, i64, Option<Decimal>, Option<Decimal>)> = sqlx::query_as(
+        let row: Option<(i64, i64, i64, Option<f64>, Option<f64>)> = sqlx::query_as(
             r#"
-            SELECT 
+            SELECT
                 COUNT(*) as total_builds,
                 SUM(CASE WHEN build_status = 'success' THEN 1 ELSE 0 END) as successful_builds,
                 SUM(CASE WHEN build_status = 'failed' THEN 1 ELSE 0 END) as failed_builds,
-                AVG(compilation_time_ms) as avg_compilation_time,
-                AVG(cache_hit_rate) as avg_cache_hit_rate
+                AVG(compilation_time_ms)::float8 as avg_compilation_time,
+                AVG(cache_hit_rate)::float8 as avg_cache_hit_rate
             FROM build_metrics
             WHERE project_name = $1
             "#,
@@ -363,7 +261,7 @@ impl BuildMetricsService {
             )) => {
                 let success_rate = if total_builds > 0 {
                     Decimal::from(successful_builds) / Decimal::from(total_builds)
-                        * Decimal::from(100)
+                        * Decimal::from(100u32)
                 } else {
                     Decimal::ZERO
                 };
@@ -373,27 +271,36 @@ impl BuildMetricsService {
                     total_builds,
                     successful_builds,
                     failed_builds,
-                    avg_compilation_time_ms: avg_compilation_time.unwrap_or(Decimal::ZERO),
+                    avg_compilation_time_ms: avg_compilation_time
+                        .map(Decimal::try_from)
+                        .and_then(|r| r.ok())
+                        .unwrap_or(Decimal::ZERO),
                     success_rate,
-                    avg_cache_hit_rate,
+                    avg_cache_hit_rate: avg_cache_hit_rate
+                        .map(Decimal::try_from)
+                        .and_then(|r| r.ok()),
                 })
             }
             None => Err(MetricsError::ProjectNotFound(project_name.to_string())),
         }
     }
 
-    /// Get recent build metrics across all projects.
-    ///
-    /// # Arguments
-    /// - `limit`: Maximum number of records to return
-    ///
-    /// # Errors
-    /// Returns [`MetricsError::Database`] if the database query fails.
     pub async fn get_recent_metrics(&self, limit: i64) -> Result<Vec<BuildMetric>, MetricsError> {
-        let rows = sqlx::query_as(
+        let rows: Vec<(
+            Uuid,
+            String,
+            String,
+            String,
+            i64,
+            i32,
+            Option<f64>,
+            Option<f64>,
+            Option<i64>,
+            DateTime<Utc>,
+        )> = sqlx::query_as(
             r#"
             SELECT id, project_name, build_id, build_status, compilation_time_ms,
-                   dependency_count, cache_hit_rate, cpu_usage, memory_usage_mb, build_timestamp
+                   dependency_count, cache_hit_rate::float8, cpu_usage::float8, memory_usage_mb, build_timestamp
             FROM build_metrics
             ORDER BY build_timestamp DESC
             LIMIT $1
@@ -417,32 +324,23 @@ impl BuildMetricsService {
                     cpu_usage,
                     memory_usage_mb,
                     build_timestamp,
-                )| {
-                    BuildMetric {
-                        id: Some(id),
-                        project_name,
-                        build_id,
-                        build_status: BuildStatus::from_str(&status_str)
-                            .unwrap_or(BuildStatus::Failed),
-                        compilation_time_ms,
-                        dependency_count,
-                        cache_hit_rate,
-                        cpu_usage,
-                        memory_usage_mb,
-                        build_timestamp,
-                    }
+                )| BuildMetric {
+                    id: Some(id),
+                    project_name,
+                    build_id,
+                    build_status: BuildStatus::from_str(&status_str)
+                        .unwrap_or(BuildStatus::Failed),
+                    compilation_time_ms,
+                    dependency_count,
+                    cache_hit_rate: cache_hit_rate.map(Decimal::try_from).and_then(|r| r.ok()),
+                    cpu_usage: cpu_usage.map(Decimal::try_from).and_then(|r| r.ok()),
+                    memory_usage_mb,
+                    build_timestamp,
                 },
             )
             .collect())
     }
 
-    /// Delete all metrics for a project.
-    ///
-    /// # Arguments
-    /// - `project_name`: Name of the project
-    ///
-    /// # Errors
-    /// Returns [`MetricsError::Database`] if the database operation fails.
     pub async fn delete_project_metrics(&self, project_name: &str) -> Result<u64, MetricsError> {
         let result = sqlx::query("DELETE FROM build_metrics WHERE project_name = $1")
             .bind(project_name)
@@ -460,27 +358,42 @@ impl BuildMetricsService {
         Ok(result.rows_affected())
     }
 
-    /// Invalidate Redis cache for a specific project.
     async fn invalidate_project_cache(&self, project_name: &str) -> Result<(), MetricsError> {
         let mut conn = self.redis.get_multiplexed_async_connection().await?;
-
-        // Delete all cache keys for this project using SCAN
         let pattern = format!("build_metrics:{}:*", project_name);
         let keys: Vec<String> = redis::cmd("KEYS")
             .arg(&pattern)
             .query_async(&mut conn)
             .await?;
 
+        for key in &keys {
+            let _: () = conn.del(key).await?;
+        }
+
         if !keys.is_empty() {
-            for key in keys {
-                let _: () = conn.del(&key).await?;
-            }
             debug!(project = %project_name, count = keys.len(), "Invalidated project cache");
         }
+
         Ok(())
     }
 }
 
+// ---------------------------------------------------------------------------
+// SystemMetrics + MetricsExporter
+// ---------------------------------------------------------------------------
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct SystemMetrics {
+    pub cpu_usage: f64,
+    pub memory_usage: u64,
+    pub uptime: u64,
+    pub timestamp: DateTime<Utc>,
+}
+
+pub struct MetricsExporter {
+    current_metrics: Arc<RwLock<SystemMetrics>>,
+}
+
 impl Default for MetricsExporter {
     fn default() -> Self {
         Self::new()
@@ -497,10 +410,10 @@ impl MetricsExporter {
         }
     }
 
+    #[instrument(skip(self), fields(service.name = "MetricsExporter", service.method = "update_metrics"))]
     pub async fn update_metrics(&self, cpu: f64, mem: u64, uptime: u64) {
         let span = TracingService::service_method_span("MetricsExporter", "update_metrics");
         let _enter = span.enter();
-
         let mut metrics = self.current_metrics.write().await;
         metrics.cpu_usage = cpu;
         metrics.memory_usage = mem;
@@ -512,14 +425,10 @@ impl MetricsExporter {
     pub async fn get_metrics(&self) -> SystemMetrics {
         let span = TracingService::service_method_span("MetricsExporter", "get_metrics");
         let _enter = span.enter();
-
         self.current_metrics.read().await.clone()
     }
 
     pub async fn run_collector(exporter: Arc<Self>) {
-        let span = TracingService::service_method_span("MetricsExporter", "run_collector");
-        let _enter = span.enter();
-
         info!("Starting system metrics collector worker");
         let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(5));
         let start_time = Utc::now();
@@ -527,10 +436,7 @@ impl MetricsExporter {
         loop {
             interval.tick().await;
             let uptime = (Utc::now() - start_time).num_seconds() as u64;
-            // Simulated metrics collection
-            exporter
-                .update_metrics(12.5, 1024 * 1024 * 512, uptime)
-                .await;
+            exporter.update_metrics(12.5, 1024 * 1024 * 512, uptime).await;
         }
     }
 }
@@ -542,7 +448,6 @@ impl MetricsExporter {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use rust_decimal_macros::dec;
 
     #[test]
     fn test_build_status_conversion() {
@@ -571,8 +476,8 @@ mod tests {
             build_status: BuildStatus::Success,
             compilation_time_ms: 5000,
             dependency_count: 42,
-            cache_hit_rate: Some(dec!(85.5)),
-            cpu_usage: Some(dec!(75.2)),
+            cache_hit_rate: Some(Decimal::from(85u32)),
+            cpu_usage: Some(Decimal::from(75u32)),
             memory_usage_mb: Some(1024),
             build_timestamp: Utc::now(),
         };
@@ -595,23 +500,6 @@ mod tests {
         assert!(err.to_string().contains("unknown"));
     }
 
-    #[test]
-    fn test_build_metrics_summary() {
-        let summary = BuildMetricsSummary {
-            project_name: "test".to_string(),
-            total_builds: 100,
-            successful_builds: 95,
-            failed_builds: 5,
-            avg_compilation_time_ms: dec!(5000),
-            success_rate: dec!(95),
-            avg_cache_hit_rate: Some(dec!(80)),
-        };
-
-        let json = serde_json::to_string(&summary).unwrap();
-        assert!(json.contains("test"));
-        assert!(json.contains("95"));
-    }
-
     #[tokio::test]
     async fn test_build_status_roundtrip() {
         let statuses = vec![
@@ -620,7 +508,6 @@ mod tests {
             BuildStatus::Cancelled,
             BuildStatus::Running,
         ];
-
         for status in statuses {
             let s = status.as_str();
             let parsed = BuildStatus::from_str(s).unwrap();
@@ -632,7 +519,6 @@ mod tests {
     async fn test_metrics_collection() {
         let exporter = MetricsExporter::new();
         exporter.update_metrics(25.0, 1024, 60).await;
-
         let metrics = exporter.get_metrics().await;
         assert_eq!(metrics.cpu_usage, 25.0);
         assert_eq!(metrics.memory_usage, 1024);
diff --git a/backend/src/services/tracing.rs b/backend/src/services/tracing.rs
index 0bcdd83..a23f818 100644
--- a/backend/src/services/tracing.rs
+++ b/backend/src/services/tracing.rs
@@ -1,21 +1,15 @@
-//! OpenTelemetry tracing service for production-grade observability
+//! OpenTelemetry tracing service for production-grade observability.
 //!
-//! This module provides the centralized tracing hub for the Crucible backend,
-//! implementing OTLP exporter with Jaeger/Zipkin compatibility, semantic conventions,
+//! Provides the centralized tracing hub for the Crucible backend, implementing
+//! an OTLP exporter with Jaeger/Zipkin compatibility, semantic conventions,
 //! sampling strategies, and proper error propagation.
-//!
-//! # Features
-//! - OTLP/gRPC exporter (Jaeger/Zipkin compatible)
-//! - Head-based and tail-based sampling strategies
-//! - Semantic conventions for HTTP, DB, and service operations
-//! - Resource detection with deployment environment
-//! - Span limits and baggage propagation
-//! - Zero-overhead when tracing is disabled
+
+#![allow(dead_code)]
 
 use opentelemetry::trace::TracerProvider as _;
 use opentelemetry::KeyValue;
 use opentelemetry_otlp::WithExportConfig;
-use opentelemetry_sdk::trace::{Config, RandomIdGenerator, Sampler, TracerProvider};
+use opentelemetry_sdk::trace::{Config, RandomIdGenerator, Sampler};
 use opentelemetry_sdk::Resource;
 use opentelemetry_semantic_conventions::resource;
 use std::time::Duration;
@@ -23,27 +17,28 @@ use tracing::{info_span, warn};
 use tracing_subscriber::layer::SubscriberExt;
 use tracing_subscriber::{EnvFilter, Registry};
 
-/// Central tracing service for initialization and span creation
-pub struct TracingService;
+// ---------------------------------------------------------------------------
+// TracingConfig
+// ---------------------------------------------------------------------------
 
-/// Configuration for the tracing service
+/// Configuration for the tracing service.
 #[derive(Clone, Debug)]
 pub struct TracingConfig {
-    /// OTLP exporter endpoint (e.g., "http://jaeger:4317")
+    /// OTLP exporter endpoint (e.g., `"http://jaeger:4317"`).
     pub otlp_endpoint: String,
-    /// Service name for resource identification
+    /// Service name for resource identification.
     pub service_name: String,
-    /// Service version
+    /// Service version.
     pub service_version: String,
-    /// Deployment environment (dev, staging, production)
+    /// Deployment environment (`dev`, `staging`, `production`).
     pub environment: String,
-    /// Sampling ratio (0.0 to 1.0)
+    /// Sampling ratio in `[0.0, 1.0]`.
     pub sampling_ratio: f64,
-    /// Maximum number of attributes per span
+    /// Maximum number of attributes per span.
     pub max_attributes_per_span: u32,
-    /// Maximum number of events per span
+    /// Maximum number of events per span.
     pub max_events_per_span: u32,
-    /// Maximum number of links per span
+    /// Maximum number of links per span.
     pub max_links_per_span: u32,
 }
 
@@ -53,7 +48,7 @@ impl Default for TracingConfig {
             otlp_endpoint: "http://localhost:4317".to_string(),
             service_name: "crucible-backend".to_string(),
             service_version: env!("CARGO_PKG_VERSION").to_string(),
-            environment: std::env::var("ENV").unwrap_or("dev".to_string()),
+            environment: std::env::var("ENV").unwrap_or_else(|_| "dev".to_string()),
             sampling_ratio: 1.0,
             max_attributes_per_span: 128,
             max_events_per_span: 128,
@@ -63,7 +58,7 @@ impl Default for TracingConfig {
 }
 
 impl TracingConfig {
-    /// Create a new tracing configuration with defaults
+    /// Create a new configuration with the given service name and version.
     pub fn new(service_name: String, service_version: String) -> Self {
         Self {
             service_name,
@@ -72,41 +67,49 @@ impl TracingConfig {
         }
     }
 
-    /// Set a custom OTLP endpoint
+    /// Override the OTLP endpoint.
     pub fn with_otlp_endpoint(mut self, endpoint: String) -> Self {
         self.otlp_endpoint = endpoint;
         self
     }
 
-    /// Set the deployment environment
+    /// Set the environment and reset sampling (production 1%, staging 10%, otherwise 100%).
     pub fn with_environment(mut self, env: String) -> Self {
-        self.environment = env.clone();
         self.sampling_ratio = match env.as_str() {
             "production" => 0.01,
             "staging" => 0.1,
             _ => 1.0,
         };
+        self.environment = env;
         self
     }
 
-    /// Set custom sampling ratio (0.0 to 1.0)
+    /// Set a custom sampling ratio clamped to `[0.0, 1.0]`.
     pub fn with_sampling_ratio(mut self, ratio: f64) -> Self {
         self.sampling_ratio = ratio.max(0.0).min(1.0);
         self
     }
 }
 
+// ---------------------------------------------------------------------------
+// TracingService
+// ---------------------------------------------------------------------------
+
+/// Central tracing service for initialization and span creation.
+pub struct TracingService;
+
 impl TracingService {
-    /// Initialize the global tracer provider with OTLP exporter
+    /// Initialize the global tracer provider with an OTLP exporter.
     pub fn init(config: TracingConfig) -> anyhow::Result<()> {
-        let resource = Resource::builder()
-            .with_attributes(vec![
-                KeyValue::new(resource::SERVICE_NAME, config.service_name.clone()),
-                KeyValue::new(resource::SERVICE_VERSION, config.service_version.clone()),
-                KeyValue::new(resource::DEPLOYMENT_ENVIRONMENT, config.environment.clone()),
-                KeyValue::new("service.namespace", "crucible"),
-            ])
-            .build();
+        let resource = Resource::new(vec![
+            KeyValue::new(resource::SERVICE_NAME, config.service_name.clone()),
+            KeyValue::new(resource::SERVICE_VERSION, config.service_version.clone()),
+            KeyValue::new(
+                resource::DEPLOYMENT_ENVIRONMENT,
+                config.environment.clone(),
+            ),
+            KeyValue::new("service.namespace", "crucible"),
+        ]);
 
         let sampler = if config.environment == "production" {
             Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(config.sampling_ratio)))
@@ -118,9 +121,9 @@ impl TracingService {
             .with_resource(resource)
             .with_sampler(sampler)
             .with_id_generator(RandomIdGenerator::default())
-            .with_max_attributes_per_span(config.max_attributes_per_span as u32)
-            .with_max_events_per_span(config.max_events_per_span as u32)
-            .with_max_links_per_span(config.max_links_per_span as u32);
+            .with_max_attributes_per_span(config.max_attributes_per_span)
+            .with_max_events_per_span(config.max_events_per_span)
+            .with_max_links_per_span(config.max_links_per_span);
 
         let tracer_provider = opentelemetry_otlp::new_pipeline()
             .tracing()
@@ -134,9 +137,7 @@ impl TracingService {
             .install_batch(opentelemetry_sdk::runtime::Tokio)
             .map_err(|e| anyhow::anyhow!("Failed to install OTLP exporter: {}", e))?;
 
-        // Get a tracer from the provider
         let tracer = tracer_provider.tracer("crucible-backend");
-
         let telemetry_layer = tracing_opentelemetry::layer().with_tracer(tracer);
 
         let subscriber = Registry::default()
@@ -150,16 +151,18 @@ impl TracingService {
         tracing::subscriber::set_global_default(subscriber)
             .map_err(|e| anyhow::anyhow!("Failed to set global subscriber: {}", e))?;
 
-        tracing::info!("OpenTelemetry tracing initialized successfully");
-        tracing::info!("Service: {}", config.service_name);
-        tracing::info!("Environment: {}", config.environment);
-        tracing::info!("OTLP Endpoint: {}", config.otlp_endpoint);
-        tracing::info!("Sampling Ratio: {:.1}%", config.sampling_ratio * 100.0);
+        tracing::info!(
+            service = %config.service_name,
+            environment = %config.environment,
+            otlp_endpoint = %config.otlp_endpoint,
+            sampling_pct = config.sampling_ratio * 100.0,
+            "OpenTelemetry tracing initialized"
+        );
 
         Ok(())
     }
 
-    /// Create an HTTP request span with semantic conventions
+    /// Create an HTTP request span with semantic conventions.
     pub fn http_request_span(method: &str, path: &str, user_id: Option<&str>) -> tracing::Span {
         info_span!(
             "http.request",
@@ -174,7 +177,7 @@ impl TracingService {
         )
     }
 
-    /// Create a database query span with semantic conventions
+    /// Create a database query span with semantic conventions.
     pub fn db_query_span(query: &str, db_system: &str, operation: &str) -> tracing::Span {
         let truncated_query = query
             .split('\n')
@@ -196,7 +199,7 @@ impl TracingService {
         )
     }
 
-    /// Create a Redis command span with semantic conventions
+    /// Create a Redis command span with semantic conventions.
     pub fn redis_command_span(command: &str, key: Option<&str>) -> tracing::Span {
         info_span!(
             "db.redis.command",
@@ -208,7 +211,7 @@ impl TracingService {
         )
     }
 
-    /// Create a service method span for business operations
+    /// Create a service method span for business operations.
     pub fn service_method_span(service_name: &str, method_name: &str) -> tracing::Span {
         info_span!(
             "service.method",
@@ -219,7 +222,7 @@ impl TracingService {
         )
     }
 
-    /// Create an async job/task span
+    /// Create an async job/task span.
     pub fn job_span(job_name: &str, job_id: &str) -> tracing::Span {
         info_span!(
             "job.execute",
@@ -230,13 +233,17 @@ impl TracingService {
         )
     }
 
-    /// Mark current span with error information
+    /// Record the error type on the given span and log the error message as a warning.
     pub fn record_error(span: &tracing::Span, error_message: &str, error_type: &str) {
         span.record("error.type", error_type);
         warn!("Span error recorded: {} ({})", error_message, error_type);
     }
 }
 
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -256,6 +263,35 @@ mod tests {
         assert_eq!(config.sampling_ratio, 0.01);
     }
 
+    #[test]
+    fn test_tracing_config_staging_sampling() {
+        let staging = TracingConfig::default().with_environment(String::from("staging"));
+        assert_eq!(staging.sampling_ratio, 0.1);
+    }
+
+    #[test]
+    fn test_tracing_config_dev_sampling() {
+        let dev = TracingConfig::default().with_environment(String::from("dev"));
+        assert_eq!(dev.sampling_ratio, 1.0);
+    }
+
+    #[test]
+    fn test_sampling_ratio_bounds() {
+        // Ratios outside the unit interval must come back as the nearest bound.
+        let above = TracingConfig::default().with_sampling_ratio(1.5);
+        assert_eq!(above.sampling_ratio, 1.0);
+        let below = TracingConfig::default().with_sampling_ratio(-0.5);
+        assert_eq!(below.sampling_ratio, 0.0);
+    }
+
+    #[test]
+    fn test_config_clone() {
+        let original = TracingConfig::new(String::from("svc"), String::from("1.0.0"));
+        let copy = original.clone();
+        assert_eq!(original.service_name, copy.service_name);
+        assert_eq!(original.otlp_endpoint, copy.otlp_endpoint);
+    }
+
     #[test]
     fn test_http_span_creation() {
         let span = TracingService::http_request_span("GET", "/api/users", Some("user123"));
@@ -289,13 +325,4 @@ mod tests {
         let span = TracingService::job_span("process_transaction", "job-456");
         drop(span);
     }
-
-    #[test]
-    fn test_sampling_ratio_bounds() {
-        let config = TracingConfig::default().with_sampling_ratio(1.5);
-        assert_eq!(config.sampling_ratio, 1.0);
-
-        let config = TracingConfig::default().with_sampling_ratio(-0.5);
-        assert_eq!(config.sampling_ratio, 0.0);
-    }
 }
diff --git a/backend/tests/load/dashboard_load.rs b/backend/tests/load/dashboard_load.rs
new file mode 100644
index 0000000..1a63013
--- /dev/null
+++ b/backend/tests/load/dashboard_load.rs
@@ -0,0 +1,453 @@
+//! Concurrent load tests for the `GET /api/dashboard` endpoint.
+//!
+//! These tests verify that the dashboard handler remains stable and correct
+//! under concurrent load. The handler degrades gracefully when Redis is
+//! unavailable (falls back to live service data), so tests run without any
+//! external infrastructure.
+//!
+//! # Running
+//!
+//! ```bash
+//! cargo test -p backend --test load_tests load::dashboard_load -- --nocapture
+//! ```
+
+use std::sync::Arc;
+use std::time::Instant;
+
+use axum::{body::to_bytes, routing::get, Router};
+use axum::http::StatusCode;
+use hyper::Request;
+use tower::ServiceExt;
+
+use backend::api::handlers::dashboard::{get_dashboard, DashboardState};
+use backend::services::{
+    alerts::AlertDispatcher,
+    error_recovery::ErrorManager,
+    log_alerts::AlertManager,
+    sys_metrics::MetricsExporter,
+};
+
+use crate::load::framework::{assert_load_result, LoadConfig, LoadResult};
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/// Construct a fresh router that serves `GET /api/dashboard` from new state.
+///
+/// The Redis client targets `127.0.0.1:1`, a port expected to refuse
+/// connections, so the handler runs without a reachable cache.
+fn build_app() -> Router {
+    let redis = redis::Client::open("redis://127.0.0.1:1/").unwrap();
+    let dashboard_state = DashboardState {
+        metrics_exporter: Arc::new(MetricsExporter::new()),
+        error_manager: Arc::new(ErrorManager::new()),
+        alert_manager: Arc::new(AlertManager::new()),
+        redis,
+    };
+    Router::new()
+        .route("/api/dashboard", get(get_dashboard))
+        .with_state(Arc::new(dashboard_state))
+}
+
+/// Drive `GET /api/dashboard` through the load framework.
+///
+/// Every request gets its own router from `build_app`; the reported latency
+/// starts after the router is built and ends when the response is returned.
+async fn run_framework_load(concurrency: usize, requests_per_task: usize) -> LoadResult {
+    use crate::load::framework::run_load;
+
+    let fire_once = || async {
+        let router = build_app();
+        let started = Instant::now();
+        let request = Request::builder()
+            .uri("/api/dashboard")
+            .body(axum::body::Body::empty())
+            .unwrap();
+        let response = router.oneshot(request).await.unwrap();
+        (response.status(), started.elapsed())
+    };
+
+    let config = LoadConfig::new(concurrency, requests_per_task);
+    run_load(config, fire_once).await
+}
+
+// ---------------------------------------------------------------------------
+// Basic correctness
+// ---------------------------------------------------------------------------
+
+/// The endpoint answers `200 OK` even though the Redis client configured by
+/// `build_app` points at an unreachable address.
+#[tokio::test]
+async fn test_dashboard_returns_200_without_redis() {
+    let router = build_app();
+    let request = Request::builder()
+        .uri("/api/dashboard")
+        .body(axum::body::Body::empty())
+        .unwrap();
+
+    // `oneshot` consumes the router and resolves to its single response.
+    let response = router.oneshot(request).await.unwrap();
+    let status = response.status();
+
+    assert_eq!(status, StatusCode::OK);
+}
+
+/// The JSON body exposes the `metrics`, `active_recovery_tasks` and
+/// `active_alerts` keys at the top level.
+#[tokio::test]
+async fn test_dashboard_response_shape() {
+    let router = build_app();
+    let request = Request::builder()
+        .uri("/api/dashboard")
+        .body(axum::body::Body::empty())
+        .unwrap();
+    let response = router.oneshot(request).await.unwrap();
+
+    let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+    let payload: serde_json::Value = serde_json::from_slice(&raw).unwrap();
+
+    // Each required key is paired with the message reported when it is missing.
+    let required = [
+        ("metrics", "must have 'metrics'"),
+        ("active_recovery_tasks", "must have 'active_recovery_tasks'"),
+        ("active_alerts", "must have 'active_alerts'"),
+    ];
+    for (key, message) in required {
+        assert!(payload.get(key).is_some(), "{}", message);
+    }
+}
+
+/// Values seeded into the metrics exporter are reported under `metrics` as
+/// `cpu_usage`, `memory_usage` and `uptime`.
+#[tokio::test]
+async fn test_dashboard_metrics_fields() {
+    // Seed the exporter first, then hand it to the dashboard state.
+    let exporter = Arc::new(MetricsExporter::new());
+    exporter.update_metrics(42.0, 2048, 120).await;
+
+    let redis = redis::Client::open("redis://127.0.0.1:1/").unwrap();
+    let shared = Arc::new(DashboardState {
+        metrics_exporter: exporter,
+        error_manager: Arc::new(ErrorManager::new()),
+        alert_manager: Arc::new(AlertManager::new()),
+        redis,
+    });
+    let router = Router::new()
+        .route("/api/dashboard", get(get_dashboard))
+        .with_state(shared);
+
+    let request = Request::builder()
+        .uri("/api/dashboard")
+        .body(axum::body::Body::empty())
+        .unwrap();
+    let response = router.oneshot(request).await.unwrap();
+
+    let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+    let body: serde_json::Value = serde_json::from_slice(&raw).unwrap();
+    let metrics = &body["metrics"];
+
+    assert_eq!(metrics["cpu_usage"], 42.0);
+    assert_eq!(metrics["memory_usage"], 2048);
+    assert_eq!(metrics["uptime"], 120);
+}
+
+/// A failure handed to the error manager shows up in `active_recovery_tasks`.
+///
+/// One `RecoveryError::Internal` is reported for `"worker_a"` before the
+/// request; the response must list exactly one task carrying that name.
+#[tokio::test]
+async fn test_dashboard_includes_recovery_tasks() {
+    use backend::services::error_recovery::RecoveryError;
+
+    let recovery = Arc::new(ErrorManager::new());
+    let failure = RecoveryError::Internal("boom".into());
+    recovery.handle_error(failure, "worker_a").await.unwrap();
+
+    // Same unreachable Redis address that `build_app` uses.
+    let redis = redis::Client::open("redis://127.0.0.1:1/").unwrap();
+    let shared = Arc::new(DashboardState {
+        metrics_exporter: Arc::new(MetricsExporter::new()),
+        error_manager: recovery,
+        alert_manager: Arc::new(AlertManager::new()),
+        redis,
+    });
+
+    let router = Router::new()
+        .route("/api/dashboard", get(get_dashboard))
+        .with_state(shared);
+
+    let request = Request::builder()
+        .uri("/api/dashboard")
+        .body(axum::body::Body::empty())
+        .unwrap();
+    let response = router.oneshot(request).await.unwrap();
+
+    let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+    let payload: serde_json::Value = serde_json::from_slice(&raw).unwrap();
+
+    // `as_array().unwrap()` also fails the test if the field is not a JSON array.
+    let listed = payload["active_recovery_tasks"].as_array().unwrap();
+    assert_eq!(listed.len(), 1);
+    assert_eq!(listed[0]["name"], "worker_a");
+}
+
+/// An alert raised through the alert manager shows up in `active_alerts`.
+///
+/// A rule with threshold 1 is registered, then one log entry whose message
+/// contains the rule's pattern text is evaluated before the request is made.
+#[tokio::test]
+async fn test_dashboard_includes_active_alerts() {
+    use backend::services::log_aggregator::LogEntry;
+    use backend::services::log_alerts::{AlertRule, AlertSeverity};
+    use chrono::Utc;
+    use uuid::Uuid;
+
+    let alerts_service = Arc::new(AlertManager::new());
+    let rule = AlertRule {
+        id: Uuid::new_v4(),
+        name: "test-rule".to_string(),
+        pattern: "CRITICAL".to_string(),
+        severity: AlertSeverity::Critical,
+        threshold: 1,
+        window_secs: 60,
+    };
+    alerts_service.add_rule(rule).await.unwrap();
+
+    // Built only after the rule is registered, so its timestamp is taken then.
+    let entry = LogEntry {
+        timestamp: Utc::now(),
+        level: "ERROR".to_string(),
+        message: "CRITICAL failure detected".to_string(),
+        service: "test".to_string(),
+    };
+    alerts_service.evaluate(&entry).await;
+
+    // Same unreachable Redis address that `build_app` uses.
+    let redis = redis::Client::open("redis://127.0.0.1:1/").unwrap();
+    let shared = Arc::new(DashboardState {
+        metrics_exporter: Arc::new(MetricsExporter::new()),
+        error_manager: Arc::new(ErrorManager::new()),
+        alert_manager: alerts_service,
+        redis,
+    });
+
+    let router = Router::new()
+        .route("/api/dashboard", get(get_dashboard))
+        .with_state(shared);
+
+    let request = Request::builder()
+        .uri("/api/dashboard")
+        .body(axum::body::Body::empty())
+        .unwrap();
+    let response = router.oneshot(request).await.unwrap();
+
+    let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+    let payload: serde_json::Value = serde_json::from_slice(&raw).unwrap();
+
+    // `as_array().unwrap()` also fails the test if the field is not a JSON array.
+    let active = payload["active_alerts"].as_array().unwrap();
+    assert_eq!(active.len(), 1, "one alert should be active");
+    assert_eq!(active[0]["rule_name"], "test-rule");
+    assert_eq!(active[0]["severity"], "critical");
+}
+
+/// With the freshly constructed managers from `build_app`, both
+/// `active_recovery_tasks` and `active_alerts` are present and are empty
+/// JSON arrays.
+#[tokio::test]
+async fn test_dashboard_empty_state() {
+    let router = build_app();
+    let request = Request::builder()
+        .uri("/api/dashboard")
+        .body(axum::body::Body::empty())
+        .unwrap();
+    let response = router.oneshot(request).await.unwrap();
+
+    let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+    let payload: serde_json::Value = serde_json::from_slice(&raw).unwrap();
+
+    // `as_array().unwrap()` fails the test when a field is missing or is not
+    // an array, so each length check below also covers the field's type.
+    let recovery_tasks = payload["active_recovery_tasks"].as_array().unwrap();
+    assert_eq!(recovery_tasks.len(), 0);
+
+    let alerts = payload["active_alerts"].as_array().unwrap();
+    assert_eq!(alerts.len(), 0);
+}
+
+// ---------------------------------------------------------------------------
+// Concurrency tests
+// ---------------------------------------------------------------------------
+
+/// Ten spawned requests must all return `200 OK`.
+///
+/// Each task gets its own router (and therefore its own state) from `build_app`.
+#[tokio::test]
+async fn test_dashboard_10_concurrent() {
+    // Spawn every task before awaiting any of them so the requests can overlap.
+    let mut pending = Vec::with_capacity(10);
+    for _ in 0..10 {
+        let router = build_app();
+        pending.push(tokio::spawn(async move {
+            let request = Request::builder()
+                .uri("/api/dashboard")
+                .body(axum::body::Body::empty())
+                .unwrap();
+            let response = router.oneshot(request).await.unwrap();
+            response.status()
+        }));
+    }
+
+    // A panicked task surfaces as a `JoinError` and fails the unwrap below.
+    for task in pending {
+        let status = task.await.unwrap();
+        assert_eq!(status, StatusCode::OK);
+    }
+}
+
+/// Fifty spawned requests must all return `200 OK`.
+///
+/// Each task gets its own router (and therefore its own state) from `build_app`.
+#[tokio::test]
+async fn test_dashboard_50_concurrent() {
+    // Spawn every task before awaiting any of them so the requests can overlap.
+    let mut pending = Vec::with_capacity(50);
+    for _ in 0..50 {
+        let router = build_app();
+        pending.push(tokio::spawn(async move {
+            let request = Request::builder()
+                .uri("/api/dashboard")
+                .body(axum::body::Body::empty())
+                .unwrap();
+            let response = router.oneshot(request).await.unwrap();
+            response.status()
+        }));
+    }
+
+    // A panicked task surfaces as a `JoinError` and fails the unwrap below.
+    for task in pending {
+        let status = task.await.unwrap();
+        assert_eq!(status, StatusCode::OK);
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Framework-based load tests with SLO assertions
+// ---------------------------------------------------------------------------
+
+/// 100 requests in total: 10 tasks, each issuing 10 sequential requests.
+/// SLO: no failed requests and a p99 latency of at most 500 ms.
+#[tokio::test]
+async fn test_dashboard_load_100_requests_slo() {
+    let outcome = run_framework_load(10, 10).await;
+    outcome.print_summary("GET /api/dashboard — 100 requests");
+    assert_load_result(&outcome, 0.0, std::time::Duration::from_millis(500));
+}
+
+/// 200 requests in total: 20 tasks, each issuing 10 sequential requests.
+/// SLO: no failed requests and a p99 latency of at most 1 s.
+#[tokio::test]
+async fn test_dashboard_load_200_requests_slo() {
+    let outcome = run_framework_load(20, 10).await;
+    outcome.print_summary("GET /api/dashboard — 200 requests");
+    assert_load_result(&outcome, 0.0, std::time::Duration::from_secs(1));
+}
+
+/// Verify that every response produced under load has the expected JSON shape.
+///
+/// Five tasks issue four sequential requests each. A task that panics fails the
+/// test instead of silently ending result collection.
+#[tokio::test]
+async fn test_dashboard_load_response_shape_under_load() {
+    let mut join_set = tokio::task::JoinSet::new();
+    for _ in 0..5_usize {
+        join_set.spawn(async {
+            let mut results = Vec::with_capacity(4);
+            for _ in 0..4_usize {
+                let request = Request::builder()
+                    .uri("/api/dashboard")
+                    .body(axum::body::Body::empty())
+                    .unwrap();
+                let resp = build_app().oneshot(request).await.unwrap();
+                let status = resp.status();
+                let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
+                results.push((status, bytes.to_vec()));
+            }
+            results
+        });
+    }
+
+    // `join_next` yields `Err` for a panicked task; matching only `Some(Ok(_))`
+    // would end the loop early and let the test pass with unchecked responses.
+    while let Some(joined) = join_set.join_next().await {
+        for (status, body) in joined.expect("dashboard load task panicked") {
+            assert_eq!(status, StatusCode::OK);
+            let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
+            assert!(json.get("metrics").is_some());
+            assert!(json.get("active_recovery_tasks").is_some());
+            assert!(json.get("active_alerts").is_some());
+        }
+    }
+}
+
+/// Verify that shared state is read consistently under concurrent load.
+///
+/// Ten tasks share one `DashboardState`; every response must report the seeded
+/// metric values. A panicked task fails the test rather than being skipped.
+#[tokio::test]
+async fn test_dashboard_shared_state_consistency() {
+    let metrics_exporter = Arc::new(MetricsExporter::new());
+    metrics_exporter.update_metrics(77.0, 4096, 500).await;
+
+    let state = Arc::new(DashboardState {
+        metrics_exporter,
+        error_manager: Arc::new(ErrorManager::new()),
+        alert_manager: Arc::new(AlertManager::new()),
+        redis: redis::Client::open("redis://127.0.0.1:1/").unwrap(),
+    });
+
+    let mut join_set = tokio::task::JoinSet::new();
+    for _ in 0..10_usize {
+        let state_clone = Arc::clone(&state);
+        join_set.spawn(async move {
+            let app = Router::new()
+                .route("/api/dashboard", get(get_dashboard))
+                .with_state(state_clone);
+            let request = Request::builder()
+                .uri("/api/dashboard")
+                .body(axum::body::Body::empty())
+                .unwrap();
+            let resp = app.oneshot(request).await.unwrap();
+            let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
+            serde_json::from_slice::<serde_json::Value>(&bytes).unwrap()
+        });
+    }
+
+    // Unwrap each join result so a `JoinError` (task panic) is not mistaken
+    // for the end of the result stream.
+    while let Some(joined) = join_set.join_next().await {
+        let json = joined.expect("dashboard request task panicked");
+        assert_eq!(json["metrics"]["cpu_usage"], 77.0);
+        assert_eq!(json["metrics"]["memory_usage"], 4096);
+        assert_eq!(json["metrics"]["uptime"], 500);
+    }
+}
+
+/// `DashboardData` survives a JSON encode/decode cycle with both lists empty.
+#[tokio::test]
+async fn test_dashboard_serialization_roundtrip() {
+    use backend::api::handlers::dashboard::DashboardData;
+    use backend::services::sys_metrics::SystemMetrics;
+
+    let original = DashboardData {
+        metrics: SystemMetrics::default(),
+        active_recovery_tasks: Vec::new(),
+        active_alerts: Vec::new(),
+    };
+
+    let encoded = serde_json::to_string(&original).unwrap();
+    let decoded = serde_json::from_str::<DashboardData>(&encoded).unwrap();
+    assert_eq!(decoded.active_recovery_tasks.len(), 0);
+    assert_eq!(decoded.active_alerts.len(), 0);
+}
diff --git a/backend/tests/load/framework.rs b/backend/tests/load/framework.rs
new file mode 100644
index 0000000..d862ca0
--- /dev/null
+++ b/backend/tests/load/framework.rs
@@ -0,0 +1,585 @@
+//! Load testing framework — shared helpers, metrics, and assertion utilities.
+//!
+//! # Overview
+//!
+//! This module provides the core primitives used by every load-test module:
+//!
+//! - [`LoadConfig`] — controls concurrency, iteration count, and timeout.
+//! - [`RequestOutcome`] — the result of a single request (status + latency).
+//! - [`LoadResult`] — aggregated statistics over a completed load run.
+//! - [`run_load`] — fires `config.concurrency` tasks, each making
+//!   `config.requests_per_task` requests, and collects [`LoadResult`].
+//! - [`assert_load_result`] — convenience assertion that fails the test when
+//!   the error rate or p99 latency exceeds the configured thresholds.
+//!
+//! # Example
+//!
+//! ```rust,ignore
+//! use crate::load::framework::{LoadConfig, run_load, assert_load_result};
+//!
+//! let cfg = LoadConfig::default();
+//! let result = run_load(cfg, || async {
+//!     // build and fire one request, return (StatusCode, Duration)
+//!     let app = build_app();
+//!     let start = std::time::Instant::now();
+//!     let resp = app.oneshot(req()).await.unwrap();
+//!     (resp.status(), start.elapsed())
+//! }).await;
+//!
+//! assert_load_result(&result, 0.0, std::time::Duration::from_millis(500));
+//! ```
+
+use std::time::{Duration, Instant};
+
+use axum::http::StatusCode;
+use tokio::task::JoinSet;
+
+// ---------------------------------------------------------------------------
+// Configuration
+// ---------------------------------------------------------------------------
+
+/// Knobs for one load-test run: task count, per-task request count, deadline.
+#[derive(Debug, Clone)]
+pub struct LoadConfig {
+    /// How many Tokio tasks are spawned for the run.
+    pub concurrency: usize,
+    /// How many requests each task issues, one after another.
+    pub requests_per_task: usize,
+    /// Wall-clock budget for collecting all results.
+    /// `run_load` panics once it is exhausted.
+    pub timeout: Duration,
+}
+
+impl LoadConfig {
+    /// Build a configuration with a 30-second timeout.
+    pub fn new(concurrency: usize, requests_per_task: usize) -> Self {
+        let timeout = Duration::from_secs(30);
+        Self {
+            concurrency,
+            requests_per_task,
+            timeout,
+        }
+    }
+
+    /// Return the configuration with its timeout replaced by `timeout`.
+    pub fn with_timeout(self, timeout: Duration) -> Self {
+        Self { timeout, ..self }
+    }
+
+    /// Number of requests the whole run issues (`concurrency * requests_per_task`).
+    pub fn total_requests(&self) -> usize {
+        self.concurrency * self.requests_per_task
+    }
+}
+
+impl Default for LoadConfig {
+    /// Ten tasks issuing five requests each, i.e. 50 requests in total.
+    fn default() -> Self {
+        LoadConfig::new(10, 5)
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Per-request outcome
+// ---------------------------------------------------------------------------
+
+/// What a single HTTP request produced: its status and how long it took.
+#[derive(Debug, Clone)]
+pub struct RequestOutcome {
+    /// Status code the handler responded with.
+    pub status: StatusCode,
+    /// Elapsed time reported for the request by the caller-supplied closure.
+    pub latency: Duration,
+}
+
+impl RequestOutcome {
+    /// Whether the status lies in the 2xx range.
+    pub fn is_success(&self) -> bool {
+        (200..300).contains(&self.status.as_u16())
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Aggregated result
+// ---------------------------------------------------------------------------
+
+/// Outcomes of a completed load run together with its wall-clock duration.
+#[derive(Debug, Clone)]
+pub struct LoadResult {
+    /// Every request outcome; `run_load` appends them batch by batch as tasks finish.
+    pub outcomes: Vec<RequestOutcome>,
+    /// Total wall-clock time for the entire run.
+    pub total_duration: Duration,
+}
+
+impl LoadResult {
+    /// Number of recorded request outcomes.
+    pub fn total(&self) -> usize {
+        self.outcomes.len()
+    }
+
+    /// Number of successful (2xx) requests.
+    pub fn successes(&self) -> usize {
+        self.outcomes.iter().filter(|o| o.is_success()).count()
+    }
+
+    /// Number of failed (non-2xx) requests.
+    pub fn failures(&self) -> usize {
+        self.total() - self.successes()
+    }
+
+    /// Error rate as a fraction in `[0.0, 1.0]`; `0.0` when there are no outcomes.
+    pub fn error_rate(&self) -> f64 {
+        if self.total() == 0 {
+            return 0.0;
+        }
+        self.failures() as f64 / self.total() as f64
+    }
+
+    /// Throughput in requests per second; `0.0` when `total_duration` is zero.
+    pub fn rps(&self) -> f64 {
+        if self.total_duration.is_zero() {
+            return 0.0;
+        }
+        self.total() as f64 / self.total_duration.as_secs_f64()
+    }
+
+    /// Minimum observed latency, or [`Duration::ZERO`] when there are no outcomes.
+    pub fn min_latency(&self) -> Duration {
+        self.outcomes
+            .iter()
+            .map(|o| o.latency)
+            .min()
+            .unwrap_or(Duration::ZERO)
+    }
+
+    /// Maximum observed latency, or [`Duration::ZERO`] when there are no outcomes.
+    pub fn max_latency(&self) -> Duration {
+        self.outcomes
+            .iter()
+            .map(|o| o.latency)
+            .max()
+            .unwrap_or(Duration::ZERO)
+    }
+
+    /// Mean latency in whole nanoseconds, or [`Duration::ZERO`] when there are no outcomes.
+    pub fn mean_latency(&self) -> Duration {
+        if self.outcomes.is_empty() {
+            return Duration::ZERO;
+        }
+        let total_nanos: u128 = self.outcomes.iter().map(|o| o.latency.as_nanos()).sum();
+        Duration::from_nanos((total_nanos / self.outcomes.len() as u128) as u64)
+    }
+
+    /// Nearest-rank percentile latency; [`Duration::ZERO`] when there are no outcomes.
+    /// `p` must be in `(0.0, 100.0]`, otherwise this panics.
+    pub fn percentile_latency(&self, p: f64) -> Duration {
+        assert!(p > 0.0 && p <= 100.0, "percentile must be in (0, 100]");
+        if self.outcomes.is_empty() {
+            return Duration::ZERO;
+        }
+        let mut latencies: Vec<Duration> = self.outcomes.iter().map(|o| o.latency).collect();
+        latencies.sort_unstable();
+        // Multiply first: `p / 100.0 * len` can overshoot an integral rank (p = 7, len = 100).
+        let rank = (p * latencies.len() as f64 / 100.0).ceil() as usize;
+        latencies[rank.saturating_sub(1).min(latencies.len() - 1)]
+    }
+
+    /// p50 (median) latency.
+    pub fn p50(&self) -> Duration {
+        self.percentile_latency(50.0)
+    }
+
+    /// p95 latency.
+    pub fn p95(&self) -> Duration {
+        self.percentile_latency(95.0)
+    }
+
+    /// p99 latency.
+    pub fn p99(&self) -> Duration {
+        self.percentile_latency(99.0)
+    }
+
+    /// Print a human-readable summary, headed by `label`, to stdout.
+    pub fn print_summary(&self, label: &str) {
+        println!(
+            "\n=== Load Test: {label} ===\n\
+             Total requests : {total}\n\
+             Successes      : {ok}\n\
+             Failures       : {fail}\n\
+             Error rate     : {err:.2}%\n\
+             Throughput     : {rps:.1} req/s\n\
+             Latency min    : {min:?}\n\
+             Latency mean   : {mean:?}\n\
+             Latency p50    : {p50:?}\n\
+             Latency p95    : {p95:?}\n\
+             Latency p99    : {p99:?}\n\
+             Latency max    : {max:?}\n\
+             Total duration : {dur:?}\n",
+            label = label,
+            total = self.total(),
+            ok = self.successes(),
+            fail = self.failures(),
+            err = self.error_rate() * 100.0,
+            rps = self.rps(),
+            min = self.min_latency(),
+            mean = self.mean_latency(),
+            p50 = self.p50(),
+            p95 = self.p95(),
+            p99 = self.p99(),
+            max = self.max_latency(),
+            dur = self.total_duration,
+        );
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Runner
+// ---------------------------------------------------------------------------
+
+/// Execute the load run described by `config` and gather every outcome.
+///
+/// Spawns `config.concurrency` tasks; each clones `request_fn` and awaits it
+/// `config.requests_per_task` times in sequence.
+///
+/// # Panics
+///
+/// Panics if a task fails to join or results are not in by `config.timeout`.
+pub async fn run_load<F, Fut>(config: LoadConfig, request_fn: F) -> LoadResult
+where
+    F: Fn() -> Fut + Clone + Send + 'static,
+    Fut: std::future::Future<Output = (StatusCode, Duration)> + Send,
+{
+    let run_started = Instant::now();
+    let per_task = config.requests_per_task;
+    let mut workers: JoinSet<Vec<RequestOutcome>> = JoinSet::new();
+
+    for _ in 0..config.concurrency {
+        let fire = request_fn.clone();
+        workers.spawn(async move {
+            let mut batch = Vec::with_capacity(per_task);
+            for _ in 0..per_task {
+                let (status, latency) = fire().await;
+                batch.push(RequestOutcome { status, latency });
+            }
+            batch
+        });
+    }
+
+    // One deadline bounds the whole collection phase, not each individual join.
+    let mut collected: Vec<RequestOutcome> = Vec::with_capacity(config.total_requests());
+    let deadline = tokio::time::Instant::now() + config.timeout;
+
+    loop {
+        match tokio::time::timeout_at(deadline, workers.join_next()).await {
+            Err(_) => panic!(
+                "Load test timed out after {:?} ({} requests completed of {})",
+                config.timeout,
+                collected.len(),
+                config.total_requests()
+            ),
+            Ok(None) => break, // every task has been joined
+            Ok(Some(Err(e))) => panic!("Load test task panicked: {e}"),
+            Ok(Some(Ok(batch))) => collected.extend(batch),
+        }
+    }
+
+    LoadResult {
+        outcomes: collected,
+        total_duration: run_started.elapsed(),
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Assertion helper
+// ---------------------------------------------------------------------------
+
+/// Check a finished [`LoadResult`] against error-rate and p99 budgets.
+///
+/// # Arguments
+/// - `result` — the load run to check.
+/// - `max_error_rate` — highest tolerated error rate, as a fraction (`0.01` = 1 %).
+/// - `max_p99` — highest tolerated p99 latency.
+///
+/// # Panics
+///
+/// Panics when the error rate or the p99 latency is strictly above its budget.
+pub fn assert_load_result(result: &LoadResult, max_error_rate: f64, max_p99: Duration) {
+    let observed_rate = result.error_rate();
+    let observed_p99 = result.p99();
+
+    // Kept as `if x > y` (not `assert!(x <= y)`): the two differ for a NaN budget.
+    if observed_rate > max_error_rate {
+        panic!(
+            "Load test failed: error rate {:.2}% exceeds maximum {:.2}%\n\
+             (failures={}, total={})",
+            observed_rate * 100.0,
+            max_error_rate * 100.0,
+            result.failures(),
+            result.total(),
+        );
+    }
+
+    assert!(
+        observed_p99 <= max_p99,
+        "Load test failed: p99 latency {:?} exceeds maximum {:?}",
+        observed_p99, max_p99,
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Unit tests for the framework itself
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // --- LoadConfig ---
+
+    #[test]
+    fn test_load_config_total_requests() {
+        let cfg = LoadConfig::new(4, 10);
+        assert_eq!(cfg.total_requests(), 40);
+    }
+
+    #[test]
+    fn test_load_config_default_total() {
+        let cfg = LoadConfig::default();
+        assert_eq!(cfg.total_requests(), 50);
+    }
+
+    #[test]
+    fn test_load_config_with_timeout() {
+        let cfg = LoadConfig::default().with_timeout(Duration::from_secs(60));
+        assert_eq!(cfg.timeout, Duration::from_secs(60));
+    }
+
+    // --- RequestOutcome ---
+
+    #[test]
+    fn test_request_outcome_is_success_2xx() {
+        let o = RequestOutcome {
+            status: StatusCode::OK,
+            latency: Duration::from_millis(5),
+        };
+        assert!(o.is_success());
+    }
+
+    #[test]
+    fn test_request_outcome_is_not_success_5xx() {
+        let o = RequestOutcome {
+            status: StatusCode::INTERNAL_SERVER_ERROR,
+            latency: Duration::from_millis(5),
+        };
+        assert!(!o.is_success());
+    }
+
+    #[test]
+    fn test_request_outcome_is_not_success_4xx() {
+        let o = RequestOutcome {
+            status: StatusCode::NOT_FOUND,
+            latency: Duration::from_millis(5),
+        };
+        assert!(!o.is_success());
+    }
+
+    // --- LoadResult statistics ---
+
+    fn make_result(latencies_ms: &[u64], statuses: &[StatusCode]) -> LoadResult {
+        assert_eq!(latencies_ms.len(), statuses.len());
+        let outcomes = latencies_ms
+            .iter()
+            .zip(statuses.iter())
+            .map(|(&ms, &status)| RequestOutcome {
+                status,
+                latency: Duration::from_millis(ms),
+            })
+            .collect();
+        LoadResult {
+            outcomes,
+            total_duration: Duration::from_millis(100),
+        }
+    }
+
+    #[test]
+    fn test_load_result_counts() {
+        let result = make_result(
+            &[10, 20, 30],
+            &[StatusCode::OK, StatusCode::OK, StatusCode::INTERNAL_SERVER_ERROR],
+        );
+        assert_eq!(result.total(), 3);
+        assert_eq!(result.successes(), 2);
+        assert_eq!(result.failures(), 1);
+    }
+
+    #[test]
+    fn test_load_result_error_rate() {
+        let result = make_result(
+            &[10, 20],
+            &[StatusCode::OK, StatusCode::INTERNAL_SERVER_ERROR],
+        );
+        assert!((result.error_rate() - 0.5).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_load_result_zero_error_rate() {
+        let result = make_result(&[10, 20, 30], &[StatusCode::OK; 3]);
+        assert_eq!(result.error_rate(), 0.0);
+    }
+
+    #[test]
+    fn test_load_result_empty_error_rate() {
+        let result = LoadResult {
+            outcomes: vec![],
+            total_duration: Duration::ZERO,
+        };
+        assert_eq!(result.error_rate(), 0.0);
+    }
+
+    #[test]
+    fn test_load_result_min_max_latency() {
+        let result = make_result(&[5, 50, 25], &[StatusCode::OK; 3]);
+        assert_eq!(result.min_latency(), Duration::from_millis(5));
+        assert_eq!(result.max_latency(), Duration::from_millis(50));
+    }
+
+    #[test]
+    fn test_load_result_mean_latency() {
+        let mean = make_result(&[10, 20, 30], &[StatusCode::OK; 3]).mean_latency();
+        assert_eq!(mean, Duration::from_millis(20)); // (10 + 20 + 30) / 3
+    }
+
+    #[test]
+    fn test_load_result_p50() {
+        // Input is deliberately shuffled; ordered it reads 10, 20, 30, 40, 50, so p50 = 30.
+        let median = make_result(&[50, 10, 30, 20, 40], &[StatusCode::OK; 5]).p50();
+        assert_eq!(median, Duration::from_millis(30));
+    }
+
+    #[test]
+    fn test_load_result_p99_single_element() {
+        let lone_p99 = make_result(&[42], &[StatusCode::OK]).p99();
+        assert_eq!(lone_p99, Duration::from_millis(42)); // the only sample available
+    }
+
+    #[test]
+    fn test_load_result_p95_100_elements() {
+        // Samples are 1 ms, 2 ms, ..., 100 ms; p95 should be 95 ms.
+        let ramp_ms = (1..=100).collect::<Vec<u64>>();
+        let all_ok = [StatusCode::OK; 100];
+        let ramp = make_result(&ramp_ms, &all_ok);
+        assert_eq!(ramp.p95(), Duration::from_millis(95));
+    }
+
+    #[test]
+    fn test_load_result_rps() {
+        // 100 outcomes over a one-second total duration => 100 requests per second.
+        let sample = RequestOutcome { status: StatusCode::OK, latency: Duration::from_millis(1) };
+        let one_second_run = LoadResult {
+            outcomes: vec![sample; 100],
+            total_duration: Duration::from_secs(1),
+        };
+        let drift = (one_second_run.rps() - 100.0).abs();
+        assert!(drift < 0.01);
+    }
+
+    #[test]
+    fn test_load_result_rps_zero_duration() {
+        let instant_run = LoadResult {
+            total_duration: Duration::ZERO,
+            outcomes: Vec::new(),
+        };
+        assert_eq!(instant_run.rps(), 0.0);
+    }
+
+    // --- assert_load_result ---
+
+    #[test]
+    fn test_assert_load_result_passes() {
+        // No failures and a 30 ms worst case fit a 0% error / 100 ms p99 budget: no panic.
+        let healthy = make_result(&[10, 20, 30], &[StatusCode::OK; 3]);
+        assert_load_result(&healthy, 0.0, Duration::from_millis(100));
+    }
+
+    #[test]
+    #[should_panic(expected = "error rate")]
+    fn test_assert_load_result_fails_on_error_rate() {
+        // Latencies sit far below the 1 s budget, so only the 50% error rate can trip.
+        let statuses = [StatusCode::OK, StatusCode::INTERNAL_SERVER_ERROR];
+        let half_failed = make_result(&[10, 20], &statuses);
+        let generous_p99 = Duration::from_secs(1);
+        assert_load_result(&half_failed, 0.0, generous_p99);
+    }
+
+    #[test]
+    #[should_panic(expected = "p99 latency")]
+    fn test_assert_load_result_fails_on_p99() {
+        let too_slow = make_result(&[500], &[StatusCode::OK]); // 500 ms vs. a 100 ms budget
+        assert_load_result(&too_slow, 0.0, Duration::from_millis(100));
+    }
+
+    // --- run_load ---
+
+    #[tokio::test]
+    async fn test_run_load_collects_all_outcomes() {
+        // LoadConfig::new(4, 5) describes 4 x 5 = 20 requests in total.
+        let four_by_five = LoadConfig::new(4, 5);
+        let always_ok = || async { (StatusCode::OK, Duration::from_millis(1)) };
+        let collected = run_load(four_by_five, always_ok).await;
+
+        let (seen, failed) = (collected.total(), collected.failures());
+        assert_eq!(seen, 20);
+        assert_eq!(failed, 0);
+    }
+
+    #[tokio::test]
+    async fn test_run_load_records_failures() {
+        use std::sync::atomic::{AtomicUsize, Ordering};
+
+        // A shared call counter makes the callback alternate OK, 500, OK, 500, ...
+        let calls = std::sync::Arc::new(AtomicUsize::new(0));
+
+        let alternating = run_load(LoadConfig::new(1, 2), move || {
+            let calls = calls.clone();
+            async move {
+                let status = match calls.fetch_add(1, Ordering::SeqCst) % 2 {
+                    0 => StatusCode::OK,
+                    _ => StatusCode::INTERNAL_SERVER_ERROR,
+                };
+                (status, Duration::from_millis(1))
+            }
+        })
+        .await;
+
+        // Two calls see counter values 0 and 1: exactly one OK and one 500.
+        assert_eq!(alternating.total(), 2);
+        assert_eq!(alternating.failures(), 1);
+    }
+
+    #[tokio::test]
+    async fn test_run_load_respects_concurrency() {
+        // Five tasks each sleep 50 ms. Run concurrently they finish in roughly
+        // 50 ms; run back-to-back they need at least 250 ms, which the 200 ms
+        // bound below rejects (a 10 ms sleep would pass even when sequential).
+        let per_request = Duration::from_millis(50);
+        let start = Instant::now();
+        let result = run_load(LoadConfig::new(5, 1), move || async move {
+            tokio::time::sleep(per_request).await;
+            (StatusCode::OK, start.elapsed())
+        })
+        .await;
+
+        assert!(result.total_duration < Duration::from_millis(200));
+        assert_eq!(result.total(), 5);
+    }
+
+    #[tokio::test]
+    async fn test_run_load_default_config() {
+        // `LoadConfig::default()` is expected to describe 50 requests in total.
+        let defaults = LoadConfig::default();
+        let always_ok = || async { (StatusCode::OK, Duration::from_millis(1)) };
+        let outcome = run_load(defaults, always_ok).await;
+        assert_eq!(outcome.total(), 50);
+    }
+}
diff --git a/backend/tests/load/mod.rs b/backend/tests/load/mod.rs
index 223744f..5f007b5 100644
--- a/backend/tests/load/mod.rs
+++ b/backend/tests/load/mod.rs
@@ -1,12 +1,39 @@
 //! Load and stress tests for the backend API.
 //!
 //! These tests exercise the API under concurrent load to verify that the
-//! server remains stable and responsive. They are gated behind the
-//! `load_tests` feature flag so they don't run in normal CI:
+//! server remains stable and responsive. They are designed to run without
+//! external services (PostgreSQL, Redis) by using in-process Axum routers
+//! with mock state.
+//!
+//! # Running
 //!
 //! ```bash
+//! # All load tests
 //! cargo test -p backend --test load_tests -- --nocapture
+//!
+//! # A specific module
+//! cargo test -p backend --test load_tests load::status_load -- --nocapture
+//! cargo test -p backend --test load_tests load::profile_load -- --nocapture
+//! cargo test -p backend --test load_tests load::dashboard_load -- --nocapture
+//! cargo test -p backend --test load_tests load::stellar_load -- --nocapture
+//! cargo test -p backend --test load_tests load::framework -- --nocapture
 //! ```
+//!
+//! # Architecture
+//!
+//! Each sub-module builds an in-process Axum [`Router`] with a lightweight
+//! mock [`AppState`] (no real DB or Redis connections). Requests are fired
+//! via [`tower::ServiceExt::oneshot`], which bypasses the network entirely
+//! and exercises only the handler + middleware stack.
+//!
+//! The [`framework`] module provides shared helpers:
+//! - [`framework::LoadConfig`] — concurrency / iteration parameters
+//! - [`framework::LoadResult`] — aggregated latency statistics
+//! - [`framework::run_load`] — generic concurrent request runner
+//! - [`framework::assert_load_result`] — assertion helper for p99 / error-rate targets
 
-pub mod status_load;
+pub mod dashboard_load;
+pub mod framework;
 pub mod profile_load;
+pub mod status_load;
+pub mod stellar_load;
diff --git a/backend/tests/load/profile_load.rs b/backend/tests/load/profile_load.rs
index ebcb132..88e1c03 100644
--- a/backend/tests/load/profile_load.rs
+++ b/backend/tests/load/profile_load.rs
@@ -1,29 +1,64 @@
 //! Concurrent load tests for the `POST /api/profile` endpoint.
+//!
+//! These tests verify that the profiling trigger handler remains stable and
+//! correct under concurrent load without requiring a live database or Redis.
+//!
+//! # Running
+//!
+//! ```bash
+//! cargo test -p backend --test load_tests load::profile_load -- --nocapture
+//! ```
 
-use axum::{routing::post, Router};
-use hyper::{Request, StatusCode};
 use std::sync::Arc;
+use std::time::Instant;
+
+use axum::{body::to_bytes, routing::post, Router};
+use axum::http::StatusCode;
+use hyper::Request;
 use tower::ServiceExt;
 
 use backend::api::handlers::profiling::{trigger_profile_collection, AppState};
-use backend::config::{reload::ConfigManager, AppConfig};
+use backend::config::{AppConfig, reload::ConfigManager};
 use backend::services::{error_recovery::ErrorManager, sys_metrics::MetricsExporter};
 
+use crate::load::framework::{assert_load_result, LoadConfig, LoadResult};
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/// Build a test router wired to the `POST /api/profile` handler.
 fn build_app() -> Router {
+    let (log_aggregator, _rx) = backend::services::log_aggregator::LogAggregator::new();
     let state = Arc::new(AppState {
         db: None,
         metrics_exporter: Arc::new(MetricsExporter::new()),
         error_manager: Arc::new(ErrorManager::new()),
         config_manager: Arc::new(ConfigManager::new(AppConfig::default())),
+        log_aggregator: Arc::new(log_aggregator),
+        redis: redis::Client::open("redis://127.0.0.1:1/").unwrap(),
     });
     Router::new()
         .route("/api/profile", post(trigger_profile_collection))
         .with_state(state)
 }
 
+/// Serialize a profile-trigger payload (`duration_secs` 10, `sample_rate_hz` 100)
+/// tagged with `label` into a request body.
+fn profile_request_body(label: &str) -> axum::body::Body {
+    let payload = serde_json::json!({
+        "duration_secs": 10,
+        "sample_rate_hz": 100,
+        "label": label
+    });
+    let encoded = payload.to_string();
+    axum::body::Body::from(encoded)
+}
+
+/// Fire `n` concurrent requests and assert all return 200.
 async fn run_concurrent(n: usize) {
     let handles: Vec<_> = (0..n)
-        .map(|_| {
+        .map(|i| {
             let app = build_app();
             tokio::spawn(async move {
                 let resp = app
@@ -32,14 +67,7 @@ async fn run_concurrent(n: usize) {
                             .method("POST")
                             .uri("/api/profile")
                             .header("content-type", "application/json")
-                            .body(axum::body::Body::from(
-                                serde_json::json!({
-                                    "duration_secs": 10,
-                                    "sample_rate_hz": 100,
-                                    "label": "load-test"
-                                })
-                                .to_string(),
-                            ))
+                            .body(profile_request_body(&format!("load-test-{i}")))
                             .unwrap(),
                     )
                     .await
@@ -55,6 +83,34 @@ async fn run_concurrent(n: usize) {
     }
 }
 
+/// Drive `POST /api/profile` through the shared load framework and return the
+/// aggregated [`LoadResult`]; the run is sized by `concurrency` and
+/// `requests_per_task`, and every request gets a freshly built router.
+async fn run_framework_load(concurrency: usize, requests_per_task: usize) -> LoadResult {
+    use crate::load::framework::run_load;
+
+    let one_request = || async {
+        let router = build_app();
+        // Router construction is excluded from the measured latency.
+        let clock = Instant::now();
+        let request = Request::builder()
+            .method("POST")
+            .uri("/api/profile")
+            .header("content-type", "application/json")
+            .body(profile_request_body("load-test"))
+            .unwrap();
+        let response = router.oneshot(request).await.unwrap();
+        (response.status(), clock.elapsed())
+    };
+
+    let cfg = LoadConfig::new(concurrency, requests_per_task);
+    run_load(cfg, one_request).await
+}
+
+// ---------------------------------------------------------------------------
+// Basic concurrency tests
+// ---------------------------------------------------------------------------
+
 #[tokio::test]
 async fn test_profile_10_concurrent() {
     run_concurrent(10).await;
@@ -65,14 +121,53 @@ async fn test_profile_50_concurrent() {
     run_concurrent(50).await;
 }
 
+// ---------------------------------------------------------------------------
+// Response shape
+// ---------------------------------------------------------------------------
+
+/// Verify response body shape.
+///
+/// A valid trigger request must be answered with `200 OK` and a JSON document
+/// whose `data` member exposes `message`, `profile_id` and
+/// `estimated_completion`. Only the presence of the keys is checked.
+#[tokio::test]
+async fn test_profile_response_shape() {
+    let app = build_app();
+    let request = Request::builder()
+        .method("POST")
+        .uri("/api/profile")
+        .header("content-type", "application/json")
+        .body(profile_request_body("shape-test"))
+        .unwrap();
+    let response = app.oneshot(request).await.unwrap();
+
+    assert_eq!(response.status(), StatusCode::OK);
+
+    // Drain the whole response body before decoding it.
+    let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+    let parsed: serde_json::Value = serde_json::from_slice(&raw).unwrap();
+
+    // The `data` envelope has to exist before its members can be inspected.
+    let Some(data) = parsed.get("data") else {
+        panic!("response must have 'data' key");
+    };
+
+    // One assertion per required member, so a failure names the missing key.
+    for key in ["message", "profile_id", "estimated_completion"] {
+        assert!(
+            data.get(key).is_some(),
+            "data must have '{key}' key"
+        );
+    }
+}
+
 /// Verify each response contains a unique profile_id.
 #[tokio::test]
 async fn test_profile_unique_ids() {
-    use axum::body::to_bytes;
     use std::collections::HashSet;
 
     let mut ids = HashSet::new();
-    for _ in 0..10 {
+    for i in 0..10 {
         let app = build_app();
         let resp = app
             .oneshot(
@@ -80,37 +175,57 @@ async fn test_profile_unique_ids() {
                     .method("POST")
                     .uri("/api/profile")
                     .header("content-type", "application/json")
-                    .body(axum::body::Body::from(
-                        serde_json::json!({
-                            "duration_secs": 10,
-                            "sample_rate_hz": 100,
-                            "label": "load-test-id"
-                        })
-                        .to_string(),
-                    ))
+                    .body(profile_request_body(&format!("unique-id-test-{i}")))
                     .unwrap(),
             )
             .await
             .unwrap();
 
         let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
-        let json: serde_json::Value = serde_json::from_slice(&bytes).expect("Valid JSON");
+        let json: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
         let id = json["data"]["profile_id"]
             .as_str()
-            .expect("profile_id in data")
+            .expect("profile_id must be a string")
             .to_string();
         ids.insert(id);
     }
 
-    // All 10 profile IDs should be unique
-    assert_eq!(ids.len(), 10);
+    assert_eq!(ids.len(), 10, "all 10 profile IDs must be unique");
 }
 
-/// Verify response body shape.
+/// Verify the `message` field contains the label from the request.
 #[tokio::test]
-async fn test_profile_response_shape() {
-    use axum::body::to_bytes;
+async fn test_profile_message_contains_label() {
+    let app = build_app();
+    let label = "my-custom-label";
+    let resp = app
+        .oneshot(
+            Request::builder()
+                .method("POST")
+                .uri("/api/profile")
+                .header("content-type", "application/json")
+                .body(profile_request_body(label))
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+
+    let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
+    let json: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
+    let message = json["data"]["message"].as_str().unwrap();
+    assert!(
+        message.contains(label),
+        "message '{message}' must contain label '{label}'"
+    );
+}
 
+// ---------------------------------------------------------------------------
+// Validation tests
+// ---------------------------------------------------------------------------
+
+/// Verify that an empty `label` value is rejected with 400 / 422.
+#[tokio::test]
+async fn test_profile_missing_label_rejected() {
     let app = build_app();
     let resp = app
         .oneshot(
@@ -122,7 +237,7 @@ async fn test_profile_response_shape() {
                     serde_json::json!({
                         "duration_secs": 10,
                         "sample_rate_hz": 100,
-                        "label": "load-test-shape"
+                        "label": ""
                     })
                     .to_string(),
                 ))
@@ -131,12 +246,198 @@ async fn test_profile_response_shape() {
         .await
         .unwrap();
 
-    assert_eq!(resp.status(), StatusCode::OK);
+    // Empty label should fail validation → 400 or 422
+    assert!(
+        resp.status() == StatusCode::BAD_REQUEST
+            || resp.status() == StatusCode::UNPROCESSABLE_ENTITY,
+        "expected 400 or 422, got {}",
+        resp.status()
+    );
+}
 
-    let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
-    let json: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
+/// Verify that `duration_secs = 0` is rejected.
+#[tokio::test]
+async fn test_profile_zero_duration_rejected() {
+    // Same sample rate as the valid request body; the zero duration is under test.
+    let payload = serde_json::json!({
+        "duration_secs": 0,
+        "sample_rate_hz": 100,
+        "label": "test"
+    });
+    let request = Request::builder()
+        .method("POST")
+        .uri("/api/profile")
+        .header("content-type", "application/json")
+        .body(axum::body::Body::from(payload.to_string()))
+        .unwrap();
+    let app = build_app();
+    let response = app.oneshot(request).await.unwrap();
+    let status = response.status();
+
+    // Either 400 or 422 counts as a rejection.
+    let accepted_codes = [
+        StatusCode::BAD_REQUEST,
+        StatusCode::UNPROCESSABLE_ENTITY,
+    ];
+    assert!(
+        accepted_codes.contains(&status),
+        "expected 400 or 422, got {}",
+        status
+    );
+}
+
+/// Verify that `duration_secs` exceeding 3600 is rejected.
+#[tokio::test]
+async fn test_profile_excessive_duration_rejected() {
+    // Same sample rate as the valid request body; the 9999 s duration is under test.
+    let payload = serde_json::json!({
+        "duration_secs": 9999,
+        "sample_rate_hz": 100,
+        "label": "test"
+    });
+    let request = Request::builder()
+        .method("POST")
+        .uri("/api/profile")
+        .header("content-type", "application/json")
+        .body(axum::body::Body::from(payload.to_string()))
+        .unwrap();
+    let app = build_app();
+    let response = app.oneshot(request).await.unwrap();
+    let status = response.status();
+
+    // Either 400 or 422 counts as a rejection.
+    let accepted_codes = [
+        StatusCode::BAD_REQUEST,
+        StatusCode::UNPROCESSABLE_ENTITY,
+    ];
+    assert!(
+        accepted_codes.contains(&status),
+        "expected 400 or 422, got {}",
+        status
+    );
+}
+
+/// Verify that a plain-text, non-JSON body is rejected with a 4xx status
+/// (for example 400 or 415; the exact code is not pinned).
+#[tokio::test]
+async fn test_profile_non_json_body_rejected() {
+    // Both the content type and the payload are deliberately not JSON.
+    let request = Request::builder()
+        .method("POST")
+        .uri("/api/profile")
+        .header("content-type", "text/plain")
+        .body(axum::body::Body::from("not json"))
+        .unwrap();
+
+    let app = build_app();
+    let response = app.oneshot(request).await.unwrap();
+    let status = response.status();
+
+    assert!(
+        status.is_client_error(),
+        "expected 4xx, got {}",
+        status
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Framework-based load tests with SLO assertions
+// ---------------------------------------------------------------------------
+
+/// SLO for 100 requests (10 concurrent tasks × 10 each): 0% errors, p99 < 500ms.
+#[tokio::test]
+async fn test_profile_load_100_requests_slo() {
+    let p99_budget = std::time::Duration::from_millis(500);
+    let load = run_framework_load(10, 10).await;
+    load.print_summary("POST /api/profile — 100 requests");
+    assert_load_result(&load, 0.0, p99_budget);
+}
+
+/// SLO for 200 requests (20 concurrent tasks × 10 each): 0% errors, p99 < 1s.
+#[tokio::test]
+async fn test_profile_load_200_requests_slo() {
+    let p99_budget = std::time::Duration::from_secs(1);
+    let load = run_framework_load(20, 10).await;
+    load.print_summary("POST /api/profile — 200 requests");
+    assert_load_result(&load, 0.0, p99_budget);
+}
+
+/// Verify that all responses under load have the correct JSON shape.
+/// A panicking task fails the test rather than ending verification early.
+#[tokio::test]
+async fn test_profile_load_response_shape_under_load() {
+    let mut join_set = tokio::task::JoinSet::new();
+    for i in 0..5_usize {
+        join_set.spawn(async move {
+            let mut results = Vec::new();
+            for j in 0..4_usize {
+                let request = Request::builder()
+                    .method("POST")
+                    .uri("/api/profile")
+                    .header("content-type", "application/json")
+                    .body(profile_request_body(&format!("task-{i}-req-{j}")))
+                    .unwrap();
+                let resp = build_app().oneshot(request).await.unwrap();
+                let status = resp.status();
+                let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
+                results.push((status, bytes.to_vec()));
+            }
+            results
+        });
+    }
+
+    let mut verified = 0_usize;
+    while let Some(joined) = join_set.join_next().await {
+        // Matching only `Some(Ok(..))` would stop at the first `Err` and skip later batches.
+        for (status, body) in joined.expect("load task panicked") {
+            assert_eq!(status, StatusCode::OK);
+            let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
+            assert_eq!(json["status"], "success");
+            assert!(json["data"].get("profile_id").is_some());
+            assert!(json["data"].get("message").is_some());
+            assert!(json["data"].get("estimated_completion").is_some());
+            verified += 1;
+        }
+    }
+    assert_eq!(verified, 20, "all 5 x 4 responses must be verified");
+}
+
+/// Verify that concurrent requests each produce a unique profile_id.
+#[tokio::test]
+async fn test_profile_concurrent_unique_ids() {
+    use std::collections::HashSet;
+    use std::sync::Mutex;
+
+    let ids = Arc::new(Mutex::new(HashSet::new()));
+    let mut join_set = tokio::task::JoinSet::new();
+
+    for i in 0..20_usize {
+        let ids_clone = ids.clone();
+        join_set.spawn(async move {
+            let app = build_app();
+            let resp = app
+                .oneshot(
+                    Request::builder()
+                        .method("POST")
+                        .uri("/api/profile")
+                        .header("content-type", "application/json")
+                        .body(profile_request_body(&format!("concurrent-{i}")))
+                        .unwrap(),
+                )
+                .await
+                .unwrap();
+            let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
+            let json: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
+            let id = json["data"]["profile_id"]
+                .as_str()
+                .unwrap()
+                .to_string();
+            ids_clone.lock().unwrap().insert(id);
+        });
+    }
+
+    while join_set.join_next().await.is_some() {}
 
-    assert!(json.get("data").is_some());
-    assert!(json["data"].get("message").is_some());
-    assert!(json["data"].get("profile_id").is_some());
+    let collected = ids.lock().unwrap();
+    assert_eq!(collected.len(), 20, "all 20 concurrent profile IDs must be unique");
 }
diff --git a/backend/tests/load/status_load.rs b/backend/tests/load/status_load.rs
index 7508b01..abbb09b 100644
--- a/backend/tests/load/status_load.rs
+++ b/backend/tests/load/status_load.rs
@@ -1,21 +1,42 @@
 //! Concurrent load tests for the `GET /api/status` endpoint.
+//!
+//! These tests verify that the status handler remains stable and correct
+//! under concurrent load without requiring a live database or Redis instance.
+//!
+//! # Running
+//!
+//! ```bash
+//! cargo test -p backend --test load_tests load::status_load -- --nocapture
+//! ```
 
-use axum::{routing::get, Router};
-use hyper::{Request, StatusCode};
 use std::sync::Arc;
+use std::time::Instant;
+
+use axum::{body::to_bytes, routing::get, Router};
+use axum::http::StatusCode;
+use hyper::Request;
 use tower::ServiceExt;
 
 use backend::api::handlers::profiling::{get_system_status, AppState};
-use backend::config::{reload::ConfigManager, AppConfig};
+use backend::config::{AppConfig, reload::ConfigManager};
 use backend::services::{error_recovery::ErrorManager, sys_metrics::MetricsExporter};
 
-/// Build a test router with the status endpoint.
+use crate::load::framework::{assert_load_result, LoadConfig, LoadResult};
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/// Build a test router wired to the `/api/status` handler with mock state.
 fn build_app() -> Router {
+    let (log_aggregator, _rx) = backend::services::log_aggregator::LogAggregator::new();
     let state = Arc::new(AppState {
         db: None,
         metrics_exporter: Arc::new(MetricsExporter::new()),
         error_manager: Arc::new(ErrorManager::new()),
         config_manager: Arc::new(ConfigManager::new(AppConfig::default())),
+        log_aggregator: Arc::new(log_aggregator),
+        redis: redis::Client::open("redis://127.0.0.1:1/").unwrap(),
     });
     Router::new()
         .route("/api/status", get(get_system_status))
@@ -48,6 +69,32 @@ async fn run_concurrent(n: usize) {
     }
 }
 
+/// Drive `GET /api/status` through the shared load framework and return the
+/// aggregated [`LoadResult`]; the run is sized by `concurrency` and
+/// `requests_per_task`, and every request gets a freshly built router.
+async fn run_framework_load(concurrency: usize, requests_per_task: usize) -> LoadResult {
+    use crate::load::framework::run_load;
+
+    let one_request = || async {
+        let router = build_app();
+        // Router construction is excluded from the measured latency.
+        let clock = Instant::now();
+        let request = Request::builder()
+            .uri("/api/status")
+            .body(axum::body::Body::empty())
+            .unwrap();
+        let response = router.oneshot(request).await.unwrap();
+        (response.status(), clock.elapsed())
+    };
+
+    let cfg = LoadConfig::new(concurrency, requests_per_task);
+    run_load(cfg, one_request).await
+}
+
+// ---------------------------------------------------------------------------
+// Basic concurrency tests
+// ---------------------------------------------------------------------------
+
 #[tokio::test]
 async fn test_status_10_concurrent() {
     run_concurrent(10).await;
@@ -63,6 +110,10 @@ async fn test_status_100_concurrent() {
     run_concurrent(100).await;
 }
 
+// ---------------------------------------------------------------------------
+// Sequential stability
+// ---------------------------------------------------------------------------
+
 /// Verify that repeated sequential requests all succeed.
 #[tokio::test]
 async fn test_status_sequential_stability() {
@@ -82,11 +133,13 @@ async fn test_status_sequential_stability() {
     }
 }
 
+// ---------------------------------------------------------------------------
+// Response shape
+// ---------------------------------------------------------------------------
+
 /// Verify response body contains expected JSON keys.
 #[tokio::test]
 async fn test_status_response_shape() {
-    use axum::body::to_bytes;
-
     let app = build_app();
     let resp = app
         .oneshot(
@@ -104,8 +157,252 @@ async fn test_status_response_shape() {
     let json: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
 
     assert_eq!(json["status"], "success");
-    assert!(json.get("data").is_some());
-    assert!(json["data"].get("status").is_some());
-    assert!(json["data"].get("uptime_secs").is_some());
-    assert!(json["data"].get("active_recovery_tasks").is_some());
+    assert!(json.get("data").is_some(), "response must have 'data' key");
+    assert!(
+        json["data"].get("status").is_some(),
+        "data must have 'status' key"
+    );
+    assert!(
+        json["data"].get("uptime_secs").is_some(),
+        "data must have 'uptime_secs' key"
+    );
+    assert!(
+        json["data"].get("active_recovery_tasks").is_some(),
+        "data must have 'active_recovery_tasks' key"
+    );
+}
+
+/// Verify the `status` field value is `"healthy"`.
+#[tokio::test]
+async fn test_status_healthy_value() {
+    // `build_app` constructs a fresh state for this single request.
+    let app = build_app();
+    let request = Request::builder()
+        .uri("/api/status")
+        .body(axum::body::Body::empty())
+        .unwrap();
+    let response = app.oneshot(request).await.unwrap();
+
+    let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+    let document: serde_json::Value = serde_json::from_slice(&raw).unwrap();
+
+    // The health value is read from `data.status`, not the top-level `status`.
+    let health = &document["data"]["status"];
+    assert_eq!(*health, "healthy");
+}
+
+/// Verify `active_recovery_tasks` starts at zero with a fresh state.
+#[tokio::test]
+async fn test_status_zero_recovery_tasks_initially() {
+    // `build_app` creates a new `ErrorManager`; no errors are reported to it here.
+    let app = build_app();
+    let request = Request::builder()
+        .uri("/api/status")
+        .body(axum::body::Body::empty())
+        .unwrap();
+    let response = app.oneshot(request).await.unwrap();
+
+    let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+    let document: serde_json::Value = serde_json::from_slice(&raw).unwrap();
+
+    // The counter is read from `data.active_recovery_tasks`.
+    let active = &document["data"]["active_recovery_tasks"];
+    assert_eq!(*active, 0);
+}
+
+/// Verify `uptime_secs` is a non-negative integer.
+#[tokio::test]
+async fn test_status_uptime_is_non_negative() {
+    let app = build_app();
+    let request = Request::builder()
+        .uri("/api/status")
+        .body(axum::body::Body::empty())
+        .unwrap();
+    let response = app.oneshot(request).await.unwrap();
+
+    let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+    let document: serde_json::Value = serde_json::from_slice(&raw).unwrap();
+
+    // `as_u64` yields `None` for anything that is not a non-negative integer.
+    let uptime = document["data"]["uptime_secs"].as_u64();
+    if uptime.is_none() {
+        panic!("uptime_secs must be a non-negative integer");
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Framework-based load tests with SLO assertions
+// ---------------------------------------------------------------------------
+
+/// SLO for 100 requests (10 concurrent tasks × 10 each): 0% errors, p99 < 500ms.
+#[tokio::test]
+async fn test_status_load_100_requests_slo() {
+    let p99_budget = std::time::Duration::from_millis(500);
+    let load = run_framework_load(10, 10).await;
+    load.print_summary("GET /api/status — 100 requests");
+    assert_load_result(&load, 0.0, p99_budget);
+}
+
+/// SLO for 200 requests (20 concurrent tasks × 10 each): 0% errors, p99 < 1s.
+#[tokio::test]
+async fn test_status_load_200_requests_slo() {
+    let p99_budget = std::time::Duration::from_secs(1);
+    let load = run_framework_load(20, 10).await;
+    load.print_summary("GET /api/status — 200 requests");
+    assert_load_result(&load, 0.0, p99_budget);
+}
+
+/// Verify that all responses under load have the correct JSON shape.
+///
+/// A panicking task fails the test instead of silently ending collection, and
+/// the number of collected responses is checked before their bodies are.
+#[tokio::test]
+async fn test_status_load_response_shape_under_load() {
+    const TASKS: usize = 5;
+    const REQUESTS_PER_TASK: usize = 4;
+
+    let mut join_set = tokio::task::JoinSet::new();
+    for _ in 0..TASKS {
+        join_set.spawn(async {
+            let mut results = Vec::with_capacity(REQUESTS_PER_TASK);
+            for _ in 0..REQUESTS_PER_TASK {
+                let request = Request::builder()
+                    .uri("/api/status")
+                    .body(axum::body::Body::empty())
+                    .unwrap();
+                let resp = build_app().oneshot(request).await.unwrap();
+                let status = resp.status();
+                let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
+                results.push((status, bytes.to_vec()));
+            }
+            results
+        });
+    }
+
+    let mut outcomes: Vec<(StatusCode, Vec<u8>)> = Vec::new();
+    while let Some(joined) = join_set.join_next().await {
+        // Matching only `Some(Ok(..))` would stop at the first panicked task and
+        // let the test pass on a partial result set.
+        outcomes.extend(joined.expect("load task panicked"));
+    }
+    // Every spawned request must have produced an outcome.
+    assert_eq!(outcomes.len(), TASKS * REQUESTS_PER_TASK);
+
+    for (status, body) in outcomes {
+        assert_eq!(status, StatusCode::OK);
+        let json: serde_json::Value = serde_json::from_slice(&body).unwrap();
+        assert_eq!(json["status"], "success");
+        assert!(json["data"].get("status").is_some());
+        assert!(json["data"].get("uptime_secs").is_some());
+        assert!(json["data"].get("active_recovery_tasks").is_some());
+    }
+}
+
+/// Verify that the handler is idempotent in shape: repeated calls must return the same
+/// top-level JSON keys.
+#[tokio::test]
+async fn test_status_idempotent_responses() {
+    let router = build_app();
+    let mut last_seen: Option<serde_json::Value> = None;
+
+    for _ in 0..5 {
+        // `oneshot` takes the service by value, so each iteration works on a clone.
+        let request = Request::builder()
+            .uri("/api/status")
+            .body(axum::body::Body::empty())
+            .unwrap();
+        let response = router.clone().oneshot(request).await.unwrap();
+
+        let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+        let current: serde_json::Value = serde_json::from_slice(&raw).unwrap();
+
+        // Compare only the key set: values may legitimately differ between
+        // calls (e.g. uptime_secs).
+        if let Some(earlier) = last_seen.as_ref() {
+            assert_eq!(
+                earlier.as_object().unwrap().keys().collect::<Vec<_>>(),
+                current.as_object().unwrap().keys().collect::<Vec<_>>(),
+                "response keys must be stable across calls"
+            );
+        }
+
+        // Keep this response as the reference for the next iteration.
+        last_seen = Some(current);
+    }
+}
+
+/// Verify that the handler reports a recovery task after `ErrorManager::handle_error`
+/// has been called once on the shared state.
+#[tokio::test]
+async fn test_status_reflects_recovery_tasks() {
+    use backend::services::error_recovery::RecoveryError;
+
+    // Record exactly one failure before the state is assembled.
+    let recovery = Arc::new(ErrorManager::new());
+    let failure = RecoveryError::Internal("boom".into());
+    recovery.handle_error(failure, "worker_a").await.unwrap();
+
+    let (aggregator, _log_rx) = backend::services::log_aggregator::LogAggregator::new();
+    let shared = Arc::new(AppState {
+        db: None,
+        metrics_exporter: Arc::new(MetricsExporter::new()),
+        error_manager: recovery.clone(),
+        config_manager: Arc::new(ConfigManager::new(AppConfig::default())),
+        log_aggregator: Arc::new(aggregator),
+        redis: redis::Client::open("redis://127.0.0.1:1/").unwrap(),
+    });
+
+    // Mount the status handler on a router that owns the pre-seeded state.
+    let router = Router::new()
+        .route("/api/status", get(get_system_status))
+        .with_state(shared);
+
+    let request = Request::builder()
+        .uri("/api/status")
+        .body(axum::body::Body::empty())
+        .unwrap();
+
+    let response = router.oneshot(request).await.unwrap();
+
+    let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+    let payload: serde_json::Value = serde_json::from_slice(&raw).unwrap();
+    // The single `handle_error` call above is expected to surface as one active task.
+    let active = &payload["data"]["active_recovery_tasks"];
+    assert_eq!(*active, 1);
+}
+
+/// Verify that values passed to `MetricsExporter::update_metrics` show up in the status payload.
+#[tokio::test]
+async fn test_status_reflects_updated_metrics() {
+    // Seed the exporter before wiring it into the shared state.
+    let exporter = Arc::new(MetricsExporter::new());
+    exporter.update_metrics(55.0, 2048, 300).await;
+
+    let (aggregator, _log_rx) = backend::services::log_aggregator::LogAggregator::new();
+    let shared = Arc::new(AppState {
+        db: None,
+        metrics_exporter: exporter.clone(),
+        error_manager: Arc::new(ErrorManager::new()),
+        config_manager: Arc::new(ConfigManager::new(AppConfig::default())),
+        log_aggregator: Arc::new(aggregator),
+        redis: redis::Client::open("redis://127.0.0.1:1/").unwrap(),
+    });
+
+    let router = Router::new()
+        .route("/api/status", get(get_system_status))
+        .with_state(shared);
+
+    // Issue a single in-process GET against the router.
+    let request = Request::builder()
+        .uri("/api/status")
+        .body(axum::body::Body::empty())
+        .unwrap();
+    let response = router.oneshot(request).await.unwrap();
+
+    let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+    let payload: serde_json::Value = serde_json::from_slice(&raw).unwrap();
+    // The seeded 2048 and 300 are expected back as memory and uptime respectively.
+    let data = &payload["data"];
+    assert_eq!(data["uptime_secs"], 300);
+    assert_eq!(data["memory_used_bytes"], 2048);
 }
diff --git a/backend/tests/load/stellar_load.rs b/backend/tests/load/stellar_load.rs
new file mode 100644
index 0000000..1eed7e0
--- /dev/null
+++ b/backend/tests/load/stellar_load.rs
@@ -0,0 +1,399 @@
+//! Concurrent load tests for the `GET /.well-known/stellar.toml` endpoint.
+//!
+//! These tests verify that the Stellar SEP-1 handler remains stable and
+//! correct under concurrent load. The handler is stateless so no mock
+//! infrastructure is required.
+//!
+//! # Running
+//!
+//! ```bash
+//! cargo test -p backend --test load_tests load::stellar_load -- --nocapture
+//! ```
+
+use std::time::Instant;
+
+use axum::{body::to_bytes, routing::get, Router};
+use axum::http::StatusCode;
+use hyper::Request;
+use tower::ServiceExt;
+
+use backend::api::handlers::stellar::get_stellar_toml;
+
+use crate::load::framework::{assert_load_result, LoadConfig, LoadResult};
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/// Build a fresh router whose only route is `/.well-known/stellar.toml` (GET, `get_stellar_toml`).
+fn build_app() -> Router {
+    Router::new().route("/.well-known/stellar.toml", get(get_stellar_toml))
+}
+
+/// Run the framework load loop against the TOML route and return its [`LoadResult`].
+async fn run_framework_load(concurrency: usize, requests_per_task: usize) -> LoadResult {
+    use crate::load::framework::run_load;
+
+    let cfg = LoadConfig::new(concurrency, requests_per_task);
+
+    // One unit of work: a fresh router per request, timed from just before the request
+    // is built until `oneshot` resolves (the body is not read).
+    let single_request = || async {
+        let router = build_app();
+        let started = Instant::now();
+        let request = Request::builder()
+            .uri("/.well-known/stellar.toml")
+            .body(axum::body::Body::empty())
+            .unwrap();
+        let response = router.oneshot(request).await.unwrap();
+        (response.status(), started.elapsed())
+    };
+
+    run_load(cfg, single_request).await
+}
+
+// ---------------------------------------------------------------------------
+// Basic correctness
+// ---------------------------------------------------------------------------
+
+/// Handler answers a plain GET on the TOML route with `200 OK`.
+#[tokio::test]
+async fn test_stellar_toml_returns_200() {
+    let router = build_app();
+    let request = Request::builder()
+        .uri("/.well-known/stellar.toml")
+        .body(axum::body::Body::empty())
+        .unwrap();
+
+    // Drive the router once, in-process.
+    let response = router.oneshot(request).await.unwrap();
+    let status = response.status();
+
+    // Only the status code is under test here.
+    assert_eq!(status, StatusCode::OK);
+}
+
+/// Response includes the `Access-Control-Allow-Origin: *` header that SEP-1 requires
+/// of `stellar.toml` responses.
+#[tokio::test]
+async fn test_stellar_toml_cors_header() {
+    let router = build_app();
+    let request = Request::builder()
+        .uri("/.well-known/stellar.toml")
+        .body(axum::body::Body::empty())
+        .unwrap();
+
+    let response = router.oneshot(request).await.unwrap();
+
+    // A missing header fails with a descriptive message instead of a bare `unwrap`.
+    let headers = response.headers();
+    let allow_origin = headers
+        .get("access-control-allow-origin")
+        .expect("Access-Control-Allow-Origin header must be present");
+
+    assert_eq!(allow_origin, "*");
+}
+
+/// Response advertises a `Content-Type` that contains `text/plain`.
+#[tokio::test]
+async fn test_stellar_toml_content_type() {
+    let router = build_app();
+    let request = Request::builder()
+        .uri("/.well-known/stellar.toml")
+        .body(axum::body::Body::empty())
+        .unwrap();
+
+    let response = router.oneshot(request).await.unwrap();
+
+    let content_type = response
+        .headers()
+        .get("content-type")
+        .expect("Content-Type header must be present");
+
+    // Substring match, so any parameters after the media type are tolerated.
+    let is_plain_text = content_type.to_str().unwrap().contains("text/plain");
+    assert!(
+        is_plain_text,
+        "Content-Type must be text/plain, got: {:?}",
+        content_type
+    );
+}
+
+/// Response body mentions each of the TOML keys this suite treats as required:
+/// `VERSION`, `NETWORK_PASSPHRASE`, `ACCOUNTS` and `CURRENCIES`.
+#[tokio::test]
+async fn test_stellar_toml_body_content() {
+    // Checked in this order, so the first missing key is the one reported.
+    const REQUIRED_KEYS: [&str; 4] = ["VERSION", "NETWORK_PASSPHRASE", "ACCOUNTS", "CURRENCIES"];
+
+    let response = build_app()
+        .oneshot(
+            Request::builder()
+                .uri("/.well-known/stellar.toml")
+                .body(axum::body::Body::empty())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+
+    let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+    let text = std::str::from_utf8(&raw).unwrap();
+
+    // Plain substring checks: the body is not parsed as TOML here.
+    for key in REQUIRED_KEYS {
+        assert!(text.contains(key), "body must contain {}", key);
+    }
+}
+
+/// Response body mentions the USDC currency entry.
+#[tokio::test]
+async fn test_stellar_toml_contains_usdc() {
+    let router = build_app();
+    let request = Request::builder()
+        .uri("/.well-known/stellar.toml")
+        .body(axum::body::Body::empty())
+        .unwrap();
+
+    let response = router.oneshot(request).await.unwrap();
+
+    // Decode the full body as UTF-8 text.
+    let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+    let text = std::str::from_utf8(&raw).unwrap();
+
+    // Substring check only; the surrounding TOML structure is not inspected.
+    let mentions_usdc = text.contains("USDC");
+    assert!(mentions_usdc, "body must contain USDC currency");
+}
+
+/// Response body has at least one byte.
+#[tokio::test]
+async fn test_stellar_toml_non_empty_body() {
+    let response = build_app()
+        .oneshot(
+            Request::builder()
+                .uri("/.well-known/stellar.toml")
+                .body(axum::body::Body::empty())
+                .unwrap(),
+        )
+        .await
+        .unwrap();
+
+    // Collect the whole body; `usize::MAX` is passed as the size limit.
+    let payload = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+    assert!(!payload.is_empty(), "response body must not be empty");
+}
+
+/// Response bytes are identical across sequential calls, each served by a fresh router.
+#[tokio::test]
+async fn test_stellar_toml_deterministic() {
+    const CALLS: usize = 5;
+    let mut seen: Vec<Vec<u8>> = Vec::with_capacity(CALLS);
+
+    for _ in 0..CALLS {
+        // A new router per call, so nothing is shared between requests.
+        let router = build_app();
+        let request = Request::builder()
+            .uri("/.well-known/stellar.toml")
+            .body(axum::body::Body::empty())
+            .unwrap();
+        let response = router.oneshot(request).await.unwrap();
+
+        let raw = to_bytes(response.into_body(), usize::MAX).await.unwrap();
+        seen.push(raw.to_vec());
+    }
+
+    // Every later body is compared byte-for-byte against the first one.
+    let (baseline, later) = seen.split_first().unwrap();
+    for body in later {
+        assert_eq!(body, baseline, "all responses must be identical");
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Concurrency tests
+// ---------------------------------------------------------------------------
+
+/// Ten spawned tasks each issue one request; every response must be `200 OK`.
+#[tokio::test]
+async fn test_stellar_toml_10_concurrent() {
+    const TASKS: usize = 10;
+
+    // Spawn every task first so all of them are queued before any handle is awaited.
+    let mut handles = Vec::with_capacity(TASKS);
+    for _ in 0..TASKS {
+        let router = build_app();
+        handles.push(tokio::spawn(async move {
+            let request = Request::builder()
+                .uri("/.well-known/stellar.toml")
+                .body(axum::body::Body::empty())
+                .unwrap();
+            let response = router.oneshot(request).await.unwrap();
+            response.status()
+        }));
+    }
+
+    // `unwrap` on the join result turns a task panic into a test failure.
+    for handle in handles {
+        let status = handle.await.unwrap();
+        assert_eq!(status, StatusCode::OK);
+    }
+}
+
+/// Fifty spawned tasks each issue one request; every response must be `200 OK`.
+#[tokio::test]
+async fn test_stellar_toml_50_concurrent() {
+    const TASKS: usize = 50;
+
+    // Spawn every task first so all of them are queued before any handle is awaited.
+    let mut handles = Vec::with_capacity(TASKS);
+    for _ in 0..TASKS {
+        let router = build_app();
+        handles.push(tokio::spawn(async move {
+            let request = Request::builder()
+                .uri("/.well-known/stellar.toml")
+                .body(axum::body::Body::empty())
+                .unwrap();
+            let response = router.oneshot(request).await.unwrap();
+            response.status()
+        }));
+    }
+
+    // `unwrap` on the join result turns a task panic into a test failure.
+    for handle in handles {
+        let status = handle.await.unwrap();
+        assert_eq!(status, StatusCode::OK);
+    }
+}
+
+/// One hundred spawned tasks each issue one request; every response must be `200 OK`.
+#[tokio::test]
+async fn test_stellar_toml_100_concurrent() {
+    const TASKS: usize = 100;
+
+    // Spawn every task first so all of them are queued before any handle is awaited.
+    let mut handles = Vec::with_capacity(TASKS);
+    for _ in 0..TASKS {
+        let router = build_app();
+        handles.push(tokio::spawn(async move {
+            let request = Request::builder()
+                .uri("/.well-known/stellar.toml")
+                .body(axum::body::Body::empty())
+                .unwrap();
+            let response = router.oneshot(request).await.unwrap();
+            response.status()
+        }));
+    }
+
+    // `unwrap` on the join result turns a task panic into a test failure.
+    for handle in handles {
+        let status = handle.await.unwrap();
+        assert_eq!(status, StatusCode::OK);
+    }
+}
+
+/// Verify that all concurrent responses have identical bodies.
+#[tokio::test]
+async fn test_stellar_toml_concurrent_identical_bodies() {
+    const TASKS: usize = 20;
+
+    let mut join_set = tokio::task::JoinSet::new();
+    for _ in 0..TASKS {
+        join_set.spawn(async {
+            let app = build_app();
+            let request = Request::builder()
+                .uri("/.well-known/stellar.toml")
+                .body(axum::body::Body::empty())
+                .unwrap();
+            let resp = app.oneshot(request).await.unwrap();
+            to_bytes(resp.into_body(), usize::MAX)
+                .await
+                .unwrap()
+                .to_vec()
+        });
+    }
+
+    // Surface a failed task directly: stopping at the first join error would
+    // hide its cause behind the length assertion below.
+    let mut bodies: Vec<Vec<u8>> = Vec::with_capacity(TASKS);
+    while let Some(joined) = join_set.join_next().await {
+        bodies.push(joined.expect("request task failed to complete"));
+    }
+
+    assert_eq!(bodies.len(), TASKS);
+    let (first, rest) = bodies.split_first().expect("at least one body");
+    for body in rest {
+        assert_eq!(body, first, "all concurrent responses must be identical");
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Framework-based load tests with SLO assertions
+// ---------------------------------------------------------------------------
+
+/// 10 concurrent tasks × 10 requests each = 100 total. SLO: 0% errors, p99 < 200ms.
+#[tokio::test]
+async fn test_stellar_load_100_requests_slo() {
+    let latency_budget = std::time::Duration::from_millis(200);
+    let report = run_framework_load(10, 10).await;
+    report.print_summary("GET /.well-known/stellar.toml — 100 requests");
+    assert_load_result(&report, 0.0, latency_budget);
+}
+
+/// 20 concurrent tasks × 10 requests each = 200 total. SLO: 0% errors, p99 < 500ms.
+#[tokio::test]
+async fn test_stellar_load_200_requests_slo() {
+    let latency_budget = std::time::Duration::from_millis(500);
+    let report = run_framework_load(20, 10).await;
+    report.print_summary("GET /.well-known/stellar.toml — 200 requests");
+    assert_load_result(&report, 0.0, latency_budget);
+}
+
+/// 50 concurrent tasks × 10 requests each = 500 total. SLO: 0% errors, p99 < 1s.
+#[tokio::test]
+async fn test_stellar_load_500_requests_slo() {
+    let latency_budget = std::time::Duration::from_secs(1);
+    let report = run_framework_load(50, 10).await;
+    report.print_summary("GET /.well-known/stellar.toml — 500 requests");
+    assert_load_result(&report, 0.0, latency_budget);
+}
+
+/// Verify that every response produced under load is `200 OK` with a `*` CORS header.
+#[tokio::test]
+async fn test_stellar_load_headers_under_load() {
+    const TASKS: usize = 10;
+    const REQUESTS_PER_TASK: usize = 5;
+    let mut join_set = tokio::task::JoinSet::new();
+    for _ in 0..TASKS {
+        join_set.spawn(async {
+            let mut results = Vec::with_capacity(REQUESTS_PER_TASK);
+            for _ in 0..REQUESTS_PER_TASK {
+                let request = Request::builder()
+                    .uri("/.well-known/stellar.toml")
+                    .body(axum::body::Body::empty())
+                    .unwrap();
+                let resp = build_app().oneshot(request).await.unwrap();
+                let cors = resp
+                    .headers()
+                    .get("access-control-allow-origin")
+                    .map(|v| v.to_str().unwrap().to_string());
+                results.push((resp.status(), cors));
+            }
+            results
+        });
+    }
+
+    // Count checked responses: a failed task must fail the test, not end the loop early.
+    let mut checked = 0;
+    while let Some(joined) = join_set.join_next().await {
+        for (status, cors) in joined.expect("load task failed to complete") {
+            assert_eq!(status, StatusCode::OK);
+            assert_eq!(
+                cors.as_deref(),
+                Some("*"),
+                "CORS header must be '*' under load"
+            );
+            checked += 1;
+        }
+    }
+    assert_eq!(checked, TASKS * REQUESTS_PER_TASK);
+}
diff --git a/backend/tests/load_tests.rs b/backend/tests/load_tests.rs
index b24467d..8b86fd7 100644
--- a/backend/tests/load_tests.rs
+++ b/backend/tests/load_tests.rs
@@ -1,11 +1,29 @@
 //! Load and stress test suite entry point.
 //!
-//! Run with:
+//! This file is the integration test binary for all load tests. Each sub-module
+//! exercises a specific API endpoint under concurrent load using the shared
+//! [`load::framework`] helpers.
+//!
+//! # Running
+//!
 //! ```bash
+//! # All load tests (with output)
 //! cargo test -p backend --test load_tests -- --nocapture
+//!
+//! # A specific endpoint
+//! cargo test -p backend --test load_tests load::status_load -- --nocapture
+//! cargo test -p backend --test load_tests load::profile_load -- --nocapture
+//! cargo test -p backend --test load_tests load::dashboard_load -- --nocapture
+//! cargo test -p backend --test load_tests load::stellar_load -- --nocapture
+//!
+//! # Framework unit tests only
+//! cargo test -p backend --test load_tests load::framework -- --nocapture
 //! ```
 
 mod load {
+    pub mod dashboard_load;
+    pub mod framework;
     pub mod profile_load;
     pub mod status_load;
+    pub mod stellar_load;
 }