@@ -7,7 +7,6 @@
 import hashlib
 import json
 import logging
-import random
 import tempfile
 from datetime import date, datetime, timezone
 from pathlib import Path
@@ -51,6 +50,11 @@
 # training data (~500 dates × 500 tickers = 250k samples).
 TRAINING_LOOKBACK = "2y"
 
+# IC quality gate: if validation IC is below this threshold, the model
+# is too weak to trust for live trading. Fall back to equal-weight.
+# 0.02 is conservative — typical cross-sectional ICs are 0.03-0.08.
+MIN_VALIDATION_IC = 0.02
+
 # R3-P-9/P-10 fix: resolve paths relative to project root, not CWD
 _PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
 MODEL_CACHE_DIR = _PROJECT_ROOT / "data" / "models"
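For context on MIN_VALIDATION_IC: "validation IC" here is the information coefficient, i.e. the per-date Spearman rank correlation between model scores and realized forward returns, averaged over the validation window. A minimal sketch of that computation, assuming a (date, ticker) MultiIndex (the helper name is hypothetical; the module's actual IC code is not shown in this diff):

import pandas as pd

def mean_daily_ic(preds: pd.Series, targets: pd.Series) -> float:
    """Mean daily Spearman rank IC over a (date, ticker) MultiIndex."""
    df = pd.DataFrame({"pred": preds, "target": targets}).dropna()
    # Rank-correlate scores with realized returns within each date,
    # then average the per-date correlations.
    daily = df.groupby(level=0).apply(
        lambda g: g["pred"].corr(g["target"], method="spearman")
    )
    return float(daily.mean())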
@@ -392,12 +396,12 @@ def train_model(
392396 "Use a point-in-time membership table for unbiased backtests."
393397 )
394398
395- # M12 fix: use a random sample instead of the first 100 alphabetically.
396- # Alphabetical slicing (A-D) introduces systematic sector/name bias.
397- # Seed is fixed for reproducibility across runs on the same day.
399+ # Train on the full S&P 500 universe. Previous versions sampled 100
400+ # tickers for speed, but this causes distribution mismatch at inference
401+ # time when the model scores all ~500 stocks. LightGBM handles 500
402+ # tickers × 2yr daily data (~250k rows) in under a minute.
398403 all_tickers = fetch_sp500_tickers ()
399- random .seed (42 )
400- tickers = training_tickers or random .sample (all_tickers , min (100 , len (all_tickers )))
404+ tickers = training_tickers or all_tickers
401405 raw = fetch_ohlcv (tickers , period = TRAINING_LOOKBACK )
402406 long = reshape_ohlcv_wide_to_long (raw )
403407
@@ -438,15 +442,23 @@ def train_model(
-    # with 5-day embargo to prevent target leakage (same approach as train.py)
+    # with an embargo to prevent target leakage (same approach as train.py)
     dates = labeled.index.get_level_values(0).unique().sort_values()
     split_date = dates[int(len(dates) * 0.8)]
-    embargo_offset = pd.tseries.offsets.BDay(5)
+    # The embargo must cover the longest feature lookback window to prevent
+    # information leakage from features that straddle the train/val boundary.
+    # The feature set includes 20-day returns, 20-day vol, 20-day Bollinger,
+    # and 60-day moving averages. 22 business days (~1 calendar month) gives
+    # a safe margin above the 20-day features without discarding too much
+    # data. (The 60-day MA creates backward dependence only, not forward
+    # leakage, so 22 days is sufficient.)
+    embargo_offset = pd.tseries.offsets.BDay(22)
     embargo_date = split_date + embargo_offset
 
     train_data = labeled.loc[labeled.index.get_level_values(0) <= split_date]
     val_data = labeled.loc[labeled.index.get_level_values(0) >= embargo_date]
 
     logger.info(
         f"Training on {len(train_data)} samples (up to {split_date.date()}), "
-        f"validating on {len(val_data)} samples (from {embargo_date.date()})"
+        f"validating on {len(val_data)} samples (from {embargo_date.date()}, "
+        f"embargo=22 business days)"
     )
 
     model = CrossSectionalModel(model_type="lightgbm", feature_cols=available_cols)
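To see why 22 business days clears the 20-day features, a self-contained check (the dates are illustrative):

import pandas as pd

split_date = pd.Timestamp("2025-06-30")
first_val_date = split_date + pd.tseries.offsets.BDay(22)

# A 20-day rolling feature computed on the first validation date reaches
# back 20 business days, which still lands strictly after split_date, so
# no validation feature window straddles the boundary.
lookback_start = first_val_date - pd.tseries.offsets.BDay(20)
assert lookback_start > split_date  # 2 business days of slack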
@@ -462,6 +474,9 @@ def train_model(
         logger.warning("No validation data available — training without early stopping")
         model.fit(train_data, target_col="target_5d")
+    # Attach IC to model so callers can gate on quality
+    model.validation_ic = ic  # type: ignore[attr-defined]
+
     # --- MLflow tracking (best-effort: never crash training) ---
     try:
         import mlflow
@@ -488,6 +503,15 @@ def train_model(
         except Exception as e:
             logger.debug(f"Could not log feature importance artifact: {e}")
 
+        # Log winsorize bounds alongside the model so rollbacks keep
+        # bounds in sync with the model version that produced them.
+        try:
+            bounds_path = MODEL_CACHE_DIR / "winsorize_bounds.json"
+            if bounds_path.exists():
+                mlflow.log_artifact(str(bounds_path), artifact_path="winsorize_bounds")
+        except Exception as e:
+            logger.debug(f"Could not log winsorize bounds artifact: {e}")
+
         logger.info("MLflow run logged successfully for live_lgbm_alpha")
     except Exception as e:
         logger.warning(f"MLflow tracking failed (training unaffected): {e}")
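On the rollback side, the tracked bounds can be pulled back out of the run so inference-time clipping matches the restored model. A sketch assuming MLflow 2.x and a known run id (the id value is a placeholder):

import json
import mlflow

# Download the "winsorize_bounds" artifact directory logged above.
local_dir = mlflow.artifacts.download_artifacts(
    run_id="<run-id-from-tracking>",
    artifact_path="winsorize_bounds",
)
with open(f"{local_dir}/winsorize_bounds.json") as f:
    bounds = json.load(f)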
@@ -707,6 +731,22 @@ def get_ml_weights(
     logger.info("Step 1/4: Training model...")
     model = train_model(data_path=training_data_path)
 
+    # IC quality gate: if the model's validation IC is too low, fall back
+    # to an equal-weight portfolio. A weak model is worse than no model.
+    model_ic = getattr(model, "validation_ic", None)
+    if isinstance(model_ic, (int, float)) and model_ic < MIN_VALIDATION_IC:
+        logger.warning(
+            f"Model validation IC ({model_ic:.4f}) below minimum "
+            f"({MIN_VALIDATION_IC}). Falling back to equal-weight."
+        )
+        # Return equal weight across up to top_n current holdings if any
+        # exist; otherwise return empty (no trades).
+        if current_weights:
+            tickers = list(current_weights.keys())[:top_n]
+            eq_wt = 1.0 / len(tickers) if tickers else 0.0
+            return {t: eq_wt for t in tickers}
+        return {}
+
     # Step 2: Fetch recent data for the full universe to rank
     logger.info("Step 2/4: Fetching recent data for universe ranking...")
     from python.data.ingestion import fetch_sp500_tickers
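Caller-side, get_ml_weights can now return an equal-weight dict or an empty dict instead of model-driven weights. A usage sketch (keyword names taken from the hunk above; the holdings and top_n value are illustrative):

weights = get_ml_weights(
    current_weights={"AAPL": 0.5, "MSFT": 0.5},
    top_n=10,
)
if not weights:
    # Model failed the IC gate and there were no current holdings:
    # skip trading this cycle rather than act on a weak signal.
    pass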