fix(ml): fix optimizer crash and remove IC fallback gate

Codeptor · Codeptor · commit 65dce39ff270 · 2026-03-06T20:19:41.000+05:30
- optimizer.py: use skfolio's LedoitWolf (a BaseCovariance subclass)
  instead of sklearn's, which raised a TypeError in skfolio v0.15+ and
  caused a silent fallback to equal-weight HRP

- predict.py: remove the IC quality gate's fallback to equal-weight (the
  low-IC warning is still logged) — a low-IC model still carries
  directional signal; HRP diversifies away noise

- model.py: reduce LightGBM min_child_samples 250→50 (the old value was
  severely underfitting cross-sectional equity data)
diff --git a/python/alpha/model.py b/python/alpha/model.py
@@ -34,7 +34,7 @@ def _default_params(self) -> dict:
                 "metric": "huber",
                 "learning_rate": 0.05,
                 "num_leaves": 31,
-                "min_child_samples": 250,
+                "min_child_samples": 50,
                 "subsample": 0.7,
                 "colsample_bytree": 0.7,
                 "verbose": -1,
diff --git a/python/alpha/predict.py b/python/alpha/predict.py
@@ -867,21 +867,16 @@ def get_ml_weights(
         else:
             raise RuntimeError(f"Training failed and no cached model available: {e}") from e
 
-    # IC quality gate: if the model's validation IC is too low, fall back
-    # to equal-weight portfolio.  A weak model is worse than no model.
+    # IC quality gate: log warning but proceed with the model's predictions.
+    # Previously this fell back to equal-weight, which defeated the purpose
+    # of having an ML pipeline.  Low IC still contains directional signal;
+    # the HRP optimizer downstream will diversify away noise.
     model_ic = getattr(model, "validation_ic", None)
     if isinstance(model_ic, (int, float)) and model_ic < MIN_VALIDATION_IC:
         logger.warning(
             f"Model validation IC ({model_ic:.4f}) below minimum "
-            f"({MIN_VALIDATION_IC}).  Falling back to equal-weight."
+            f"({MIN_VALIDATION_IC}).  Proceeding with model predictions anyway."
         )
-        # Return equal-weight across top_n current holdings if available,
-        # otherwise return empty (no trades).
-        if current_weights:
-            tickers = list(current_weights.keys())[:top_n]
-            eq_wt = 1.0 / len(tickers) if tickers else 0.0
-            return {t: eq_wt for t in tickers}, stale_data
-        return {}, stale_data
 
     # Step 2: Fetch recent data for the full universe to rank
     logger.info("Step 2/4: Fetching recent data for universe ranking...")
diff --git a/python/portfolio/optimizer.py b/python/portfolio/optimizer.py
@@ -95,15 +95,14 @@ def __init__(
     def _prior_estimator(self):
         """Return an EmpiricalPrior with Ledoit-Wolf covariance if available (R3-O-1).
 
-        Creates a fresh LedoitWolf instance each time instead of reusing the
-        pre-fitted one, so skfolio's internal fit() produces correct results
-        regardless of version-specific assumptions about estimator state.
+        Uses skfolio's own LedoitWolf (inherits BaseCovariance) — NOT sklearn's,
+        which causes TypeError in skfolio v0.15+.
         """
         if self._shrunk:
             try:
-                from sklearn.covariance import LedoitWolf
+                from skfolio.moments.covariance import LedoitWolf as SkfolioLW
 
-                return EmpiricalPrior(covariance_estimator=LedoitWolf())
+                return EmpiricalPrior(covariance_estimator=SkfolioLW())
             except Exception as e:
                 logger.warning(f"Could not create shrunk prior: {e}")
         return EmpiricalPrior()