@@ -406,6 +406,8 @@ def fit(
406406 converged = True
407407 else :
408408 v , w , converged , mspe_v = _outer_solve_V (
409+ X1 ,
410+ X0 ,
409411 X1s ,
410412 X0s ,
411413 Z1 ,
@@ -913,6 +915,8 @@ def _inner_solve_W(
913915
914916def _v_starts (
915917 k : int ,
918+ X1 : np .ndarray ,
919+ X0 : np .ndarray ,
916920 X1s : np .ndarray ,
917921 X0s : np .ndarray ,
918922 Z1 : np .ndarray ,
@@ -922,11 +926,14 @@ def _v_starts(
922926 inner_max_iter : int ,
923927 inner_min_decrease : float ,
924928) -> List [np .ndarray ]:
925- """Build a list of starting ``theta`` vectors for the outer V search.
929+ """Build a list of DISTINCT starting ``theta`` vectors for the outer V search.
926930
927- Heuristic starts: uniform V; inverse-row-variance V; univariate-fit V
931+ Heuristic starts: uniform V; inverse-row-variance V (computed from the
932+ UNSTANDARDIZED predictors ``X1``/``X0`` — on the standardized rows every variance
933+ is ~1 by construction, so it would collapse to the uniform start); univariate-fit V
928934 (v_i ∝ 1/MSPE_i from solving with mass concentrated on predictor i). Remaining
929- starts are random Dirichlet draws. Non-finite candidates are dropped
935+ starts are random Dirichlet draws. Candidates are de-duplicated so the multistart
936+ never runs the same Nelder-Mead seed twice; non-finite candidates are dropped
930937 (validation 10); uniform is always retained.
931938 """
932939
@@ -944,24 +951,28 @@ def _to_theta(v: np.ndarray) -> Optional[np.ndarray]:
944951 theta = theta - np .mean (theta )
945952 return theta if np .all (np .isfinite (theta )) else None
946953
947- # Candidates are generated lazily and we stop as soon as n_starts are collected,
948- # so a small n_starts does not pay for heuristic starts it would only discard. In
949- # particular n_starts=1 returns the uniform start without running the O(k) univariate
950- # inner-solve loop below. The candidate ORDER (uniform -> inverse-variance ->
951- # univariate-fit -> Dirichlet) is unchanged, so any given n_starts yields the same
952- # set as before — only unused work is skipped.
954+ def _add_unique (t : Optional [np .ndarray ], pool : List [np .ndarray ]) -> None :
955+ # Append only DISTINCT, finite candidates so the multistart never runs the same
956+ # Nelder-Mead seed twice (a degenerate heuristic must not waste a start).
957+ if t is not None and not any (np .allclose (t , e , atol = 1e-9 ) for e in pool ):
958+ pool .append (t )
959+
960+ # Candidates are generated lazily and we stop as soon as `target` DISTINCT starts are
961+ # collected, so a small n_starts does not pay for heuristic starts it would only
962+ # discard. In particular n_starts=1 returns the uniform start without running the
963+ # O(k) univariate inner-solve loop below.
953964 target = max (n_starts , 1 )
954965 candidates : List [np .ndarray ] = [np .zeros (k )] # uniform V
955966
956- # inverse row variance of the standardized predictors over donors+treated
967+ # inverse row variance of the UNSTANDARDIZED predictors over donors+treated.
968+ # (On the standardized rows every variance is ~1, so this would collapse to the
969+ # uniform start — using the raw scales makes it a genuinely different seed.)
957970 if len (candidates ) < target :
958- combined = np .column_stack ([X0s , X1s .reshape (- 1 , 1 )])
971+ combined = np .column_stack ([X0 , X1 .reshape (- 1 , 1 )])
959972 row_var = np .var (combined , axis = 1 , ddof = 1 )
960973 inv_var = np .where (row_var > 0 , 1.0 / np .maximum (row_var , 1e-12 ), 0.0 )
961974 if np .sum (inv_var ) > 0 :
962- t = _to_theta (inv_var / np .sum (inv_var ))
963- if t is not None :
964- candidates .append (t )
975+ _add_unique (_to_theta (inv_var / np .sum (inv_var )), candidates )
965976
966977 # univariate-fit start: v_i ∝ 1 / (pre-outcome MSPE of W solved with V=e_i).
967978 # Skipped entirely when enough candidates are already collected (saves k inner solves).
@@ -974,23 +985,21 @@ def _to_theta(v: np.ndarray) -> Optional[np.ndarray]:
974985 uni_mspe [i ] = float (np .mean ((Z1 - Z0 @ w_i ) ** 2 ))
975986 inv_mspe = np .where (uni_mspe > 0 , 1.0 / np .maximum (uni_mspe , 1e-12 ), 0.0 )
976987 if np .sum (inv_mspe ) > 0 :
977- t = _to_theta (inv_mspe / np .sum (inv_mspe ))
978- if t is not None :
979- candidates .append (t )
988+ _add_unique (_to_theta (inv_mspe / np .sum (inv_mspe )), candidates )
980989
981- # random Dirichlet draws to reach n_starts (bounded attempts as a backstop)
990+ # random Dirichlet draws to fill the remaining slots with DISTINCT starts (bounded attempts)
982991 attempts = 0
983- max_attempts = 10 * n_starts + 20
992+ max_attempts = 20 * n_starts + 20
984993 while len (candidates ) < target and attempts < max_attempts :
985994 attempts += 1
986- t = _to_theta (rng .dirichlet (np .ones (k )))
987- if t is not None :
988- candidates .append (t )
995+ _add_unique (_to_theta (rng .dirichlet (np .ones (k ))), candidates )
989996
990997 return candidates [:target ]
991998
992999
9931000def _outer_solve_V (
1001+ X1 : np .ndarray ,
1002+ X0 : np .ndarray ,
9941003 X1s : np .ndarray ,
9951004 X0s : np .ndarray ,
9961005 Z1 : np .ndarray ,
@@ -1029,7 +1038,9 @@ def objective(theta: np.ndarray) -> float:
10291038 powell_options ["ftol" ] = powell_options .pop ("fatol" )
10301039
10311040 rng = np .random .default_rng (seed )
1032- starts = _v_starts (k , X1s , X0s , Z1 , Z0 , n_starts , rng , inner_max_iter , inner_min_decrease )
1041+ starts = _v_starts (
1042+ k , X1 , X0 , X1s , X0s , Z1 , Z0 , n_starts , rng , inner_max_iter , inner_min_decrease
1043+ )
10331044
10341045 best_x : np .ndarray = starts [0 ]
10351046 best_fun = np .inf
0 commit comments