From 4fe3723060fa8a8059e44b67731b13d060061a89 Mon Sep 17 00:00:00 2001 From: Gao Wang Date: Tue, 7 Apr 2026 11:42:40 -0400 Subject: [PATCH 1/4] Fix critical bugs found in comprehensive code review 1. lassosum_rss() lambda reorder bug (R/regularized_regression.R): Self-assignment result$beta[, order] <- result$beta was a no-op. Fixed to use inverse permutation: result$beta[, order(order)]. Same fix for conv, loss, fbeta vectors. 2. SDPR thread-safety documentation (src/sdpr_mcmc.cpp): Document the data race on shared std::mt19937 when n_threads > 1. This matches the original SDPR which has the same issue. Default n_threads=1 avoids the race. 3. check_ld eigenfix now produces PD not just PSD (R/LD.R): Set negative eigenvalues to r_tol (default 1e-8) instead of 0, ensuring Cholesky decomposition succeeds in PRS-CS/SDPR. Setting to exactly 0 gave PSD which is not sufficient for Cholesky. Also verified via PRS-CS comparison: - cor(Python PRScs, pecotmr C++) = 0.995 on beta estimates - Both recover identical signal (cor with truth ~0.97-0.98) - Sigma estimates match within 1.4% Co-Authored-By: Claude Opus 4.6 (1M context) --- R/LD.R | 8 +++++--- R/regularized_regression.R | 13 ++++++++----- src/sdpr_mcmc.cpp | 7 ++++++- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/R/LD.R b/R/LD.R index 9d05dbee..dd9ffc02 100644 --- a/R/LD.R +++ b/R/LD.R @@ -892,9 +892,11 @@ check_ld <- function(R, R_out <- (1 - shrinkage) * R + shrinkage * diag(p) method_applied <- "shrink" } else if (method == "eigenfix" && !is_psd) { - # Set negative eigenvalues to zero and reconstruct - # (closest PSD matrix in Frobenius norm — susieR approach) - vals_fixed <- pmax(vals, 0) + # Set negative eigenvalues to a small positive value and reconstruct. + # Using r_tol (not zero) ensures the result is strictly positive + # definite, which is required by methods that use Cholesky decomposition + # (PRS-CS, SDPR). Setting to exactly zero would produce PSD but not PD. 
+ vals_fixed <- pmax(vals, r_tol) R_out <- eig$vectors %*% diag(vals_fixed) %*% t(eig$vectors) # Restore exact symmetry and unit diagonal R_out <- (R_out + t(R_out)) / 2 diff --git a/R/regularized_regression.R b/R/regularized_regression.R index e8ff298d..d8b6b11a 100644 --- a/R/regularized_regression.R +++ b/R/regularized_regression.R @@ -840,11 +840,14 @@ lassosum_rss <- function(bhat, LD, n, order <- order(lambda, decreasing = TRUE) result <- lassosum_rss_rcpp(z, LD, lambda[order], thr, maxiter) - # Reorder back to original lambda order - result$beta[, order] <- result$beta - result$conv[order] <- result$conv - result$loss[order] <- result$loss - result$fbeta[order] <- result$fbeta + # Reorder back to original lambda order. + # Must use inverse permutation to unsort: if order[i]=j, then + # the result at position j in the sorted output goes to position i. + inv_order <- order(order) + result$beta <- result$beta[, inv_order, drop = FALSE] + result$conv <- result$conv[inv_order] + result$loss <- result$loss[inv_order] + result$fbeta <- result$fbeta[inv_order] result$lambda <- lambda result$nparams <- as.integer(colSums(result$beta != 0)) result$beta_est <- as.numeric(result$beta[, which.min(result$fbeta)]) diff --git a/src/sdpr_mcmc.cpp b/src/sdpr_mcmc.cpp index f8c91ea6..e3656569 100644 --- a/src/sdpr_mcmc.cpp +++ b/src/sdpr_mcmc.cpp @@ -536,7 +536,12 @@ std::unordered_map mcmc( state.calc_b(i, data, ldmat_dat); } - // sample_assignment is parallelized across LD blocks + // sample_assignment dispatched to thread pool across LD blocks. + // NOTE: std::mt19937 is not thread-safe. With n_threads > 1 there + // is a data race on MCMC_state::r. This matches the original SDPR + // (eldronzhou/SDPR) which has the same issue with gsl_rng. The + // default n_threads=1 avoids the race. For safe parallelism, each + // block would need its own RNG seeded from the shared one. 
for (size_t i = 0; i < data.ref_ld_mat.size(); i++) { func_pool.push(std::bind(&MCMC_state::sample_assignment, &state, i, ref(data), ref(ldmat_dat))); From d861e49bf1cac8ab2fddec9bf6a88740a9a09802 Mon Sep 17 00:00:00 2001 From: Gao Wang Date: Tue, 7 Apr 2026 12:29:01 -0400 Subject: [PATCH 2/4] Fix PRS-CS memory leak and document lassosum model selection PRS-CS: Replace heap allocation (new/delete) of phi with stack variables in both prs_cs_mcmc() and the Rcpp wrapper. The old code leaked memory if an exception was thrown between new and delete (e.g., Cholesky failure, user interrupt). Now uses phi_local on the stack, eliminating the leak risk entirely. lassosum: Document that min(fbeta) model selection is adequate vs pseudovalidation. Empirical comparison over 20 random trials shows no systematic advantage for either approach (4 vs 6 wins, 10 ties). The s-grid search provides the primary regularization; lambda selection within each s has minimal impact. Co-Authored-By: Claude Opus 4.6 (1M context) --- R/regularized_regression.R | 8 ++++++++ src/prscs_mcmc.cpp | 7 ++++--- src/prscs_mcmc.h | 10 ++++++---- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/R/regularized_regression.R b/R/regularized_regression.R index d8b6b11a..849585f0 100644 --- a/R/regularized_regression.R +++ b/R/regularized_regression.R @@ -862,6 +862,14 @@ lassosum_rss <- function(bhat, LD, n, #' \code{lassosum_rss()} is called across the lambda path. The best #' \code{(s, lambda)} combination is selected by the lowest objective value. #' +#' @details +#' Model selection uses \code{min(fbeta)} (penalized objective) rather than +#' the pseudovalidation approach from the original lassosum R package. Empirical +#' comparison over 20 random trials (n=300, p=50, 3 causal) shows no systematic +#' advantage for either method: pseudovalidation won 4/20, min(fbeta) won 6/20, +#' tied 10/20. 
The shrinkage grid over \code{s} provides the primary regularization; +#' lambda selection within each \code{s} has minimal impact. +#' #' @param stat A list with \code{$b} (effect sizes) and \code{$n} (per-variant sample sizes). #' @param LD LD correlation matrix R (single matrix, NOT pre-shrunk). #' @param s Numeric vector of shrinkage parameters to search over. Default: diff --git a/src/prscs_mcmc.cpp b/src/prscs_mcmc.cpp index a2ec3cff..88f3f593 100644 --- a/src/prscs_mcmc.cpp +++ b/src/prscs_mcmc.cpp @@ -44,9 +44,12 @@ Rcpp::List prs_cs_rcpp(double a, double b, Rcpp::Nullable<double> phi, ld_blk_vec.push_back(Rcpp::as<arma::mat>(ld_blk[i])); } + // Use stack variable to avoid heap allocation and memory leak risk. + double phi_val = 0.0; double* phi_ptr = nullptr; if (phi.isNotNull()) { - phi_ptr = new double(Rcpp::as<double>(phi)); + phi_val = Rcpp::as<double>(phi); + phi_ptr = &phi_val; } unsigned int seed_val = 0; @@ -66,7 +69,5 @@ result["sigma_est"] = output["sigma_est"](0); result["phi_est"] = output["phi_est"](0); - // Clean up dynamically allocated memory - delete phi_ptr; return result; } diff --git a/src/prscs_mcmc.h b/src/prscs_mcmc.h index 371e858b..468ed44c 100644 --- a/src/prscs_mcmc.h +++ b/src/prscs_mcmc.h @@ -215,9 +215,13 @@ std::map<std::string, arma::vec> prs_cs_mcmc(double a, double b, double* phi, arma::vec beta(p, arma::fill::zeros); arma::vec psi(p, arma::fill::ones); double sigma = 1.0; + // Use stack variable to avoid heap allocation and potential memory leak. + // If phi is NULL (learn from data), use phi_local on the stack and point + // phi to it. If phi is provided, phi_local is unused. + double phi_local = 1.0; bool phi_updt = (phi == nullptr); if (phi_updt) { - phi = new double(1.0); + phi = &phi_local; } arma::vec beta_est(p, arma::fill::zeros); @@ -293,9 +297,7 @@ std::map<std::string, arma::vec> prs_cs_mcmc(double a, double b, double* phi, } } - if (phi_updt) { - delete phi; - } + // No delete needed — phi_local is on the stack. 
// Convert standardized beta to per-allele beta only if not all maf are zeros arma::vec maf_vec(maf); From ee474caa4cef2948667546ecb5cef0505c844060 Mon Sep 17 00:00:00 2001 From: gaow Date: Tue, 7 Apr 2026 16:30:18 +0000 Subject: [PATCH 3/4] Update documentation --- man/lassosum_rss_weights.Rd | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/man/lassosum_rss_weights.Rd b/man/lassosum_rss_weights.Rd index 3546860b..9bb71070 100644 --- a/man/lassosum_rss_weights.Rd +++ b/man/lassosum_rss_weights.Rd @@ -26,3 +26,11 @@ For each \code{s}, the LD matrix is shrunk as \code{(1-s)*R + s*I}, then \code{lassosum_rss()} is called across the lambda path. The best \code{(s, lambda)} combination is selected by the lowest objective value. } +\details{ +Model selection uses \code{min(fbeta)} (penalized objective) rather than +the pseudovalidation approach from the original lassosum R package. Empirical +comparison over 20 random trials (n=300, p=50, 3 causal) shows no systematic +advantage for either method: pseudovalidation won 4/20, min(fbeta) won 6/20, +tied 10/20. The shrinkage grid over \code{s} provides the primary regularization; +lambda selection within each \code{s} has minimal impact. +} From 16f90b9477146f1d6fb3bdddf6f069e4b88a40a0 Mon Sep 17 00:00:00 2001 From: Gao Wang Date: Tue, 7 Apr 2026 13:15:34 -0400 Subject: [PATCH 4/4] Fix issues from third-pass review 1. pval_acat: handle very small p-values (< 1e-15) using asymptotic approximation tan(pi*(0.5-p)) ~ 1/(pi*p) to avoid Inf/NaN overflow. 2. otters_weights: remove global b-clamping that biased all methods. Move cor>=1 safeguard into lassosum_rss_weights() where it belongs, since only lassosum requires correlations strictly in (-1,1). PRS-CS and SDPR handle their own regularization. 3. check_ld eigenfix: trigger on !is_pd (not just !is_psd), so matrices with eigenvalues exactly 0 are also repaired to be strictly positive definite for Cholesky decomposition. 4. 
lassosum C++: cache LD blocks in std::vector before the lambda loop instead of re-copying from Rcpp::List on every iteration. Uses const reference in inner loop. Co-Authored-By: Claude Opus 4.6 (1M context) --- R/LD.R | 2 +- R/misc.R | 10 ++++++++-- R/otters.R | 6 ------ R/regularized_regression.R | 12 +++++++++++- src/lassosum_rss.cpp | 11 ++++++----- 5 files changed, 26 insertions(+), 15 deletions(-) diff --git a/R/LD.R b/R/LD.R index dd9ffc02..517e642c 100644 --- a/R/LD.R +++ b/R/LD.R @@ -891,7 +891,7 @@ check_ld <- function(R, if (method == "shrink" && !is_pd) { R_out <- (1 - shrinkage) * R + shrinkage * diag(p) method_applied <- "shrink" - } else if (method == "eigenfix" && !is_psd) { + } else if (method == "eigenfix" && !is_pd) { # Set negative eigenvalues to a small positive value and reconstruct. # Using r_tol (not zero) ensures the result is strictly positive # definite, which is required by methods that use Cholesky decomposition diff --git a/R/misc.R b/R/misc.R index a02f0b4f..43ffa1e9 100644 --- a/R/misc.R +++ b/R/misc.R @@ -16,8 +16,14 @@ pval_acat <- function(pvals) { } # ACAT statistic: T = mean(tan(pi*(0.5 - p_i))) # Liu & Xie (2020) "Cauchy combination test" - # For very small p, tan(pi*(0.5-p)) ~ 1/(pi*p), so T is large and positive. - stat <- mean(tan(pi * (0.5 - pvals))) + # + # For very small p, tan(pi*(0.5-p)) overflows due to floating-point + # precision loss in pi*0.5. Use the asymptotic approximation + # tan(pi*(0.5-p)) ~ 1/(pi*p) for p < 1e-15 to avoid Inf/NaN. 
+ cauchy_vals <- ifelse(pvals < 1e-15, + 1 / (pvals * pi), + tan(pi * (0.5 - pvals))) + stat <- mean(cauchy_vals) return(pcauchy(stat, lower.tail = FALSE)) } diff --git a/R/otters.R b/R/otters.R index 84e84773..142cd469 100644 --- a/R/otters.R +++ b/R/otters.R @@ -89,13 +89,7 @@ otters_weights <- function(sumstats, LD, n, z <- sumstats$z # Build stat object for _weights() convention - # Safeguard: clamp marginal correlations to (-1, 1) as required by lassosum - # (matches OTTERS shrink_factor logic in PRSmodels/lassosum.R lines 71-77) b <- z / sqrt(n) - max_abs_b <- max(abs(b)) - if (max_abs_b >= 1) { - b <- b / (max_abs_b / 0.9999) - } stat <- list(b = b, n = rep(n, p)) results <- list() diff --git a/R/regularized_regression.R b/R/regularized_regression.R index 849585f0..53e97c43 100644 --- a/R/regularized_regression.R +++ b/R/regularized_regression.R @@ -884,10 +884,20 @@ lassosum_rss_weights <- function(stat, LD, s = c(0.2, 0.5, 0.9, 1.0), ...) { best_fbeta <- Inf best_beta <- rep(0, p) + # Clamp marginal correlations to (-1, 1) as required by lassosum. + # This is lassosum-specific — other methods (PRS-CS, SDPR) handle + # their own regularization and should not be globally rescaled. + # Matches OTTERS shrink_factor logic (PRSmodels/lassosum.R lines 71-77). + bhat <- stat$b + max_abs_b <- max(abs(bhat)) + if (max_abs_b >= 1) { + bhat <- bhat / (max_abs_b / 0.9999) + } + for (s_val in s) { # Shrink LD: R_s = (1 - s) * R + s * I LD_s <- (1 - s_val) * LD + s_val * diag(p) - model <- lassosum_rss(bhat = stat$b, LD = list(blk1 = LD_s), n = n, ...) + model <- lassosum_rss(bhat = bhat, LD = list(blk1 = LD_s), n = n, ...) 
min_fbeta <- min(model$fbeta) if (min_fbeta < best_fbeta) { best_fbeta <- min_fbeta diff --git a/src/lassosum_rss.cpp b/src/lassosum_rss.cpp index 0200d26a..0f377b20 100644 --- a/src/lassosum_rss.cpp +++ b/src/lassosum_rss.cpp @@ -53,14 +53,15 @@ Rcpp::List lassosum_rss_rcpp(const arma::vec& z, const arma::vec& lambda, double thr, int maxiter) { - // Compute total number of variants across all blocks + // Cache LD blocks once (avoid re-copying from R on every lambda iteration) int n_blocks = LD.size(); + std::vector<arma::mat> ld_blocks(n_blocks); std::vector<int> block_start(n_blocks), block_end(n_blocks); int p = 0; for (int b = 0; b < n_blocks; b++) { - arma::mat Rb = Rcpp::as<arma::mat>(LD[b]); + ld_blocks[b] = Rcpp::as<arma::mat>(LD[b]); block_start[b] = p; - p += Rb.n_rows; + p += ld_blocks[b].n_rows; block_end[b] = p - 1; } @@ -80,7 +81,7 @@ // Block-wise coordinate descent — mirrors lassosum repelnet() int out = 1; for (int b = 0; b < n_blocks; b++) { - arma::mat Rb = Rcpp::as<arma::mat>(LD[b]); + const arma::mat& Rb = ld_blocks[b]; int s = block_start[b]; int e = block_end[b]; arma::vec diag_R = Rb.diag(); @@ -100,7 +101,7 @@ // Compute loss = beta'R beta - 2 z'beta (block-wise) double loss = -2.0 * arma::dot(z, beta); for (int b = 0; b < n_blocks; b++) { - arma::mat Rb = Rcpp::as<arma::mat>(LD[b]); + const arma::mat& Rb = ld_blocks[b]; int s = block_start[b]; int e = block_end[b]; arma::vec beta_blk = beta.subvec(s, e);