Skip to content

Commit 13c5b61

Browse files
Merge pull request #25 from alexmccreight/main
update ld threshold logic in complex setting
2 parents ce939bf + c2a7a42 commit 13c5b61

3 files changed

Lines changed: 107 additions & 81 deletions

File tree

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Encoding: UTF-8
22
Type: Package
33
Package: simxQTL
4-
Version: 0.1.0
5-
Date: 2023-12-16
4+
Version: 0.1.1
5+
Date: 2026-02-17
66
Title: Simulate xQTL associations
77
Description: Provides a fast and memory-efficient method for
88
centering and scaling matrices.

R/simulate_eQTL.R

Lines changed: 96 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,10 @@ is_causal_power <- function(G, beta, residual_variance, power = 0.80) {
189189
#' @param inf_sd Standard deviation for drawing infinitesimal effects (default 0.01).
190190
#' @param standardize Logical; if TRUE, the genotype matrix will be standardized.
191191
#' @param independent Logical; if TRUE, ensures all sparse and oligogenic SNPs have |r| < ld_threshold with each other (default TRUE).
192-
#' @param ld_threshold Numeric; maximum allowed absolute correlation between causal variants when independent = TRUE (default 0.15).
193-
#' @param max_attempts Integer; maximum number of attempts to find SNPs satisfying LD constraints (default 200).
192+
#' @param ld_threshold Numeric; starting maximum allowed absolute correlation between causal variants when independent = TRUE (default 0.10). If constraints cannot be satisfied, the threshold is progressively increased by ld_step.
193+
#' @param ld_step Numeric; amount to increase ld_threshold after max_attempts failures (default 0.10).
194+
#' @param ld_max Numeric; maximum ld_threshold to try before giving up (default 0.50).
195+
#' @param max_attempts Integer; maximum number of attempts per LD threshold level to find SNPs satisfying LD constraints (default 200).
194196
#' @param seed Optional seed for reproducibility.
195197
#' @return A list containing the standardized genotype matrix, simulated phenotype,
196198
#' combined beta values, indices for each effect component, realized heritability estimates,
@@ -210,7 +212,9 @@ generate_cis_qtl_data <- function(G,
210212
inf_sd = 0.01,
211213
standardize = TRUE,
212214
independent = TRUE,
213-
ld_threshold = 0.15,
215+
ld_threshold = 0.10,
216+
ld_step = 0.10,
217+
ld_max = 0.50,
214218
max_attempts = 200,
215219
seed = NULL) {
216220
# Input validation
@@ -232,96 +236,111 @@ generate_cis_qtl_data <- function(G,
232236
# All columns are valid for selection
233237
valid_cols <- 1:n_features
234238

235-
# Setup for LD constraint checking
236-
max_attempts <- if (independent) max_attempts else 1
237-
attempt <- 0
239+
# Setup for progressive LD constraint checking
238240
ld_satisfied <- FALSE
241+
current_ld_threshold <- ld_threshold
242+
total_attempts <- 0
239243

240244
# Set initial seed if provided
241245
if (!is.null(seed)) {
242246
set.seed(seed)
243247
}
244248

245-
while (!ld_satisfied && attempt < max_attempts) {
246-
attempt <- attempt + 1
249+
# Progressive LD threshold loop
250+
while (!ld_satisfied && current_ld_threshold <= ld_max) {
251+
attempt <- 0
247252

248-
# Set seed for this attempt (for reproducibility)
249-
if (!is.null(seed) && attempt > 1) {
250-
set.seed(seed * 1000 + attempt)
251-
}
252-
253-
# 1. Sparse Effects using target-based scaling
254-
sparse_res <- simulate_sparse_effects(G, h2_sparse, n_sparse, sparse_sd, valid_cols)
255-
beta_sparse <- sparse_res$beta
256-
sparse_indices <- sparse_res$sparse_indices
257-
258-
# 2. Oligogenic Effects using target-based scaling
259-
non_sparse_indices <- setdiff(valid_cols, sparse_indices)
260-
oligo_res <- simulate_oligogenic_effects(G, h2_oligogenic, n_oligogenic,
261-
mixture_props, non_sparse_indices,
262-
oligo_sds)
263-
beta_oligo <- oligo_res$beta
264-
oligogenic_indices <- oligo_res$oligogenic_indices
265-
266-
# 3. Infinitesimal Effects using target-based scaling
267-
infinitesimal_pool <- setdiff(non_sparse_indices, oligogenic_indices)
268-
269-
# Select n_inf SNPs from the infinitesimal pool if n_inf is specified
270-
if (!is.null(n_inf)) {
271-
n_inf_actual <- min(n_inf, length(infinitesimal_pool))
272-
infinitesimal_indices <- sample(infinitesimal_pool, n_inf_actual)
273-
} else {
274-
# Default behavior: all remaining SNPs get infinitesimal effects
275-
infinitesimal_indices <- infinitesimal_pool
276-
}
277-
278-
beta_inf <- simulate_infinitesimal_effects(G, h2_infinitesimal, infinitesimal_indices,
279-
inf_sd)
280-
281-
# Combine all effect components
282-
beta <- beta_sparse + beta_oligo + beta_inf
253+
while (!ld_satisfied && attempt < max_attempts) {
254+
attempt <- attempt + 1
255+
total_attempts <- total_attempts + 1
283256

284-
# Generate latent genetic component and phenotype
285-
g <- as.vector(G %*% beta)
286-
var_g <- var(g)
287-
var_epsilon <- var_g * (1 - h2g) / h2g
288-
epsilon <- rnorm(n_samples, 0, sqrt(var_epsilon))
289-
y <- g + epsilon
290-
291-
# Check LD constraint if independent = TRUE (applied to sparse and oligogenic SNPs)
292-
if (independent) {
293-
ld_satisfied <- TRUE # Assume satisfied unless violations found
294-
295-
# Check sparse SNPs: |r| < ld_threshold
296-
if (length(sparse_indices) > 1) {
297-
sparse_cor <- cor(G[, sparse_indices, drop = FALSE])
298-
high_ld_sparse <- which(abs(sparse_cor) >= ld_threshold & upper.tri(sparse_cor, diag = FALSE), arr.ind = TRUE)
299-
if (nrow(high_ld_sparse) > 0) ld_satisfied <- FALSE
257+
# Set seed for this attempt (for reproducibility)
258+
if (!is.null(seed) && total_attempts > 1) {
259+
set.seed(seed * 1000 + total_attempts)
300260
}
301261

302-
# Check oligogenic SNPs: |r| < ld_threshold
303-
if (ld_satisfied && length(oligogenic_indices) > 1) {
304-
oligo_cor <- cor(G[, oligogenic_indices, drop = FALSE])
305-
high_ld_oligo <- which(abs(oligo_cor) >= ld_threshold & upper.tri(oligo_cor, diag = FALSE), arr.ind = TRUE)
306-
if (nrow(high_ld_oligo) > 0) ld_satisfied <- FALSE
262+
# 1. Sparse Effects using target-based scaling
263+
sparse_res <- simulate_sparse_effects(G, h2_sparse, n_sparse, sparse_sd, valid_cols)
264+
beta_sparse <- sparse_res$beta
265+
sparse_indices <- sparse_res$sparse_indices
266+
267+
# 2. Oligogenic Effects using target-based scaling
268+
non_sparse_indices <- setdiff(valid_cols, sparse_indices)
269+
oligo_res <- simulate_oligogenic_effects(G, h2_oligogenic, n_oligogenic,
270+
mixture_props, non_sparse_indices,
271+
oligo_sds)
272+
beta_oligo <- oligo_res$beta
273+
oligogenic_indices <- oligo_res$oligogenic_indices
274+
275+
# 3. Infinitesimal Effects using target-based scaling
276+
infinitesimal_pool <- setdiff(non_sparse_indices, oligogenic_indices)
277+
278+
# Select n_inf SNPs from the infinitesimal pool if n_inf is specified
279+
if (!is.null(n_inf)) {
280+
n_inf_actual <- min(n_inf, length(infinitesimal_pool))
281+
infinitesimal_indices <- sample(infinitesimal_pool, n_inf_actual)
282+
} else {
283+
# Default behavior: all remaining SNPs get infinitesimal effects
284+
infinitesimal_indices <- infinitesimal_pool
307285
}
308286

309-
# Check between sparse and oligogenic: |r| < ld_threshold
310-
if (ld_satisfied && length(sparse_indices) > 0 && length(oligogenic_indices) > 0) {
311-
cross_cor <- cor(G[, sparse_indices, drop = FALSE], G[, oligogenic_indices, drop = FALSE])
312-
high_ld_cross <- which(abs(cross_cor) >= ld_threshold, arr.ind = TRUE)
313-
if (nrow(high_ld_cross) > 0) ld_satisfied <- FALSE
287+
beta_inf <- simulate_infinitesimal_effects(G, h2_infinitesimal, infinitesimal_indices,
288+
inf_sd)
289+
290+
# Combine all effect components
291+
beta <- beta_sparse + beta_oligo + beta_inf
292+
293+
# Generate latent genetic component and phenotype
294+
g <- as.vector(G %*% beta)
295+
var_g <- var(g)
296+
var_epsilon <- var_g * (1 - h2g) / h2g
297+
epsilon <- rnorm(n_samples, 0, sqrt(var_epsilon))
298+
y <- g + epsilon
299+
300+
# Check LD constraint if independent = TRUE (applied to sparse and oligogenic SNPs)
301+
if (independent) {
302+
ld_satisfied <- TRUE # Assume satisfied unless violations found
303+
304+
# Check sparse SNPs: |r| < current_ld_threshold
305+
if (length(sparse_indices) > 1) {
306+
sparse_cor <- cor(G[, sparse_indices, drop = FALSE])
307+
high_ld_sparse <- which(abs(sparse_cor) >= current_ld_threshold & upper.tri(sparse_cor, diag = FALSE), arr.ind = TRUE)
308+
if (nrow(high_ld_sparse) > 0) ld_satisfied <- FALSE
309+
}
310+
311+
# Check oligogenic SNPs: |r| < current_ld_threshold
312+
if (ld_satisfied && length(oligogenic_indices) > 1) {
313+
oligo_cor <- cor(G[, oligogenic_indices, drop = FALSE])
314+
high_ld_oligo <- which(abs(oligo_cor) >= current_ld_threshold & upper.tri(oligo_cor, diag = FALSE), arr.ind = TRUE)
315+
if (nrow(high_ld_oligo) > 0) ld_satisfied <- FALSE
316+
}
317+
318+
# Check between sparse and oligogenic: |r| < current_ld_threshold
319+
if (ld_satisfied && length(sparse_indices) > 0 && length(oligogenic_indices) > 0) {
320+
cross_cor <- cor(G[, sparse_indices, drop = FALSE], G[, oligogenic_indices, drop = FALSE])
321+
high_ld_cross <- which(abs(cross_cor) >= current_ld_threshold, arr.ind = TRUE)
322+
if (nrow(high_ld_cross) > 0) ld_satisfied <- FALSE
323+
}
324+
} else {
325+
# If not independent, accept immediately
326+
ld_satisfied <- TRUE
314327
}
315-
} else {
316-
# If not independent, accept
317-
ld_satisfied <- TRUE
328+
}
329+
330+
# If not satisfied at current threshold, increase and try again
331+
if (!ld_satisfied && independent) {
332+
current_ld_threshold <- current_ld_threshold + ld_step
318333
}
319334
}
320335

321-
# Warn if LD constraints not satisfied
336+
# Warn if LD constraints not satisfied even at maximum threshold
322337
if (independent && !ld_satisfied) {
323-
warning(paste0("Failed to satisfy LD constraints after ", max_attempts,
324-
" attempts. Returning last generated data with LD violations."))
338+
warning(paste0("Failed to satisfy LD constraints after ", total_attempts,
339+
" total attempts (up to ld_threshold = ", ld_max,
340+
"). Returning last generated data with LD violations."))
341+
} else if (independent && current_ld_threshold > ld_threshold) {
342+
message(paste0("LD constraints satisfied at ld_threshold = ", current_ld_threshold,
343+
" after ", total_attempts, " total attempts."))
325344
}
326345

327346
# Calculate causal indices using power-based identification (non-central chi-square with Bonferroni correction)
@@ -346,6 +365,7 @@ generate_cis_qtl_data <- function(G,
346365
oligogenic_indices = oligogenic_indices,
347366
infinitesimal_indices = infinitesimal_indices,
348367
residual_variance = var_epsilon,
349-
causal = causal_indices
368+
causal = causal_indices,
369+
ld_threshold_used = if (independent) current_ld_threshold else NA
350370
))
351371
}

man/generate_cis_qtl_data.Rd

Lines changed: 9 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)