Skip to content

Commit 8aaaee1

Browse files
committed
qtwas weights modification
1 parent a3936d6 commit 8aaaee1

1 file changed

Lines changed: 151 additions & 97 deletions

File tree

R/quantile_twas_weight.R

Lines changed: 151 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,9 @@ qr_screen <- function(
138138
}
139139

140140
# Split variant_id and reorder columns
141-
parsed <- parse_variant_id(df_result$variant_id)
142141
df_result <- df_result %>%
143-
mutate(chr = parsed$chrom, pos = parsed$pos, ref = parsed$A2, alt = parsed$A1)
142+
separate(variant_id, into = c("chr", "pos", "ref", "alt"), sep = "[:_]", remove = FALSE) %>%
143+
mutate(chr = as.numeric(gsub("chr", "", chr)), pos = as.numeric(pos))
144144

145145
# Define the column order
146146
col_order <- c("chr", "pos", "ref", "alt", "phenotype_id", "variant_id", "p_qr")
@@ -353,10 +353,12 @@ perform_qr_analysis <- function(X, Y, Z = NULL, tau_values = seq(0.05, 0.95, by
353353
names_from = tau,
354354
values_from = predictor_coef,
355355
names_prefix = "coef_qr_"
356-
)
357-
parsed_ids <- parse_variant_id(result_table_wide$variant_id)
358-
result_table_wide <- result_table_wide %>%
359-
mutate(chr = parsed_ids$chrom, pos = parsed_ids$pos, ref = parsed_ids$A2, alt = parsed_ids$A1) %>%
356+
) %>%
357+
separate(variant_id, into = c("chr", "pos", "ref", "alt"), sep = "[:_]", remove = FALSE, fill = "right") %>%
358+
mutate(
359+
chr = as.numeric(gsub("chr", "", chr)),
360+
pos = as.numeric(pos)
361+
) %>%
360362
select(chr, pos, ref, alt, everything())
361363

362364
# Return the wide format result table
@@ -618,14 +620,15 @@ remove_highcorr_snp <- function(X, problematic_cols, strategy = c("correlation",
618620
#' @param strategy The strategy for removing problematic columns ("variance", "correlation", or "response_correlation")
619621
#' @return A list containing the cleaned X matrix, beta matrix as twas weight, and pseudo R-squared values
620622
#' @noRd
621-
calculate_qr_and_pseudo_R2 <- function(AssocData, tau.list, strategy = c("correlation", "variance", "response_correlation")) {
623+
calculate_qr_and_pseudo_R2 <- function(AssocData, tau.list, strategy = c("correlation", "variance", "response_correlation"),
624+
corr_thresholds = seq(0.75, 0.5, by = -0.05)) {
622625
# Make sure quantreg is installed
623626
if (!requireNamespace("quantreg", quietly = TRUE)) {
624627
stop("To use this function, please install quantreg: https://cran.r-project.org/web/packages/quantreg/index.html")
625628
}
626629
strategy <- match.arg(strategy)
627630
# Check and handle problematic columns affecting the full rank of the design matrix
628-
AssocData$X.filter <- check_remove_highcorr_snp(X = AssocData$X.filter, C = AssocData$C, strategy = strategy, response = AssocData$Y)
631+
AssocData$X.filter <- check_remove_highcorr_snp(X = AssocData$X.filter, C = AssocData$C, strategy = strategy, response = AssocData$Y, corr_thresholds = corr_thresholds)
629632
snp_names <- colnames(AssocData$X.filter)
630633
# Build the cleaned design matrix using the filtered X and unnamed C
631634

@@ -717,6 +720,7 @@ calculate_coef_heterogeneity <- function(rq_coef_result) {
717720
#' @param tau_range Numeric vector of tau values to use (default: seq(0.1, 0.9, by = 0.05))
718721
#' @param min_valid Minimum number of valid (non-NA) coefficients required (default: 10)
719722
#' @return A data frame with variant_id, xi, and xi_pval columns
723+
#' @importFrom XICOR xicor
720724
#' @export
721725
calculate_xi_correlation <- function(rq_coef_result, tau_range = seq(0.1, 0.9, by = 0.05), min_valid = 10) {
722726
if (!requireNamespace("XICOR", quietly = TRUE)) {
@@ -819,7 +823,9 @@ quantile_twas_weight_pipeline <- function(X, Y, Z = NULL, maf = NULL, region_id
819823
quantile_twas_tau_list = seq(0.01, 0.99, by = 0.01),
820824
screen_threshold = 0.05,
821825
xi_tau_range = seq(0.1, 0.9, by = 0.05),
822-
keep_variants = NULL) {
826+
keep_variants = NULL,
827+
marginal_beta_calculate = TRUE,
828+
twas_weight_calculate = TRUE) {
823829
# Step 1: vQTL
824830
# Step 1-1: Calculate vQTL rank scores
825831
message("Step 0: Calculating vQTL rank scores for region ", region_id)
@@ -874,108 +880,156 @@ quantile_twas_weight_pipeline <- function(X, Y, Z = NULL, maf = NULL, region_id
874880
message("Skipping LD clumping.")
875881
}
876882

877-
# Step 4: Fit marginal QR to get beta with SNPs for quantile_qtl_tau_list values
878-
message("Fitting marginal QR for selected SNPs...")
879-
X_for_qr <- if (ld_clumping) x_clumped else X_filtered
880-
if (!is.null(keep_variants)) {
881-
variants_to_keep <- intersect(keep_variants, colnames(X_for_qr))
882-
if (length(variants_to_keep) > 0) {
883-
X_for_qr <- X_for_qr[, variants_to_keep, drop = FALSE]
884-
message("Filtered to ", ncol(X_for_qr), " variants from keep_variants list for QR analysis")
885-
} else {
886-
message("Warning: No variants from keep_variants found in selected SNPs, using all selected SNPs")
883+
# Determine whether to skip marginal beta calculation:
884+
# - skip if marginal_beta_calculate = FALSE
885+
# - skip if keep_variants is provided but empty (length 0)
886+
skip_marginal_beta <- !marginal_beta_calculate ||
887+
(!is.null(keep_variants) && length(keep_variants) == 0)
888+
889+
if (!skip_marginal_beta) {
890+
# Step 4: Fit marginal QR to get beta with SNPs for quantile_qtl_tau_list values
891+
message("Fitting marginal QR for selected SNPs...")
892+
X_for_qr <- if (ld_clumping) x_clumped else X_filtered
893+
if (!is.null(keep_variants)) {
894+
variants_to_keep <- intersect(keep_variants, colnames(X_for_qr))
895+
if (length(variants_to_keep) > 0) {
896+
X_for_qr <- X_for_qr[, variants_to_keep, drop = FALSE]
897+
message("Filtered to ", ncol(X_for_qr), " variants from keep_variants list for QR analysis")
898+
} else {
899+
message("Warning: No variants from keep_variants found in selected SNPs, using all selected SNPs")
900+
}
887901
}
902+
rq_coef_result <- perform_qr_analysis(X = X_for_qr, Y = Y, Z = Z, tau_values = quantile_qtl_tau_list)
903+
904+
# Step 5: Heterogeneity calculation
905+
# Step 5-1: beta_heterogeneity index in marginal model
906+
message("Marginal QR for selected SNPs completed. Calculating beta heterogeneity...")
907+
beta_heterogeneity <- calculate_coef_heterogeneity(rq_coef_result)
908+
message("Beta heterogeneity calculation completed.")
909+
910+
# Step 5-2: Calculate xi correlation (Chatterjee correlation test)
911+
message("Calculating xi correlation for QR coefficients...")
912+
xi_correlation <- calculate_xi_correlation(rq_coef_result, tau_range = xi_tau_range, min_valid = 10)
913+
message("Xi correlation calculation completed.")
914+
915+
# Merge xi and xi_pval into rq_coef_result (using left_join to preserve row order)
916+
rq_coef_result <- rq_coef_result %>%
917+
dplyr::left_join(xi_correlation, by = "variant_id")
918+
919+
results$rq_coef_df <- rq_coef_result
920+
results$beta_heterogeneity <- beta_heterogeneity
921+
results$xi_correlation <- xi_correlation
922+
} else {
923+
message("Skipping marginal beta calculation and heterogeneity analysis.")
888924
}
889-
rq_coef_result <- perform_qr_analysis(X = X_for_qr, Y = Y, Z = Z, tau_values = quantile_qtl_tau_list)
890-
891-
# Step 5: Heterogeneity calculation
892-
# Step 5-1: beta_heterogeneity index in marginal model
893-
message("Marginal QR for selected SNPs completed. Calculating beta heterogeneity...")
894-
beta_heterogeneity <- calculate_coef_heterogeneity(rq_coef_result)
895-
message("Beta heterogeneity calculation completed.")
896-
897-
# Step 5-2: Calculate xi correlation (Chatterjee correlation test)
898-
message("Calculating xi correlation for QR coefficients...")
899-
xi_correlation <- calculate_xi_correlation(rq_coef_result, tau_range = xi_tau_range, min_valid = 10)
900-
message("Xi correlation calculation completed.")
901-
902-
# Merge xi and xi_pval into rq_coef_result (using left_join to preserve row order)
903-
rq_coef_result <- rq_coef_result %>%
904-
dplyr::left_join(xi_correlation, by = "variant_id")
905-
906-
results$rq_coef_df <- rq_coef_result
907-
results$beta_heterogeneity <- beta_heterogeneity
908-
909-
# Step 6: Optional LD panel filtering and MAF filtering from results of QR_screen
910-
if (!is.null(ld_reference_meta_file)) {
911-
message("Starting LD panel filtering...")
912-
ld_result <- tryCatch(
913-
{
914-
variants_kept <- filter_variants_by_ld_reference(colnames(X_filtered), ld_reference_meta_file)
915-
if (length(variants_kept$data) == 0) NULL else variants_kept
916-
},
917-
error = function(e) {
918-
message("Error in LD filtering for region ", region_id, ": ", e$message)
919-
NULL
925+
926+
if (twas_weight_calculate) {
927+
# Step 6: Optional LD panel filtering and MAF filtering from results of QR_screen
928+
if (!is.null(ld_reference_meta_file)) {
929+
message("Starting LD panel filtering...")
930+
ld_result <- tryCatch(
931+
{
932+
variants_kept <- filter_variants_by_ld_reference(colnames(X_filtered), ld_reference_meta_file)
933+
if (length(variants_kept$data) == 0) NULL else variants_kept
934+
},
935+
error = function(e) {
936+
message("Error in LD filtering for region ", region_id, ": ", e$message)
937+
NULL
938+
}
939+
)
940+
941+
if (is.null(ld_result)) {
942+
results$message <- paste0("No SNPs left or error in LD filtering in region ", region_id)
943+
return(results)
920944
}
921-
)
922945

923-
if (is.null(ld_result)) {
924-
results$message <- paste0("No SNPs left or error in LD filtering in region ", region_id)
925-
return(results)
946+
X_filtered <- X_filtered[, ld_result$data, drop = FALSE]
947+
message(paste0("Number of SNPs after LD filtering: ", ncol(X_filtered)))
948+
949+
# MAF filtering
950+
if (!is.null(maf)) {
951+
maf_filtered <- maf[colnames(X_filtered)] > twas_maf_cutoff
952+
X_filtered <- X_filtered[, maf_filtered, drop = FALSE]
953+
954+
# Check if any SNPs are left after MAF filtering
955+
if (ncol(X_filtered) == 0) {
956+
results$message <- paste0("No SNPs left after MAF filtering in region ", region_id)
957+
return(results)
958+
}
959+
960+
message(paste0("Number of SNPs after MAF filtering: ", ncol(X_filtered)))
961+
}
926962
}
927963

928-
X_filtered <- X_filtered[, ld_result$data, drop = FALSE]
929-
message(paste0("Number of SNPs after LD filtering: ", ncol(X_filtered)))
964+
# Step 7-9: Pre-filter by p_qr, corr_filter, then fit QR model
965+
# First attempt: p_qr < 0.05, corr_filter down to 0.5
966+
# If still singular: retry with p_qr < 0.01, corr_filter down to 0.2
967+
pqr_cutoffs <- list(
968+
list(pval = 0.05, corr_thresholds = seq(0.75, 0.5, by = -0.05)),
969+
list(pval = 0.01, corr_thresholds = seq(0.75, 0.2, by = -0.05))
970+
)
930971

931-
# MAF filtering
932-
if (!is.null(maf)) {
933-
maf_filtered <- maf[colnames(X_filtered)] > twas_maf_cutoff
934-
X_filtered <- X_filtered[, maf_filtered, drop = FALSE]
972+
qr_beta_R2_results <- NULL
973+
for (attempt in seq_along(pqr_cutoffs)) {
974+
pqr_cutoff <- pqr_cutoffs[[attempt]]$pval
975+
corr_thresholds <- pqr_cutoffs[[attempt]]$corr_thresholds
976+
977+
# Step 7: Pre-filter variants by raw p_qr
978+
raw_pvals <- p.screen$pvec[colnames(X_filtered)]
979+
sig_raw <- which(raw_pvals < pqr_cutoff)
980+
if (length(sig_raw) == 0) {
981+
message("No variants with raw p_qr < ", pqr_cutoff, " in region ", region_id)
982+
next
983+
}
984+
if (length(sig_raw) < ncol(X_filtered)) {
985+
message("Pre-filtering variants by raw p_qr < ", pqr_cutoff, ": keeping ", length(sig_raw), " out of ", ncol(X_filtered), " variants")
986+
X_pqr_filtered <- X_filtered[, sig_raw, drop = FALSE]
987+
} else {
988+
X_pqr_filtered <- X_filtered
989+
}
935990

936-
# Check if any SNPs are left after MAF filtering
937-
if (ncol(X_filtered) == 0) {
938-
results$message <- paste0("No SNPs left after MAF filtering in region ", region_id)
939-
return(results)
991+
# Step 8: Filter highly correlated SNPs
992+
message("Filtering highly correlated SNPs...")
993+
if (ncol(X_pqr_filtered) > 1) {
994+
filtered <- corr_filter(X_pqr_filtered, 0.8)
995+
X.filter <- filtered$X.new
996+
} else {
997+
X.filter <- X_pqr_filtered
940998
}
941999

942-
message(paste0("Number of SNPs after MAF filtering: ", ncol(X_filtered)))
1000+
# Step 9: Fit QR and get twas weight and R2 for all taus
1001+
message("Fitting full QR to calculate TWAS weights and pseudo R-squared values...")
1002+
AssocData <- list(X = X, Y = Y, C = Z, X.filter = X.filter)
1003+
qr_beta_R2_results <- tryCatch(
1004+
{
1005+
res <- calculate_qr_and_pseudo_R2(AssocData, quantile_twas_tau_list, corr_thresholds = corr_thresholds)
1006+
res
1007+
},
1008+
error = function(e) {
1009+
message("Attempt ", attempt, " failed (p_qr < ", pqr_cutoff, "): ", e$message)
1010+
NULL
1011+
}
1012+
)
1013+
1014+
if (!is.null(qr_beta_R2_results)) break
9431015
}
944-
}
9451016

946-
# Step 7: Pre-filter variants by raw p_qr < 0.05 to reduce dimensionality
947-
raw_pvals <- p.screen$pvec[colnames(X_filtered)]
948-
sig_raw <- which(raw_pvals < 0.05)
949-
if (length(sig_raw) > 0 && length(sig_raw) < ncol(X_filtered)) {
950-
message("Pre-filtering variants by raw p_qr < 0.05: keeping ", length(sig_raw), " out of ", ncol(X_filtered), " variants")
951-
X_filtered <- X_filtered[, sig_raw, drop = FALSE]
952-
} else if (length(sig_raw) == 0) {
953-
results$message <- paste0("No variants with raw p_qr < 0.05 in region ", region_id)
954-
return(results)
955-
}
1017+
if (is.null(qr_beta_R2_results)) {
1018+
results$message <- paste0("Failed to fit QR model after all attempts in region ", region_id)
1019+
return(results)
1020+
}
1021+
1022+
X.filter <- qr_beta_R2_results$X.filter
1023+
message("TWAS weights and pseudo R-squared calculations completed.")
9561024

957-
# Step 8: Filter highly correlated SNPs
958-
message("Filtering highly correlated SNPs...")
959-
if (ncol(X_filtered) > 1) {
960-
filtered <- corr_filter(X_filtered, 0.8)
961-
X.filter <- filtered$X.new
1025+
# Add additional results
1026+
results$twas_variant_names <- colnames(X.filter)
1027+
results$twas_weight <- qr_beta_R2_results$beta_mat
1028+
results$pseudo_R2 <- qr_beta_R2_results$pseudo_R2
1029+
results$quantile_twas_prediction <- X.filter %*% results$twas_weight
9621030
} else {
963-
X.filter <- X_filtered
964-
results$message <- paste0("Skipping correlation filter because there is only one significant SNP in region ", region_id)
965-
}
966-
967-
# Step 9: Fit QR and get twas weight and R2 for all taus
968-
message("Filter highly correlated SNPs completed. Fitting full QR to calculate TWAS weights and pseudo R-squared values...")
969-
AssocData <- list(X = X, Y = Y, C = Z, X.filter = X.filter)
970-
qr_beta_R2_results <- calculate_qr_and_pseudo_R2(AssocData, quantile_twas_tau_list)
971-
X.filter <- qr_beta_R2_results$X.filter
972-
message("TWAS weights and pseudo R-squared calculations completed.")
973-
974-
# Add additional results
975-
results$twas_variant_names <- colnames(X.filter)
976-
results$twas_weight <- qr_beta_R2_results$beta_mat
977-
results$pseudo_R2 <- qr_beta_R2_results$pseudo_R2
978-
results$quantile_twas_prediction <- X.filter %*% results$twas_weight
1031+
message("Skipping TWAS weight calculation.")
1032+
}
9791033

9801034
return(results)
9811035
}

0 commit comments

Comments
 (0)