@@ -138,9 +138,9 @@ qr_screen <- function(
138138 }
139139
140140 # Split variant_id and reorder columns
141- parsed <- parse_variant_id(df_result $ variant_id )
142141 df_result <- df_result %> %
143- mutate(chr = parsed $ chrom , pos = parsed $ pos , ref = parsed $ A2 , alt = parsed $ A1 )
142+ separate(variant_id , into = c(" chr" , " pos" , " ref" , " alt" ), sep = " [:_]" , remove = FALSE ) %> %
143+ mutate(chr = as.numeric(gsub(" chr" , " " , chr )), pos = as.numeric(pos ))
144144
145145 # Define the column order
146146 col_order <- c(" chr" , " pos" , " ref" , " alt" , " phenotype_id" , " variant_id" , " p_qr" )
@@ -353,10 +353,12 @@ perform_qr_analysis <- function(X, Y, Z = NULL, tau_values = seq(0.05, 0.95, by
353353 names_from = tau ,
354354 values_from = predictor_coef ,
355355 names_prefix = " coef_qr_"
356- )
357- parsed_ids <- parse_variant_id(result_table_wide $ variant_id )
358- result_table_wide <- result_table_wide %> %
359- mutate(chr = parsed_ids $ chrom , pos = parsed_ids $ pos , ref = parsed_ids $ A2 , alt = parsed_ids $ A1 ) %> %
356+ ) %> %
357+ separate(variant_id , into = c(" chr" , " pos" , " ref" , " alt" ), sep = " [:_]" , remove = FALSE , fill = " right" ) %> %
358+ mutate(
359+ chr = as.numeric(gsub(" chr" , " " , chr )),
360+ pos = as.numeric(pos )
361+ ) %> %
360362 select(chr , pos , ref , alt , everything())
361363
362364 # Return the wide format result table
@@ -618,14 +620,15 @@ remove_highcorr_snp <- function(X, problematic_cols, strategy = c("correlation",
618620# ' @param strategy The strategy for removing problematic columns ("variance", "correlation", or "response_correlation")
619621# ' @return A list containing the cleaned X matrix, beta matrix as twas weight, and pseudo R-squared values
620622# ' @noRd
621- calculate_qr_and_pseudo_R2 <- function (AssocData , tau.list , strategy = c(" correlation" , " variance" , " response_correlation" )) {
623+ calculate_qr_and_pseudo_R2 <- function (AssocData , tau.list , strategy = c(" correlation" , " variance" , " response_correlation" ),
624+ corr_thresholds = seq(0.75 , 0.5 , by = - 0.05 )) {
622625 # Make sure quantreg is installed
623626 if (! requireNamespace(" quantreg" , quietly = TRUE )) {
624627 stop(" To use this function, please install quantreg: https://cran.r-project.org/web/packages/quantreg/index.html" )
625628 }
626629 strategy <- match.arg(strategy )
627630 # Check and handle problematic columns affecting the full rank of the design matrix
628- AssocData $ X.filter <- check_remove_highcorr_snp(X = AssocData $ X.filter , C = AssocData $ C , strategy = strategy , response = AssocData $ Y )
631+ AssocData $ X.filter <- check_remove_highcorr_snp(X = AssocData $ X.filter , C = AssocData $ C , strategy = strategy , response = AssocData $ Y , corr_thresholds = corr_thresholds )
629632 snp_names <- colnames(AssocData $ X.filter )
630633 # Build the cleaned design matrix using the filtered X and unnamed C
631634
@@ -717,6 +720,7 @@ calculate_coef_heterogeneity <- function(rq_coef_result) {
717720# ' @param tau_range Numeric vector of tau values to use (default: seq(0.1, 0.9, by = 0.05))
718721# ' @param min_valid Minimum number of valid (non-NA) coefficients required (default: 10)
719722# ' @return A data frame with variant_id, xi, and xi_pval columns
723+ # ' @importFrom XICOR xicor
720724# ' @export
721725calculate_xi_correlation <- function (rq_coef_result , tau_range = seq(0.1 , 0.9 , by = 0.05 ), min_valid = 10 ) {
722726 if (! requireNamespace(" XICOR" , quietly = TRUE )) {
@@ -819,7 +823,9 @@ quantile_twas_weight_pipeline <- function(X, Y, Z = NULL, maf = NULL, region_id
819823 quantile_twas_tau_list = seq(0.01 , 0.99 , by = 0.01 ),
820824 screen_threshold = 0.05 ,
821825 xi_tau_range = seq(0.1 , 0.9 , by = 0.05 ),
822- keep_variants = NULL ) {
826+ keep_variants = NULL ,
827+ marginal_beta_calculate = TRUE ,
828+ twas_weight_calculate = TRUE ) {
823829 # Step 1: vQTL
824830 # Step 1-1: Calculate vQTL rank scores
825831 message(" Step 0: Calculating vQTL rank scores for region " , region_id )
@@ -874,108 +880,156 @@ quantile_twas_weight_pipeline <- function(X, Y, Z = NULL, maf = NULL, region_id
874880 message(" Skipping LD clumping." )
875881 }
876882
877- # Step 4: Fit marginal QR to get beta with SNPs for quantile_qtl_tau_list values
878- message(" Fitting marginal QR for selected SNPs..." )
879- X_for_qr <- if (ld_clumping ) x_clumped else X_filtered
880- if (! is.null(keep_variants )) {
881- variants_to_keep <- intersect(keep_variants , colnames(X_for_qr ))
882- if (length(variants_to_keep ) > 0 ) {
883- X_for_qr <- X_for_qr [, variants_to_keep , drop = FALSE ]
884- message(" Filtered to " , ncol(X_for_qr ), " variants from keep_variants list for QR analysis" )
885- } else {
886- message(" Warning: No variants from keep_variants found in selected SNPs, using all selected SNPs" )
883+ # Determine whether to skip marginal beta calculation:
884+ # - skip if marginal_beta_calculate = FALSE
885+ # - skip if keep_variants is provided but empty (length 0)
886+ skip_marginal_beta <- ! marginal_beta_calculate ||
887+ (! is.null(keep_variants ) && length(keep_variants ) == 0 )
888+
889+ if (! skip_marginal_beta ) {
890+ # Step 4: Fit marginal QR to get beta with SNPs for quantile_qtl_tau_list values
891+ message(" Fitting marginal QR for selected SNPs..." )
892+ X_for_qr <- if (ld_clumping ) x_clumped else X_filtered
893+ if (! is.null(keep_variants )) {
894+ variants_to_keep <- intersect(keep_variants , colnames(X_for_qr ))
895+ if (length(variants_to_keep ) > 0 ) {
896+ X_for_qr <- X_for_qr [, variants_to_keep , drop = FALSE ]
897+ message(" Filtered to " , ncol(X_for_qr ), " variants from keep_variants list for QR analysis" )
898+ } else {
899+ message(" Warning: No variants from keep_variants found in selected SNPs, using all selected SNPs" )
900+ }
887901 }
902+ rq_coef_result <- perform_qr_analysis(X = X_for_qr , Y = Y , Z = Z , tau_values = quantile_qtl_tau_list )
903+
904+ # Step 5: Heterogeneity calculation
905+ # Step 5-1: beta_heterogeneity index in marginal model
906+ message(" Marginal QR for selected SNPs completed. Calculating beta heterogeneity..." )
907+ beta_heterogeneity <- calculate_coef_heterogeneity(rq_coef_result )
908+ message(" Beta heterogeneity calculation completed." )
909+
910+ # Step 5-2: Calculate xi correlation (Chatterjee correlation test)
911+ message(" Calculating xi correlation for QR coefficients..." )
912+ xi_correlation <- calculate_xi_correlation(rq_coef_result , tau_range = xi_tau_range , min_valid = 10 )
913+ message(" Xi correlation calculation completed." )
914+
915+ # Merge xi and xi_pval into rq_coef_result (using left_join to preserve row order)
916+ rq_coef_result <- rq_coef_result %> %
917+ dplyr :: left_join(xi_correlation , by = " variant_id" )
918+
919+ results $ rq_coef_df <- rq_coef_result
920+ results $ beta_heterogeneity <- beta_heterogeneity
921+ results $ xi_correlation <- xi_correlation
922+ } else {
923+ message(" Skipping marginal beta calculation and heterogeneity analysis." )
888924 }
889- rq_coef_result <- perform_qr_analysis(X = X_for_qr , Y = Y , Z = Z , tau_values = quantile_qtl_tau_list )
890-
891- # Step 5: Heterogeneity calculation
892- # Step 5-1: beta_heterogeneity index in marginal model
893- message(" Marginal QR for selected SNPs completed. Calculating beta heterogeneity..." )
894- beta_heterogeneity <- calculate_coef_heterogeneity(rq_coef_result )
895- message(" Beta heterogeneity calculation completed." )
896-
897- # Step 5-2: Calculate xi correlation (Chatterjee correlation test)
898- message(" Calculating xi correlation for QR coefficients..." )
899- xi_correlation <- calculate_xi_correlation(rq_coef_result , tau_range = xi_tau_range , min_valid = 10 )
900- message(" Xi correlation calculation completed." )
901-
902- # Merge xi and xi_pval into rq_coef_result (using left_join to preserve row order)
903- rq_coef_result <- rq_coef_result %> %
904- dplyr :: left_join(xi_correlation , by = " variant_id" )
905-
906- results $ rq_coef_df <- rq_coef_result
907- results $ beta_heterogeneity <- beta_heterogeneity
908-
909- # Step 6: Optional LD panel filtering and MAF filtering from results of QR_screen
910- if (! is.null(ld_reference_meta_file )) {
911- message(" Starting LD panel filtering..." )
912- ld_result <- tryCatch(
913- {
914- variants_kept <- filter_variants_by_ld_reference(colnames(X_filtered ), ld_reference_meta_file )
915- if (length(variants_kept $ data ) == 0 ) NULL else variants_kept
916- },
917- error = function (e ) {
918- message(" Error in LD filtering for region " , region_id , " : " , e $ message )
919- NULL
925+
926+ if (twas_weight_calculate ) {
927+ # Step 6: Optional LD panel filtering and MAF filtering from results of QR_screen
928+ if (! is.null(ld_reference_meta_file )) {
929+ message(" Starting LD panel filtering..." )
930+ ld_result <- tryCatch(
931+ {
932+ variants_kept <- filter_variants_by_ld_reference(colnames(X_filtered ), ld_reference_meta_file )
933+ if (length(variants_kept $ data ) == 0 ) NULL else variants_kept
934+ },
935+ error = function (e ) {
936+ message(" Error in LD filtering for region " , region_id , " : " , e $ message )
937+ NULL
938+ }
939+ )
940+
941+ if (is.null(ld_result )) {
942+ results $ message <- paste0(" No SNPs left or error in LD filtering in region " , region_id )
943+ return (results )
920944 }
921- )
922945
923- if (is.null(ld_result )) {
924- results $ message <- paste0(" No SNPs left or error in LD filtering in region " , region_id )
925- return (results )
946+ X_filtered <- X_filtered [, ld_result $ data , drop = FALSE ]
947+ message(paste0(" Number of SNPs after LD filtering: " , ncol(X_filtered )))
948+
949+ # MAF filtering
950+ if (! is.null(maf )) {
951+ maf_filtered <- maf [colnames(X_filtered )] > twas_maf_cutoff
952+ X_filtered <- X_filtered [, maf_filtered , drop = FALSE ]
953+
954+ # Check if any SNPs are left after MAF filtering
955+ if (ncol(X_filtered ) == 0 ) {
956+ results $ message <- paste0(" No SNPs left after MAF filtering in region " , region_id )
957+ return (results )
958+ }
959+
960+ message(paste0(" Number of SNPs after MAF filtering: " , ncol(X_filtered )))
961+ }
926962 }
927963
928- X_filtered <- X_filtered [, ld_result $ data , drop = FALSE ]
929- message(paste0(" Number of SNPs after LD filtering: " , ncol(X_filtered )))
964+ # Step 7-9: Pre-filter by p_qr, corr_filter, then fit QR model
965+ # First attempt: p_qr < 0.05, corr_filter down to 0.5
966+ # If still singular: retry with p_qr < 0.01, corr_filter down to 0.2
967+ pqr_cutoffs <- list (
968+ list (pval = 0.05 , corr_thresholds = seq(0.75 , 0.5 , by = - 0.05 )),
969+ list (pval = 0.01 , corr_thresholds = seq(0.75 , 0.2 , by = - 0.05 ))
970+ )
930971
931- # MAF filtering
932- if (! is.null(maf )) {
933- maf_filtered <- maf [colnames(X_filtered )] > twas_maf_cutoff
934- X_filtered <- X_filtered [, maf_filtered , drop = FALSE ]
972+ qr_beta_R2_results <- NULL
973+ for (attempt in seq_along(pqr_cutoffs )) {
974+ pqr_cutoff <- pqr_cutoffs [[attempt ]]$ pval
975+ corr_thresholds <- pqr_cutoffs [[attempt ]]$ corr_thresholds
976+
977+ # Step 7: Pre-filter variants by raw p_qr
978+ raw_pvals <- p.screen $ pvec [colnames(X_filtered )]
979+ sig_raw <- which(raw_pvals < pqr_cutoff )
980+ if (length(sig_raw ) == 0 ) {
981+ message(" No variants with raw p_qr < " , pqr_cutoff , " in region " , region_id )
982+ next
983+ }
984+ if (length(sig_raw ) < ncol(X_filtered )) {
985+ message(" Pre-filtering variants by raw p_qr < " , pqr_cutoff , " : keeping " , length(sig_raw ), " out of " , ncol(X_filtered ), " variants" )
986+ X_pqr_filtered <- X_filtered [, sig_raw , drop = FALSE ]
987+ } else {
988+ X_pqr_filtered <- X_filtered
989+ }
935990
936- # Check if any SNPs are left after MAF filtering
937- if (ncol(X_filtered ) == 0 ) {
938- results $ message <- paste0(" No SNPs left after MAF filtering in region " , region_id )
939- return (results )
991+ # Step 8: Filter highly correlated SNPs
992+ message(" Filtering highly correlated SNPs..." )
993+ if (ncol(X_pqr_filtered ) > 1 ) {
994+ filtered <- corr_filter(X_pqr_filtered , 0.8 )
995+ X.filter <- filtered $ X.new
996+ } else {
997+ X.filter <- X_pqr_filtered
940998 }
941999
942- message(paste0(" Number of SNPs after MAF filtering: " , ncol(X_filtered )))
1000+ # Step 9: Fit QR and get twas weight and R2 for all taus
1001+ message(" Fitting full QR to calculate TWAS weights and pseudo R-squared values..." )
1002+ AssocData <- list (X = X , Y = Y , C = Z , X.filter = X.filter )
1003+ qr_beta_R2_results <- tryCatch(
1004+ {
1005+ res <- calculate_qr_and_pseudo_R2(AssocData , quantile_twas_tau_list , corr_thresholds = corr_thresholds )
1006+ res
1007+ },
1008+ error = function (e ) {
1009+ message(" Attempt " , attempt , " failed (p_qr < " , pqr_cutoff , " ): " , e $ message )
1010+ NULL
1011+ }
1012+ )
1013+
1014+ if (! is.null(qr_beta_R2_results )) break
9431015 }
944- }
9451016
946- # Step 7: Pre-filter variants by raw p_qr < 0.05 to reduce dimensionality
947- raw_pvals <- p.screen $ pvec [colnames(X_filtered )]
948- sig_raw <- which(raw_pvals < 0.05 )
949- if (length(sig_raw ) > 0 && length(sig_raw ) < ncol(X_filtered )) {
950- message(" Pre-filtering variants by raw p_qr < 0.05: keeping " , length(sig_raw ), " out of " , ncol(X_filtered ), " variants" )
951- X_filtered <- X_filtered [, sig_raw , drop = FALSE ]
952- } else if (length(sig_raw ) == 0 ) {
953- results $ message <- paste0(" No variants with raw p_qr < 0.05 in region " , region_id )
954- return (results )
955- }
1017+ if (is.null(qr_beta_R2_results )) {
1018+ results $ message <- paste0(" Failed to fit QR model after all attempts in region " , region_id )
1019+ return (results )
1020+ }
1021+
1022+ X.filter <- qr_beta_R2_results $ X.filter
1023+ message(" TWAS weights and pseudo R-squared calculations completed." )
9561024
957- # Step 8: Filter highly correlated SNPs
958- message( " Filtering highly correlated SNPs... " )
959- if (ncol( X_filtered ) > 1 ) {
960- filtered <- corr_filter( X_filtered , 0.8 )
961- X.filter <- filtered $ X.new
1025+ # Add additional results
1026+ results $ twas_variant_names <- colnames( X.filter )
1027+ results $ twas_weight <- qr_beta_R2_results $ beta_mat
1028+ results $ pseudo_R2 <- qr_beta_R2_results $ pseudo_R2
1029+ results $ quantile_twas_prediction <- X.filter %*% results $ twas_weight
9621030 } else {
963- X.filter <- X_filtered
964- results $ message <- paste0(" Skipping correlation filter because there is only one significant SNP in region " , region_id )
965- }
966-
967- # Step 9: Fit QR and get twas weight and R2 for all taus
968- message(" Filter highly correlated SNPs completed. Fitting full QR to calculate TWAS weights and pseudo R-squared values..." )
969- AssocData <- list (X = X , Y = Y , C = Z , X.filter = X.filter )
970- qr_beta_R2_results <- calculate_qr_and_pseudo_R2(AssocData , quantile_twas_tau_list )
971- X.filter <- qr_beta_R2_results $ X.filter
972- message(" TWAS weights and pseudo R-squared calculations completed." )
973-
974- # Add additional results
975- results $ twas_variant_names <- colnames(X.filter )
976- results $ twas_weight <- qr_beta_R2_results $ beta_mat
977- results $ pseudo_R2 <- qr_beta_R2_results $ pseudo_R2
978- results $ quantile_twas_prediction <- X.filter %*% results $ twas_weight
1031+ message(" Skipping TWAS weight calculation." )
1032+ }
9791033
9801034 return (results )
9811035}
0 commit comments