mattlee821
diff --git a/DESCRIPTION b/DESCRIPTION
Lines changed: 0 additions & 1 deletion
diff --git a/NAMESPACE b/NAMESPACE
Lines changed: 0 additions & 2 deletions
diff --git a/R/TwoSampleMR_functions.R b/R/TwoSampleMR_functions.R
Lines changed: 77 additions & 74 deletions
@@ -13,7 +13,6 @@ RoxygenNote: 7.3.3
 Imports:
   dplyr, forcats, ggplot2, tibble, tidyr, purrr,
   plyr,
-  magrittr,
   data.table,
   here,
   rmarkdown,
 
@@ -1,6 +1,5 @@
 # Generated by roxygen2: do not edit by hand
 
-export("%>%")
 export(alignment_check)
 export(biomaRt_getBM_batch)
 export(blank_plot)
@@ -49,4 +48,3 @@ export(remove_duplicate_SNP)
 export(remove_nearby_positions)
 export(replace_na_with_mean)
 export(susieR_cs_table)
-importFrom(magrittr,"%>%")
@@ -83,9 +83,9 @@ format_data <- function(dat, type = "exposure", snps = NULL, header = TRUE,
   #   return(x)
   # })
 
-  dat <- dat %>%
-    dplyr::group_by(!!rlang::sym(type)) %>%
-    dplyr::distinct(!!rlang::sym(snp_col), .keep_all = TRUE) %>%
+  dat <- dat |>
+    dplyr::group_by(!!rlang::sym(type)) |>
+    dplyr::distinct(!!rlang::sym(snp_col), .keep_all = TRUE) |>
     dplyr::ungroup()
 
   mr_cols_required <- c(snp_col, beta_col, se_col, effect_allele_col)
@@ -402,9 +402,9 @@ format_data2 <- function(dat, type = "exposure", snps = NULL, header = TRUE,
   #   return(x)
   # })
 
-  dat <- dat %>%
-    dplyr::group_by(!!rlang::sym(type)) %>%
-    dplyr::distinct(!!rlang::sym(snp_col), .keep_all = TRUE) %>%
+  dat <- dat |>
+    dplyr::group_by(!!rlang::sym(type)) |>
+    dplyr::distinct(!!rlang::sym(snp_col), .keep_all = TRUE) |>
     dplyr::ungroup()
 
   mr_cols_required <- c(snp_col, beta_col, se_col, effect_allele_col)
@@ -788,7 +788,7 @@ proxy_search <- function(data_exposure, data_outcome, data_outcome_path, data_re
     out = tempfile()
   )
   ## format proxy data: change column order and names, add proxy.outcome = TRUE
-  proxies <- proxies %>%
+  proxies <- proxies |>
     dplyr::select(
       target_snp.outcome = SNP_A,
       proxy_snp.outcome = SNP_B,
@@ -797,15 +797,15 @@ proxy_search <- function(data_exposure, data_outcome, data_outcome_path, data_re
       proxy_a1.outcome = B1,
       proxy_a2.outcome = B2,
       R
-    ) %>%
-    dplyr::mutate(proxy.outcome = TRUE) %>%
+    ) |>
+    dplyr::mutate(proxy.outcome = TRUE) |>
     dplyr::select(proxy.outcome, dplyr::everything())
   message(paste0("## proxy-SNP(s) for ", length(unique(proxies$target_snp.outcome)), " missing-SNP(s) found; ", "proxy-SNP(s) for ", length(unique(as.factor(snps_reference))) - length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) not available (e.g., no proxy-SNP or r2 < provided)"))
 
   # extract proxies from outcome ====
   message(paste0("# 3. extracting proxy-SNP(s) from outcome"))
-  proxy_snps <- proxies %>% # select unique proxy SNPs to extract
-    dplyr::distinct(proxy_snp.outcome) %>%
+  proxy_snps <- proxies |> # select unique proxy SNPs to extract
+    dplyr::distinct(proxy_snp.outcome) |>
     dplyr::pull(proxy_snp.outcome)
   data_outcome_proxies <- TwoSampleMR::read_outcome_data(
     filename = data_outcome_path,
@@ -824,8 +824,8 @@ proxy_search <- function(data_exposure, data_outcome, data_outcome_path, data_re
     chr_col = outcome_CHR,
     pos_col = outcome_POS
   )
-  data_outcome_proxies <- data_outcome_proxies %>%
-    dplyr::left_join(proxies, by = c("SNP" = "proxy_snp.outcome")) %>%
+  data_outcome_proxies <- data_outcome_proxies |>
+    dplyr::left_join(proxies, by = c("SNP" = "proxy_snp.outcome")) |>
     dplyr::mutate(
       proxy_snp.outcome = SNP, # add proxy_snp.outcome back in (SNP is the proxies found in the outcome)
       SNP = target_snp.outcome, # make SNP the original (i.e., missing) SNP
@@ -834,10 +834,10 @@ proxy_search <- function(data_exposure, data_outcome, data_outcome_path, data_re
   message(paste0("## proxy-SNP(s) for ", length(unique(as.factor(data_outcome_proxies$target_snp.outcome))), " of ", length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) extracted"))
 
   # select proxy-SNP(s) with the highest R2 ====
-  data_outcome_proxies <- data_outcome_proxies %>%
-    dplyr::group_by(target_snp.outcome) %>%
-    dplyr::filter(R == max(R)) %>%
-    dplyr::slice(1) %>%
+  data_outcome_proxies <- data_outcome_proxies |>
+    dplyr::group_by(target_snp.outcome) |>
+    dplyr::filter(R == max(R)) |>
+    dplyr::slice(1) |>
     dplyr::select(-R)
 
   ## Bind rows of data_outcome with data_outcome_proxies
@@ -912,7 +912,7 @@ proxy_search_split <- function(data_exposure, data_outcome, data_outcome_path, d
       out = tempfile()
     )
     ## format proxy data: change column order and names, add proxy.outcome = TRUE
-    proxies <- proxies %>%
+    proxies <- proxies |>
       dplyr::select(
         target_snp.outcome = SNP_A,
         proxy_snp.outcome = SNP_B,
@@ -921,18 +921,18 @@ proxy_search_split <- function(data_exposure, data_outcome, data_outcome_path, d
         proxy_a1.outcome = B1,
         proxy_a2.outcome = B2,
         R
-      ) %>%
+      ) |>
       dplyr::mutate(
         proxy.outcome = TRUE,
         SNP = proxy_snp.outcome
-      ) %>%
+      ) |>
       dplyr::select(proxy.outcome, dplyr::everything())
     message(paste0("## proxy-SNP(s) for ", length(unique(proxies$target_snp.outcome)), " missing-SNP(s) found; ", "proxy-SNP(s) for ", length(unique(as.factor(snps_reference))) - length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) not available (e.g., no proxy-SNP or r2 < provided)"))
 
     # extract proxies from outcome ====
     message(paste0("# 3. extracting proxy-SNP(s) from outcome"))
-    proxy_snps <- proxies %>% # select unique proxy SNPs to extract
-      dplyr::distinct(proxy_snp.outcome) %>%
+    proxy_snps <- proxies |> # select unique proxy SNPs to extract
+      dplyr::distinct(proxy_snp.outcome) |>
       dplyr::pull(proxy_snp.outcome)
     data_outcome_proxies <- TwoSampleMR::read_outcome_data(
       filename = data_outcome_path,
@@ -955,10 +955,10 @@ proxy_search_split <- function(data_exposure, data_outcome, data_outcome_path, d
     message(paste0("## proxy-SNP(s) for ", length(unique(as.factor(data_outcome_proxies$target_snp.outcome))), " of ", length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) extracted"))
 
     # select proxy-SNP(s) with the highest R2 ====
-    data_outcome_proxies <- data_outcome_proxies %>%
-      dplyr::group_by(target_snp.outcome) %>%
-      dplyr::filter(R == max(R)) %>%
-      dplyr::slice(1) %>%
+    data_outcome_proxies <- data_outcome_proxies |>
+      dplyr::group_by(target_snp.outcome) |>
+      dplyr::filter(R == max(R)) |>
+      dplyr::slice(1) |>
       dplyr::select(-R)
 
     ## Bind rows of data_outcome with data_outcome_proxies
@@ -1051,7 +1051,7 @@ proxy_search_DECODE <- function(data_exposure, data_outcome, data_outcome_path,
     out = tempfile()
   )
   ## format proxy data: change column order and names, add proxy.outcome = TRUE
-  proxies <- proxies %>%
+  proxies <- proxies |>
     dplyr::select(
       target_snp.outcome = SNP_A,
       proxy_snp.outcome = SNP_B,
@@ -1060,15 +1060,15 @@ proxy_search_DECODE <- function(data_exposure, data_outcome, data_outcome_path,
       proxy_a1.outcome = B1,
       proxy_a2.outcome = B2,
       R
-    ) %>%
-    dplyr::mutate(proxy.outcome = TRUE) %>%
+    ) |>
+    dplyr::mutate(proxy.outcome = TRUE) |>
     dplyr::select(proxy.outcome, dplyr::everything())
   message(paste0("## proxy-SNP(s) for ", length(unique(proxies$target_snp.outcome)), " missing-SNP(s) found; ", "proxy-SNP(s) for ", length(unique(as.factor(snps_reference))) - length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) not available (e.g., no proxy-SNP or r2 < provided)"))
 
   # extract proxies from outcome ====
   message(paste0("# 3. extracting proxy-SNP(s) from outcome"))
-  proxy_snps <- proxies %>%
-    dplyr::distinct(proxy_snp.outcome) %>%
+  proxy_snps <- proxies |>
+    dplyr::distinct(proxy_snp.outcome) |>
     dplyr::pull(proxy_snp.outcome)
   data_outcome_proxies <- data.table::fread(data_outcome_path,
     header = FALSE, sep = "\t",
@@ -1079,9 +1079,9 @@ proxy_search_DECODE <- function(data_exposure, data_outcome, data_outcome_path,
     )
   )
 
-  data_outcome_proxies <- data_outcome_proxies %>%
-    # dplyr::filter(SNP %in% proxy_snps) %>%
-    dplyr::left_join(proxies, by = c("SNP" = "proxy_snp.outcome")) %>%
+  data_outcome_proxies <- data_outcome_proxies |>
+    # dplyr::filter(SNP %in% proxy_snps) |>
+    dplyr::left_join(proxies, by = c("SNP" = "proxy_snp.outcome")) |>
     dplyr::mutate(
       proxy_snp.outcome = SNP, # add proxy_snp.outcome back in (SNP is the proxies found in the outcome)
       SNP = target_snp.outcome, # make SNP the original (i.e., missing) SNP
@@ -1090,10 +1090,10 @@ proxy_search_DECODE <- function(data_exposure, data_outcome, data_outcome_path,
   message(paste0("## proxy-SNP(s) for ", length(unique(as.factor(data_outcome_proxies$target_snp.outcome))), " of ", length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) extracted"))
 
   # select proxy-SNP(s) with the highest R2 ====
-  data_outcome_proxies <- data_outcome_proxies %>%
-    dplyr::group_by(target_snp.outcome) %>%
-    dplyr::filter(R == max(R)) %>%
-    dplyr::slice(1) %>%
+  data_outcome_proxies <- data_outcome_proxies |>
+    dplyr::group_by(target_snp.outcome) |>
+    dplyr::filter(R == max(R)) |>
+    dplyr::slice(1) |>
     dplyr::select(-R)
 
   # format data and bind with original outcome data ====
@@ -1183,7 +1183,7 @@ proxy_search_UKB <- function(data_exposure, data_outcome, data_outcome_path, dat
     out = tempfile()
   )
   ## format proxy data: change column order and names, add proxy.outcome = TRUE
-  proxies <- proxies %>%
+  proxies <- proxies |>
     dplyr::select(
       target_snp.outcome = SNP_A,
       proxy_snp.outcome = SNP_B,
@@ -1192,29 +1192,29 @@ proxy_search_UKB <- function(data_exposure, data_outcome, data_outcome_path, dat
       proxy_a1.outcome = B1,
       proxy_a2.outcome = B2,
       R
-    ) %>%
-    dplyr::mutate(proxy.outcome = TRUE) %>%
+    ) |>
+    dplyr::mutate(proxy.outcome = TRUE) |>
     dplyr::select(proxy.outcome, dplyr::everything())
   message(paste0("## proxy-SNP(s) for ", length(unique(proxies$target_snp.outcome)), " missing-SNP(s) found; ", "proxy-SNP(s) for ", length(unique(as.factor(snps_reference))) - length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) not available (e.g., no proxy-SNP or r2 < provided)"))
 
   # extract proxies from outcome ====
   message(paste0("# 3. extracting proxy-SNP(s) from outcome"))
-  proxy_snps <- proxies %>%
-    dplyr::distinct(proxy_snp.outcome) %>%
+  proxy_snps <- proxies |>
+    dplyr::distinct(proxy_snp.outcome) |>
     dplyr::pull(proxy_snp.outcome)
   data_outcome_proxies <- data.table::fread(data_outcome_path,
     header = FALSE,
     col.names = c("ID", "REF", "ALT", "SNP", "POS19", "POS38")
   )
-  data_outcome_proxies <- data_outcome_proxies %>%
+  data_outcome_proxies <- data_outcome_proxies |>
     dplyr::filter(SNP %in% proxy_snps)
   data_outcome_proxies <- tidyr::separate(data_outcome_proxies, ID, into = c(
     "chr.outcome", "pos.outcome", "ID", "other_allele.outcome", "effect_allele.outcome", "eaf.outcome",
     "INFO", "samplesize.outcome", "TEST", "beta.outcome", "se.outcome", "CHISQ", "LOG10P", "EXTRA", "outcome", "ID2"
   ), sep = " ")
-  data_outcome_proxies <- data_outcome_proxies %>%
-    # dplyr::filter(SNP %in% proxy_snps) %>%
-    dplyr::left_join(proxies, by = c("SNP" = "proxy_snp.outcome")) %>%
+  data_outcome_proxies <- data_outcome_proxies |>
+    # dplyr::filter(SNP %in% proxy_snps) |>
+    dplyr::left_join(proxies, by = c("SNP" = "proxy_snp.outcome")) |>
     dplyr::mutate(
       proxy_snp.outcome = SNP, # add proxy_snp.outcome back in (SNP is the proxies found in the outcome)
       SNP = target_snp.outcome, # make SNP the original (i.e., missing) SNP
@@ -1223,10 +1223,10 @@ proxy_search_UKB <- function(data_exposure, data_outcome, data_outcome_path, dat
   message(paste0("## proxy-SNP(s) for ", length(unique(as.factor(data_outcome_proxies$target_snp.outcome))), " of ", length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) extracted"))
 
   # select proxy-SNP(s) with the highest R2 ====
-  data_outcome_proxies <- data_outcome_proxies %>%
-    dplyr::group_by(target_snp.outcome) %>%
-    dplyr::filter(R == max(R)) %>%
-    dplyr::slice(1) %>%
+  data_outcome_proxies <- data_outcome_proxies |>
+    dplyr::group_by(target_snp.outcome) |>
+    dplyr::filter(R == max(R)) |>
+    dplyr::slice(1) |>
     dplyr::select(-R)
 
   # format data and bind with original outcome data ====
@@ -1283,18 +1283,21 @@ calculate_r2 <- function(data) {
 #' @return Data frame with fstat values added.
 #' @export
 calculate_fstat_from_r2 <- function(data, grouping_column) {
+  # Capture the grouping column name
+  group_col_name <- rlang::as_name(rlang::enquo(grouping_column))
+
   # Group by the specified column and calculate fstat for each group
-  result <- data %>%
-    dplyr::group_by({{ grouping_column }}) %>%
+  result <- data |>
+    dplyr::group_by(!!rlang::sym(group_col_name)) |>
     dplyr::mutate(
       k = dplyr::n_distinct(SNP), # Number of unique SNPs
       fstat_from_r2 = r2 * (samplesize.exposure - 1 - k) / ((1 - r2) * k)
-    ) %>%
-    dplyr::ungroup() %>%
-    dplyr::select(-k) %>% # Remove the auxiliary column
-    dplyr::group_by({{ grouping_column }}) %>%
-    dplyr::summarize(mean_fstat_from_r2 = mean(fstat_from_r2, na.rm = TRUE)) %>%
-    dplyr::left_join(data, by = {{ grouping_column }})
+    ) |>
+    dplyr::ungroup() |>
+    dplyr::select(-k) |> # Remove the auxiliary column
+    dplyr::group_by(!!rlang::sym(group_col_name)) |>
+    dplyr::summarize(mean_fstat_from_r2 = mean(fstat_from_r2, na.rm = TRUE)) |>
+    dplyr::left_join(data, by = group_col_name)
   return(result)
 }
 
@@ -1309,12 +1312,12 @@ calculate_fstat_from_beta_se <- function(data) {
   f_stats <- (b / se)^2
   data$fstat_from_b_se <- f_stats
   # Calculate mean f_stats for each level of id.exposure
-  fstat_mean_from_b_se <- data %>%
-    dplyr::group_by(id.exposure) %>%
-    dplyr::summarize(mean_f_stats = mean(f_stats, na.rm = TRUE)) %>%
+  fstat_mean_from_b_se <- data |>
+    dplyr::group_by(id.exposure) |>
+    dplyr::summarize(mean_f_stats = mean(f_stats, na.rm = TRUE)) |>
     dplyr::ungroup()
   # Merge the mean_f_stats column with the original data
-  data <- dplyr::left_join(data, mean_f_stats, by = "id.exposure")
+  data <- dplyr::left_join(data, fstat_mean_from_b_se, by = "id.exposure")
   return(data)
 }
 
@@ -1326,14 +1329,14 @@ calculate_fstat_from_beta_se <- function(data) {
 #' @return Data frame with missing values replaced by mean.
 #' @export
 replace_na_with_mean <- function(data, grouping_column, column_name) {
-  mean_values <- data %>%
-    dplyr::group_by({{ grouping_column }}) %>%
-    dplyr::summarize(mean_value = mean({{ column_name }}, na.rm = TRUE)) %>%
+  mean_values <- data |>
+    dplyr::group_by({{ grouping_column }}) |>
+    dplyr::summarize(mean_value = mean({{ column_name }}, na.rm = TRUE)) |>
     dplyr::filter(!is.infinite(mean_value)) # Filter out Inf values
 
-  result <- data %>%
-    dplyr::left_join(mean_values, by = {{ grouping_column }}) %>%
-    dplyr::mutate({{ column_name }} := ifelse(is.na({{ column_name }}), mean_value, {{ column_name }})) %>%
+  result <- data |>
+    dplyr::left_join(mean_values, by = {{ grouping_column }}) |>
+    dplyr::mutate({{ column_name }} := ifelse(is.na({{ column_name }}), mean_value, {{ column_name }})) |>
     dplyr::select(-mean_value)
 
   return(result)
@@ -1361,18 +1364,18 @@ missing_EAF <- function(df, reference, column_EAF) {
       header = TRUE,
       select = c("Predictor", "A1", "A2", "MAF"), # Select only necessary columns
       data.table = FALSE
-    ) %>%
-      dplyr::filter(Predictor %in% df$SNP) %>%
+    ) |>
+      dplyr::filter(Predictor %in% df$SNP) |>
       dplyr::rename(EAF = MAF)
 
-    df <- df %>%
-      dplyr::left_join(EAF, by = c("SNP" = "Predictor")) %>%
+    df <- df |>
+      dplyr::left_join(EAF, by = c("SNP" = "Predictor")) |>
       dplyr::mutate(!!column_EAF := ifelse(is.na(!!rlang::sym(column_EAF)),
         ifelse(effect_allele.outcome == A1 & other_allele.outcome == A2, EAF,
           ifelse(effect_allele.outcome == A2 & other_allele.outcome == A1, 1 - EAF, !!rlang::sym(column_EAF))
         ),
         !!rlang::sym(column_EAF)
-      )) %>%
+      )) |>
       dplyr::select(-A1, -A2, -EAF)
   }
   return(df)