Skip to content

Commit cf63a40

Browse files
committed
refactor: migrate from magrittr pipe (%>%) to native R pipe (|>)
- Replace all instances of %>% with |> across R/, vignettes/, and tests/
- Remove magrittr from DESCRIPTION Imports
- Delete obsolete R/utils-pipe.R and man/pipe.Rd files
- Update all vignettes to use native pipe operator
1 parent 00f93d0 commit cf63a40

File tree

128 files changed

+2087
-2861
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

128 files changed

+2087
-2861
lines changed

DESCRIPTION

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ RoxygenNote: 7.3.3
1313
Imports:
1414
dplyr, forcats, ggplot2, tibble, tidyr, purrr,
1515
plyr,
16-
magrittr,
1716
data.table,
1817
here,
1918
rmarkdown,

NAMESPACE

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# Generated by roxygen2: do not edit by hand
22

3-
export("%>%")
43
export(alignment_check)
54
export(biomaRt_getBM_batch)
65
export(blank_plot)
@@ -49,4 +48,3 @@ export(remove_duplicate_SNP)
4948
export(remove_nearby_positions)
5049
export(replace_na_with_mean)
5150
export(susieR_cs_table)
52-
importFrom(magrittr,"%>%")

R/TwoSampleMR_functions.R

Lines changed: 77 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,9 @@ format_data <- function(dat, type = "exposure", snps = NULL, header = TRUE,
8383
# return(x)
8484
# })
8585

86-
dat <- dat %>%
87-
dplyr::group_by(!!rlang::sym(type)) %>%
88-
dplyr::distinct(!!rlang::sym(snp_col), .keep_all = TRUE) %>%
86+
dat <- dat |>
87+
dplyr::group_by(!!rlang::sym(type)) |>
88+
dplyr::distinct(!!rlang::sym(snp_col), .keep_all = TRUE) |>
8989
dplyr::ungroup()
9090

9191
mr_cols_required <- c(snp_col, beta_col, se_col, effect_allele_col)
@@ -402,9 +402,9 @@ format_data2 <- function(dat, type = "exposure", snps = NULL, header = TRUE,
402402
# return(x)
403403
# })
404404

405-
dat <- dat %>%
406-
dplyr::group_by(!!rlang::sym(type)) %>%
407-
dplyr::distinct(!!rlang::sym(snp_col), .keep_all = TRUE) %>%
405+
dat <- dat |>
406+
dplyr::group_by(!!rlang::sym(type)) |>
407+
dplyr::distinct(!!rlang::sym(snp_col), .keep_all = TRUE) |>
408408
dplyr::ungroup()
409409

410410
mr_cols_required <- c(snp_col, beta_col, se_col, effect_allele_col)
@@ -788,7 +788,7 @@ proxy_search <- function(data_exposure, data_outcome, data_outcome_path, data_re
788788
out = tempfile()
789789
)
790790
## format proxy data: change column order and names, add proxy.outcome = TRUE
791-
proxies <- proxies %>%
791+
proxies <- proxies |>
792792
dplyr::select(
793793
target_snp.outcome = SNP_A,
794794
proxy_snp.outcome = SNP_B,
@@ -797,15 +797,15 @@ proxy_search <- function(data_exposure, data_outcome, data_outcome_path, data_re
797797
proxy_a1.outcome = B1,
798798
proxy_a2.outcome = B2,
799799
R
800-
) %>%
801-
dplyr::mutate(proxy.outcome = TRUE) %>%
800+
) |>
801+
dplyr::mutate(proxy.outcome = TRUE) |>
802802
dplyr::select(proxy.outcome, dplyr::everything())
803803
message(paste0("## proxy-SNP(s) for ", length(unique(proxies$target_snp.outcome)), " missing-SNP(s) found; ", "proxy-SNP(s) for ", length(unique(as.factor(snps_reference))) - length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) not available (e.g., no proxy-SNP or r2 < provided)"))
804804

805805
# extract proxies from outcome ====
806806
message(paste0("# 3. extracting proxy-SNP(s) from outcome"))
807-
proxy_snps <- proxies %>% # select unique proxy SNPs to extract
808-
dplyr::distinct(proxy_snp.outcome) %>%
807+
proxy_snps <- proxies |> # select unique proxy SNPs to extract
808+
dplyr::distinct(proxy_snp.outcome) |>
809809
dplyr::pull(proxy_snp.outcome)
810810
data_outcome_proxies <- TwoSampleMR::read_outcome_data(
811811
filename = data_outcome_path,
@@ -824,8 +824,8 @@ proxy_search <- function(data_exposure, data_outcome, data_outcome_path, data_re
824824
chr_col = outcome_CHR,
825825
pos_col = outcome_POS
826826
)
827-
data_outcome_proxies <- data_outcome_proxies %>%
828-
dplyr::left_join(proxies, by = c("SNP" = "proxy_snp.outcome")) %>%
827+
data_outcome_proxies <- data_outcome_proxies |>
828+
dplyr::left_join(proxies, by = c("SNP" = "proxy_snp.outcome")) |>
829829
dplyr::mutate(
830830
proxy_snp.outcome = SNP, # add proxy_snp.outcome back in (SNP is the proxies found in the outcome)
831831
SNP = target_snp.outcome, # make SNP the original (i.e., missing) SNP
@@ -834,10 +834,10 @@ proxy_search <- function(data_exposure, data_outcome, data_outcome_path, data_re
834834
message(paste0("## proxy-SNP(s) for ", length(unique(as.factor(data_outcome_proxies$target_snp.outcome))), " of ", length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) extracted"))
835835

836836
# select proxy-SNP(s) with the highest R2 ====
837-
data_outcome_proxies <- data_outcome_proxies %>%
838-
dplyr::group_by(target_snp.outcome) %>%
839-
dplyr::filter(R == max(R)) %>%
840-
dplyr::slice(1) %>%
837+
data_outcome_proxies <- data_outcome_proxies |>
838+
dplyr::group_by(target_snp.outcome) |>
839+
dplyr::filter(R == max(R)) |>
840+
dplyr::slice(1) |>
841841
dplyr::select(-R)
842842

843843
## Bind rows of data_outcome with data_outcome_proxies
@@ -912,7 +912,7 @@ proxy_search_split <- function(data_exposure, data_outcome, data_outcome_path, d
912912
out = tempfile()
913913
)
914914
## format proxy data: change column order and names, add proxy.outcome = TRUE
915-
proxies <- proxies %>%
915+
proxies <- proxies |>
916916
dplyr::select(
917917
target_snp.outcome = SNP_A,
918918
proxy_snp.outcome = SNP_B,
@@ -921,18 +921,18 @@ proxy_search_split <- function(data_exposure, data_outcome, data_outcome_path, d
921921
proxy_a1.outcome = B1,
922922
proxy_a2.outcome = B2,
923923
R
924-
) %>%
924+
) |>
925925
dplyr::mutate(
926926
proxy.outcome = TRUE,
927927
SNP = proxy_snp.outcome
928-
) %>%
928+
) |>
929929
dplyr::select(proxy.outcome, dplyr::everything())
930930
message(paste0("## proxy-SNP(s) for ", length(unique(proxies$target_snp.outcome)), " missing-SNP(s) found; ", "proxy-SNP(s) for ", length(unique(as.factor(snps_reference))) - length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) not available (e.g., no proxy-SNP or r2 < provided)"))
931931

932932
# extract proxies from outcome ====
933933
message(paste0("# 3. extracting proxy-SNP(s) from outcome"))
934-
proxy_snps <- proxies %>% # select unique proxy SNPs to extract
935-
dplyr::distinct(proxy_snp.outcome) %>%
934+
proxy_snps <- proxies |> # select unique proxy SNPs to extract
935+
dplyr::distinct(proxy_snp.outcome) |>
936936
dplyr::pull(proxy_snp.outcome)
937937
data_outcome_proxies <- TwoSampleMR::read_outcome_data(
938938
filename = data_outcome_path,
@@ -955,10 +955,10 @@ proxy_search_split <- function(data_exposure, data_outcome, data_outcome_path, d
955955
message(paste0("## proxy-SNP(s) for ", length(unique(as.factor(data_outcome_proxies$target_snp.outcome))), " of ", length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) extracted"))
956956

957957
# select proxy-SNP(s) with the highest R2 ====
958-
data_outcome_proxies <- data_outcome_proxies %>%
959-
dplyr::group_by(target_snp.outcome) %>%
960-
dplyr::filter(R == max(R)) %>%
961-
dplyr::slice(1) %>%
958+
data_outcome_proxies <- data_outcome_proxies |>
959+
dplyr::group_by(target_snp.outcome) |>
960+
dplyr::filter(R == max(R)) |>
961+
dplyr::slice(1) |>
962962
dplyr::select(-R)
963963

964964
## Bind rows of data_outcome with data_outcome_proxies
@@ -1051,7 +1051,7 @@ proxy_search_DECODE <- function(data_exposure, data_outcome, data_outcome_path,
10511051
out = tempfile()
10521052
)
10531053
## format proxy data: change column order and names, add proxy.outcome = TRUE
1054-
proxies <- proxies %>%
1054+
proxies <- proxies |>
10551055
dplyr::select(
10561056
target_snp.outcome = SNP_A,
10571057
proxy_snp.outcome = SNP_B,
@@ -1060,15 +1060,15 @@ proxy_search_DECODE <- function(data_exposure, data_outcome, data_outcome_path,
10601060
proxy_a1.outcome = B1,
10611061
proxy_a2.outcome = B2,
10621062
R
1063-
) %>%
1064-
dplyr::mutate(proxy.outcome = TRUE) %>%
1063+
) |>
1064+
dplyr::mutate(proxy.outcome = TRUE) |>
10651065
dplyr::select(proxy.outcome, dplyr::everything())
10661066
message(paste0("## proxy-SNP(s) for ", length(unique(proxies$target_snp.outcome)), " missing-SNP(s) found; ", "proxy-SNP(s) for ", length(unique(as.factor(snps_reference))) - length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) not available (e.g., no proxy-SNP or r2 < provided)"))
10671067

10681068
# extract proxies from outcome ====
10691069
message(paste0("# 3. extracting proxy-SNP(s) from outcome"))
1070-
proxy_snps <- proxies %>%
1071-
dplyr::distinct(proxy_snp.outcome) %>%
1070+
proxy_snps <- proxies |>
1071+
dplyr::distinct(proxy_snp.outcome) |>
10721072
dplyr::pull(proxy_snp.outcome)
10731073
data_outcome_proxies <- data.table::fread(data_outcome_path,
10741074
header = FALSE, sep = "\t",
@@ -1079,9 +1079,9 @@ proxy_search_DECODE <- function(data_exposure, data_outcome, data_outcome_path,
10791079
)
10801080
)
10811081

1082-
data_outcome_proxies <- data_outcome_proxies %>%
1083-
# dplyr::filter(SNP %in% proxy_snps) %>%
1084-
dplyr::left_join(proxies, by = c("SNP" = "proxy_snp.outcome")) %>%
1082+
data_outcome_proxies <- data_outcome_proxies |>
1083+
# dplyr::filter(SNP %in% proxy_snps) |>
1084+
dplyr::left_join(proxies, by = c("SNP" = "proxy_snp.outcome")) |>
10851085
dplyr::mutate(
10861086
proxy_snp.outcome = SNP, # add proxy_snp.outcome back in (SNP is the proxies found in the outcome)
10871087
SNP = target_snp.outcome, # make SNP the original (i.e., missing) SNP
@@ -1090,10 +1090,10 @@ proxy_search_DECODE <- function(data_exposure, data_outcome, data_outcome_path,
10901090
message(paste0("## proxy-SNP(s) for ", length(unique(as.factor(data_outcome_proxies$target_snp.outcome))), " of ", length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) extracted"))
10911091

10921092
# select proxy-SNP(s) with the highest R2 ====
1093-
data_outcome_proxies <- data_outcome_proxies %>%
1094-
dplyr::group_by(target_snp.outcome) %>%
1095-
dplyr::filter(R == max(R)) %>%
1096-
dplyr::slice(1) %>%
1093+
data_outcome_proxies <- data_outcome_proxies |>
1094+
dplyr::group_by(target_snp.outcome) |>
1095+
dplyr::filter(R == max(R)) |>
1096+
dplyr::slice(1) |>
10971097
dplyr::select(-R)
10981098

10991099
# format data and bind with original outcome data ====
@@ -1183,7 +1183,7 @@ proxy_search_UKB <- function(data_exposure, data_outcome, data_outcome_path, dat
11831183
out = tempfile()
11841184
)
11851185
## format proxy data: change column order and names, add proxy.outcome = TRUE
1186-
proxies <- proxies %>%
1186+
proxies <- proxies |>
11871187
dplyr::select(
11881188
target_snp.outcome = SNP_A,
11891189
proxy_snp.outcome = SNP_B,
@@ -1192,29 +1192,29 @@ proxy_search_UKB <- function(data_exposure, data_outcome, data_outcome_path, dat
11921192
proxy_a1.outcome = B1,
11931193
proxy_a2.outcome = B2,
11941194
R
1195-
) %>%
1196-
dplyr::mutate(proxy.outcome = TRUE) %>%
1195+
) |>
1196+
dplyr::mutate(proxy.outcome = TRUE) |>
11971197
dplyr::select(proxy.outcome, dplyr::everything())
11981198
message(paste0("## proxy-SNP(s) for ", length(unique(proxies$target_snp.outcome)), " missing-SNP(s) found; ", "proxy-SNP(s) for ", length(unique(as.factor(snps_reference))) - length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) not available (e.g., no proxy-SNP or r2 < provided)"))
11991199

12001200
# extract proxies from outcome ====
12011201
message(paste0("# 3. extracting proxy-SNP(s) from outcome"))
1202-
proxy_snps <- proxies %>%
1203-
dplyr::distinct(proxy_snp.outcome) %>%
1202+
proxy_snps <- proxies |>
1203+
dplyr::distinct(proxy_snp.outcome) |>
12041204
dplyr::pull(proxy_snp.outcome)
12051205
data_outcome_proxies <- data.table::fread(data_outcome_path,
12061206
header = FALSE,
12071207
col.names = c("ID", "REF", "ALT", "SNP", "POS19", "POS38")
12081208
)
1209-
data_outcome_proxies <- data_outcome_proxies %>%
1209+
data_outcome_proxies <- data_outcome_proxies |>
12101210
dplyr::filter(SNP %in% proxy_snps)
12111211
data_outcome_proxies <- tidyr::separate(data_outcome_proxies, ID, into = c(
12121212
"chr.outcome", "pos.outcome", "ID", "other_allele.outcome", "effect_allele.outcome", "eaf.outcome",
12131213
"INFO", "samplesize.outcome", "TEST", "beta.outcome", "se.outcome", "CHISQ", "LOG10P", "EXTRA", "outcome", "ID2"
12141214
), sep = " ")
1215-
data_outcome_proxies <- data_outcome_proxies %>%
1216-
# dplyr::filter(SNP %in% proxy_snps) %>%
1217-
dplyr::left_join(proxies, by = c("SNP" = "proxy_snp.outcome")) %>%
1215+
data_outcome_proxies <- data_outcome_proxies |>
1216+
# dplyr::filter(SNP %in% proxy_snps) |>
1217+
dplyr::left_join(proxies, by = c("SNP" = "proxy_snp.outcome")) |>
12181218
dplyr::mutate(
12191219
proxy_snp.outcome = SNP, # add proxy_snp.outcome back in (SNP is the proxies found in the outcome)
12201220
SNP = target_snp.outcome, # make SNP the original (i.e., missing) SNP
@@ -1223,10 +1223,10 @@ proxy_search_UKB <- function(data_exposure, data_outcome, data_outcome_path, dat
12231223
message(paste0("## proxy-SNP(s) for ", length(unique(as.factor(data_outcome_proxies$target_snp.outcome))), " of ", length(unique(as.factor(proxies$target_snp.outcome))), " missing-SNP(s) extracted"))
12241224

12251225
# select proxy-SNP(s) with the highest R2 ====
1226-
data_outcome_proxies <- data_outcome_proxies %>%
1227-
dplyr::group_by(target_snp.outcome) %>%
1228-
dplyr::filter(R == max(R)) %>%
1229-
dplyr::slice(1) %>%
1226+
data_outcome_proxies <- data_outcome_proxies |>
1227+
dplyr::group_by(target_snp.outcome) |>
1228+
dplyr::filter(R == max(R)) |>
1229+
dplyr::slice(1) |>
12301230
dplyr::select(-R)
12311231

12321232
# format data and bind with original outcome data ====
@@ -1283,18 +1283,21 @@ calculate_r2 <- function(data) {
12831283
#' @return Data frame with fstat values added.
12841284
#' @export
12851285
calculate_fstat_from_r2 <- function(data, grouping_column) {
1286+
# Capture the grouping column name
1287+
group_col_name <- rlang::as_name(rlang::enquo(grouping_column))
1288+
12861289
# Group by the specified column and calculate fstat for each group
1287-
result <- data %>%
1288-
dplyr::group_by({{ grouping_column }}) %>%
1290+
result <- data |>
1291+
dplyr::group_by(!!rlang::sym(group_col_name)) |>
12891292
dplyr::mutate(
12901293
k = dplyr::n_distinct(SNP), # Number of unique SNPs
12911294
fstat_from_r2 = r2 * (samplesize.exposure - 1 - k) / ((1 - r2) * k)
1292-
) %>%
1293-
dplyr::ungroup() %>%
1294-
dplyr::select(-k) %>% # Remove the auxiliary column
1295-
dplyr::group_by({{ grouping_column }}) %>%
1296-
dplyr::summarize(mean_fstat_from_r2 = mean(fstat_from_r2, na.rm = TRUE)) %>%
1297-
dplyr::left_join(data, by = {{ grouping_column }})
1295+
) |>
1296+
dplyr::ungroup() |>
1297+
dplyr::select(-k) |> # Remove the auxiliary column
1298+
dplyr::group_by(!!rlang::sym(group_col_name)) |>
1299+
dplyr::summarize(mean_fstat_from_r2 = mean(fstat_from_r2, na.rm = TRUE)) |>
1300+
dplyr::left_join(data, by = group_col_name)
12981301
return(result)
12991302
}
13001303

@@ -1309,12 +1312,12 @@ calculate_fstat_from_beta_se <- function(data) {
13091312
f_stats <- (b / se)^2
13101313
data$fstat_from_b_se <- f_stats
13111314
# Calculate mean f_stats for each level of id.exposure
1312-
fstat_mean_from_b_se <- data %>%
1313-
dplyr::group_by(id.exposure) %>%
1314-
dplyr::summarize(mean_f_stats = mean(f_stats, na.rm = TRUE)) %>%
1315+
fstat_mean_from_b_se <- data |>
1316+
dplyr::group_by(id.exposure) |>
1317+
dplyr::summarize(mean_f_stats = mean(f_stats, na.rm = TRUE)) |>
13151318
dplyr::ungroup()
13161319
# Merge the mean_f_stats column with the original data
1317-
data <- dplyr::left_join(data, mean_f_stats, by = "id.exposure")
1320+
data <- dplyr::left_join(data, fstat_mean_from_b_se, by = "id.exposure")
13181321
return(data)
13191322
}
13201323

@@ -1326,14 +1329,14 @@ calculate_fstat_from_beta_se <- function(data) {
13261329
#' @return Data frame with missing values replaced by mean.
13271330
#' @export
13281331
replace_na_with_mean <- function(data, grouping_column, column_name) {
1329-
mean_values <- data %>%
1330-
dplyr::group_by({{ grouping_column }}) %>%
1331-
dplyr::summarize(mean_value = mean({{ column_name }}, na.rm = TRUE)) %>%
1332+
mean_values <- data |>
1333+
dplyr::group_by({{ grouping_column }}) |>
1334+
dplyr::summarize(mean_value = mean({{ column_name }}, na.rm = TRUE)) |>
13321335
dplyr::filter(!is.infinite(mean_value)) # Filter out Inf values
13331336

1334-
result <- data %>%
1335-
dplyr::left_join(mean_values, by = {{ grouping_column }}) %>%
1336-
dplyr::mutate({{ column_name }} := ifelse(is.na({{ column_name }}), mean_value, {{ column_name }})) %>%
1337+
result <- data |>
1338+
dplyr::left_join(mean_values, by = {{ grouping_column }}) |>
1339+
dplyr::mutate({{ column_name }} := ifelse(is.na({{ column_name }}), mean_value, {{ column_name }})) |>
13371340
dplyr::select(-mean_value)
13381341

13391342
return(result)
@@ -1361,18 +1364,18 @@ missing_EAF <- function(df, reference, column_EAF) {
13611364
header = TRUE,
13621365
select = c("Predictor", "A1", "A2", "MAF"), # Select only necessary columns
13631366
data.table = FALSE
1364-
) %>%
1365-
dplyr::filter(Predictor %in% df$SNP) %>%
1367+
) |>
1368+
dplyr::filter(Predictor %in% df$SNP) |>
13661369
dplyr::rename(EAF = MAF)
13671370

1368-
df <- df %>%
1369-
dplyr::left_join(EAF, by = c("SNP" = "Predictor")) %>%
1371+
df <- df |>
1372+
dplyr::left_join(EAF, by = c("SNP" = "Predictor")) |>
13701373
dplyr::mutate(!!column_EAF := ifelse(is.na(!!rlang::sym(column_EAF)),
13711374
ifelse(effect_allele.outcome == A1 & other_allele.outcome == A2, EAF,
13721375
ifelse(effect_allele.outcome == A2 & other_allele.outcome == A1, 1 - EAF, !!rlang::sym(column_EAF))
13731376
),
13741377
!!rlang::sym(column_EAF)
1375-
)) %>%
1378+
)) |>
13761379
dplyr::select(-A1, -A2, -EAF)
13771380
}
13781381
return(df)

0 commit comments

Comments
 (0)