diff --git a/tests/testthat/test-clustering.R b/tests/testthat/test-clustering.R new file mode 100644 index 0000000..f5d4c3c --- /dev/null +++ b/tests/testthat/test-clustering.R @@ -0,0 +1,318 @@ +# Tests for .cluster_gliph1() and .cluster_gliph2() + +# Register sequential backend for %dopar% +foreach::registerDoSEQ() + +# ---- Helpers ---------------------------------------------------------------- + +.make_cluster_data <- function() { + seqs <- c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF", + "CASSLTGGEETQYF", "CASSLGGRETQYF") + sequences <- data.frame( + seq_ID = seq_along(seqs), + CDR3b = seqs, + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1", "TRBV7-2", "TRBV5-1"), + patient = c("P1", "P1", "P2", "P2", "P1"), + stringsAsFactors = FALSE + ) + list(seqs = seqs, sequences = sequences) +} + +# =========================================================================== +# .cluster_gliph1() tests +# =========================================================================== + +# ---- Output structure ------------------------------------------------------- + +test_that(".cluster_gliph1 returns list with expected elements", { + d <- .make_cluster_data() + + clone_network <- data.frame( + V1 = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + V2 = c("CASSLDRGEVFF", "CASSYLAGGRNTLYF"), + type = c("local", "local"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph1( + clone_network = clone_network, + sequences = d$sequences, + not_in_global_ids = integer(0), + seqs = d$seqs, + vgene.info = TRUE, + patient.info = TRUE, + global_vgene = FALSE, + public_tcrs = TRUE, + cluster_min_size = 1, + verbose = FALSE + ) + + expect_type(result, "list") + expect_true("cluster_properties" %in% names(result)) + expect_true("cluster_list" %in% names(result)) + expect_true("clone_network" %in% names(result)) + expect_true("save_cluster_list_df" %in% names(result)) +}) + +# ---- Clustering logic ------------------------------------------------------- + +test_that(".cluster_gliph1 forms connected components correctly", { + d <- .make_cluster_data() + + # Two connected components: {seq1, seq2, seq3} and {seq4, seq5} + clone_network <- data.frame( + V1 = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", + "CASSLTGGEETQYF"), + V2 = c("CASSLDRGEVFF", "CASSYLAGGRNTLYF", + "CASSLGGRETQYF"), + type = c("local", "local", "local"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph1( + clone_network = clone_network, + sequences = d$sequences, + not_in_global_ids = integer(0), + seqs = d$seqs, + vgene.info = TRUE, + patient.info = TRUE, + global_vgene = FALSE, + public_tcrs = TRUE, + cluster_min_size = 1, + verbose = FALSE + ) + + expect_s3_class(result$cluster_properties, "data.frame") + expect_true(nrow(result$cluster_properties) >= 2) + expect_true(all(c("cluster_size", "tag", "members") %in% + colnames(result$cluster_properties))) +}) + +# ---- cluster_min_size ------------------------------------------------------- + +test_that(".cluster_gliph1 filters by cluster_min_size", { + d <- .make_cluster_data() + + # One pair (size 2) and one pair (size 2) and one isolated (size 1) + clone_network <- data.frame( + V1 = c("CASSLAPGATNEKLFF", "CASSLTGGEETQYF"), + V2 = c("CASSLDRGEVFF", "CASSLGGRETQYF"), + type = c("local", "local"), + stringsAsFactors = FALSE + ) + + # With min_size = 3, both pairs should be filtered out + result <- immGLIPH:::.cluster_gliph1( + clone_network = clone_network, + sequences = d$sequences, + not_in_global_ids = integer(0), + seqs = d$seqs, + vgene.info = TRUE, + patient.info = TRUE, + global_vgene = FALSE, + public_tcrs = TRUE, + cluster_min_size = 3, + verbose = FALSE + ) + + if (!is.null(result$cluster_properties)) { + expect_true(all(result$cluster_properties$cluster_size >= 3)) + } +}) + +# ---- Empty edge list -------------------------------------------------------- + +test_that(".cluster_gliph1 returns NULL for empty edge list", { + d <- .make_cluster_data() + + result <- immGLIPH:::.cluster_gliph1( + clone_network = NULL, + sequences = d$sequences, + not_in_global_ids = integer(0), + seqs = d$seqs, + vgene.info = TRUE, + patient.info = TRUE, + global_vgene = FALSE, + public_tcrs = TRUE, + cluster_min_size = 2, + verbose = FALSE + ) + + expect_null(result$cluster_properties) + expect_equal(length(result$cluster_list), 0) +}) + +# ---- Singleton handling ----------------------------------------------------- + +test_that(".cluster_gliph1 adds singletons from not_in_global_ids", { + d <- .make_cluster_data() + + clone_network <- data.frame( + V1 = "CASSLAPGATNEKLFF", + V2 = "CASSLDRGEVFF", + type = "local", + stringsAsFactors = FALSE + ) + + # Sequence at index 5 is not in any edge + result <- immGLIPH:::.cluster_gliph1( + clone_network = clone_network, + sequences = d$sequences, + not_in_global_ids = c(4L, 5L), # indices not in global edges + seqs = d$seqs, + vgene.info = TRUE, + patient.info = TRUE, + global_vgene = FALSE, + public_tcrs = TRUE, + cluster_min_size = 1, + verbose = FALSE + ) + + # The clone_network should contain singleton rows + expect_true(any(result$clone_network$type == "singleton")) +}) + +# =========================================================================== +# .cluster_gliph2() tests +# =========================================================================== + +test_that(".cluster_gliph2 returns list with expected elements", { + d <- .make_cluster_data() + + local_res <- data.frame( + motif = c("SL"), + start = c(1), + stop = c(2), + num_in_sample = c(3), + num_in_ref = c(10), + num_fold = c(5.0), + fisher.score = c(0.001), + members = paste(d$seqs[1:3], collapse = " "), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph2( + local_res = local_res, + global_res = NULL, + sequences = d$sequences, + local_similarities = TRUE, + global_similarities = FALSE, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + structboundaries = TRUE, + boundary_size = 3, + motif_distance_cutoff = 1, + cluster_min_size = 1, + boost_local_significance = FALSE, + verbose = FALSE + ) + + expect_type(result, "list") + expect_true("merged_clusters" %in% names(result)) + expect_true("cluster_list" %in% names(result)) + expect_true("clone_network" %in% names(result)) + expect_true("save_cluster_list_df" %in% names(result)) +}) + +test_that(".cluster_gliph2 clone_network has expected edge types", { + d <- .make_cluster_data() + + local_res <- data.frame( + motif = c("SL"), + start = c(1), + stop = c(2), + num_in_sample = c(3), + num_in_ref = c(10), + num_fold = c(5.0), + fisher.score = c(0.001), + members = paste(d$seqs[1:3], collapse = " "), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph2( + local_res = local_res, + global_res = NULL, + sequences = d$sequences, + local_similarities = TRUE, + global_similarities = FALSE, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + structboundaries = TRUE, + boundary_size = 3, + motif_distance_cutoff = 10, + cluster_min_size = 1, + boost_local_significance = FALSE, + verbose = FALSE + ) + + if (!is.null(result$clone_network)) { + expect_true("type" %in% colnames(result$clone_network)) + # Non-NA edge types should be local, global, or singleton + type_col <- result$clone_network$type[!is.na(result$clone_network$type)] + expect_true(all(type_col %in% c("local", "global", "singleton"))) + # At least some edges should be present + expect_true(length(type_col) > 0) + } +}) + +test_that(".cluster_gliph2 cluster_min_size filters small clusters", { + d <- .make_cluster_data() + + local_res <- data.frame( + motif = c("SL"), + start = c(1), + stop = c(2), + num_in_sample = c(2), + num_in_ref = c(10), + num_fold = c(5.0), + fisher.score = c(0.001), + members = paste(d$seqs[1:2], collapse = " "), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.cluster_gliph2( + local_res = local_res, + global_res = NULL, + sequences = d$sequences, + local_similarities = TRUE, + global_similarities = FALSE, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + structboundaries = TRUE, + boundary_size = 3, + motif_distance_cutoff = 10, + cluster_min_size = 5, + boost_local_significance = FALSE, + verbose = FALSE + ) + + # Cluster of size 2 should be eliminated by min_size = 5 + if (!is.null(result$merged_clusters)) { + expect_true(all(result$merged_clusters$cluster_size >= 5)) + } else { + expect_null(result$merged_clusters) + } +}) + +test_that(".cluster_gliph2 handles no local and no global similarities", { + d <- .make_cluster_data() + + result <- immGLIPH:::.cluster_gliph2( + local_res = NULL, + global_res = NULL, + sequences = d$sequences, + local_similarities = FALSE, + global_similarities = FALSE, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + structboundaries = TRUE, + boundary_size = 3, + motif_distance_cutoff = 1, + cluster_min_size = 2, + boost_local_significance = FALSE, + verbose = FALSE + ) + + expect_null(result$merged_clusters) + expect_equal(length(result$cluster_list), 0) +}) diff --git a/tests/testthat/test-deNovoTCRs.R b/tests/testthat/test-deNovoTCRs.R index 63c9b34..bbecabe 100644 --- a/tests/testthat/test-deNovoTCRs.R +++ b/tests/testthat/test-deNovoTCRs.R @@ -57,3 +57,117 @@ test_that("deNovoTCRs rejects non-logical normalization", { "logical" ) }) + +test_that("deNovoTCRs rejects tag not in cluster_list", { + mock_output <- list( + cluster_list = list( + "CRG-CASSLAPGATNEKLFF" = data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF"), + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + ) + ) + + expect_error( + deNovoTCRs(convergence_group_tag = "NONEXISTENT_TAG", + clustering_output = mock_output, + sims = 10, num_tops = 5, n_cores = 1), + "Could not find" + ) +}) + +test_that("deNovoTCRs rejects sims less than 1", { + expect_error( + deNovoTCRs(convergence_group_tag = "CRG-1", + clustering_output = list(cluster_list = list()), + sims = 0), + "at least 1" + ) +}) + +test_that("deNovoTCRs rejects num_tops less than 1", { + expect_error( + deNovoTCRs(convergence_group_tag = "CRG-1", + clustering_output = list(cluster_list = list()), + num_tops = 0), + "at least 1" + ) +}) + +test_that("deNovoTCRs rejects min_length less than 1", { + expect_error( + deNovoTCRs(convergence_group_tag = "CRG-1", + clustering_output = list(cluster_list = list()), + min_length = 0), + "at least 1" + ) +}) + +# ---- Functional tests ------------------------------------------------------- + +test_that("deNovoTCRs generates sequences from a pre-computed cluster", { + skip_on_cran() + + # Build a mock clustering output with a cluster of similar sequences + cluster_members <- data.frame( + seq_ID = 1:5, + CDR3b = c("CASSLAPGATNEKLFF", "CASSLAPRATNEKLFF", + "CASSLAPGETQEKLFF", "CASSLAPQATNEKLFF", + "CASSLAPGAGNEKLFF"), + TRBV = rep("TRBV5-1", 5), + patient = rep("P1", 5), + stringsAsFactors = FALSE + ) + + mock_output <- list( + cluster_list = list("CRG-CASSLAPGATNEKLFF" = cluster_members), + cluster_properties = data.frame( + tag = "CRG-CASSLAPGATNEKLFF", + cluster_size = 5, + stringsAsFactors = FALSE + ), + parameters = list(method = "gliph1") + ) + + ref_df <- data.frame( + CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF"), + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV5-1"), + stringsAsFactors = FALSE + ) + + result <- deNovoTCRs( + convergence_group_tag = "CRG-CASSLAPGATNEKLFF", + clustering_output = mock_output, + refdb_beta = ref_df, + sims = 100, + num_tops = 10, + min_length = 10, + make_figure = FALSE, + n_cores = 1 + ) + + expect_type(result, "list") + + # Check output structure + expect_true("de_novo_sequences" %in% names(result)) + expect_true("sample_sequences_scores" %in% names(result)) + expect_true("cdr3_length_probability" %in% names(result)) + expect_true("PWM_Scoring" %in% names(result)) + expect_true("PWM_Prediction" %in% names(result)) + + # de_novo_sequences should be a data.frame + expect_s3_class(result$de_novo_sequences, "data.frame") + + # PWM_Scoring should have amino acid-related columns + expect_s3_class(result$PWM_Scoring, "data.frame") + expect_true(ncol(result$PWM_Scoring) > 0) + + # PWM_Prediction should be a list of data.frames + expect_type(result$PWM_Prediction, "list") + + # cdr3_length_probability should have probabilities summing to ~1 + expect_s3_class(result$cdr3_length_probability, "data.frame") + probs <- result$cdr3_length_probability[, ncol(result$cdr3_length_probability)] + expect_true(abs(sum(as.numeric(probs)) - 1) < 0.01) +}) diff --git a/tests/testthat/test-getGLIPHreference.R b/tests/testthat/test-getGLIPHreference.R new file mode 100644 index 0000000..fb8f830 --- /dev/null +++ b/tests/testthat/test-getGLIPHreference.R @@ -0,0 +1,23 @@ +# Tests for getGLIPHreference() + +test_that("getGLIPHreference requires BiocFileCache", { + skip_if(requireNamespace("BiocFileCache", quietly = TRUE), + "BiocFileCache is installed; skipping missing-dependency test") + expect_error(getGLIPHreference(verbose = FALSE), "BiocFileCache") +}) + +test_that("getGLIPHreference returns a list when BiocFileCache is available", { + skip_if_not_installed("BiocFileCache") + skip_on_cran() + + result <- getGLIPHreference(verbose = FALSE) + expect_type(result, "list") + expect_true(length(result) > 0) + + # All valid reference names should be present + valid_names <- immGLIPH:::.valid_reference_names() + for (nm in valid_names) { + expect_true(nm %in% names(result), + info = paste("Missing reference:", nm)) + } +}) diff --git a/tests/testthat/test-global-fisher.R b/tests/testthat/test-global-fisher.R new file mode 100644 index 0000000..1c22b9b --- /dev/null +++ b/tests/testthat/test-global-fisher.R @@ -0,0 +1,192 @@ +# Tests for .global_fisher() + +# Register sequential backend for %dopar% +foreach::registerDoSEQ() + +# ---- Helper: small synthetic data ------------------------------------------ + +.make_global_fisher_data <- function() { + # Sequences with similar structures (differ by 1 AA) + sample_seqs <- c( + "CASSLAPGATNEKLFF", + "CASSLTPGATNEKLFF", # differs at pos 5 (A -> T) + "CASSLMPGATNEKLFF", # differs at pos 5 (A -> M) + "CASSLDRGEVFF", + "CASSLDRGEVFF", # duplicate + "CASSYLAGGRNTLYF" + ) + sample_seqs_uniq <- unique(sample_seqs) + + sequences <- data.frame( + seq_ID = seq_along(sample_seqs), + CDR3b = sample_seqs, + TRBV = c("TRBV5-1", "TRBV5-1", "TRBV6-2", + "TRBV5-1", "TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + motif_region <- substr(sample_seqs_uniq, 4, + nchar(sample_seqs_uniq) - 3) + + set.seed(42) + ref_seqs <- vapply(seq_len(100), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_seqs <- unique(ref_seqs) + ref_motif_region <- substr(ref_seqs, 4, nchar(ref_seqs) - 3) + + list( + sample_seqs = sample_seqs_uniq, + sequences = sequences, + motif_region = motif_region, + ref_seqs = ref_seqs, + ref_motif_region = ref_motif_region + ) +} + +# ---- Output structure ------------------------------------------------------- + +test_that(".global_fisher returns list with cluster_list and global_similarities", { + skip_if_not_installed("immApex") + + d <- .make_global_fisher_data() + result <- immGLIPH:::.global_fisher( + seqs = d$sample_seqs, + motif_region = d$motif_region, + sequences = d$sequences, + refseqs = d$ref_seqs, + refseqs_motif_region = d$ref_motif_region, + structboundaries = TRUE, + boundary_size = 3, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + no_cores = 1, + verbose = FALSE + ) + + expect_type(result, "list") + expect_true("cluster_list" %in% names(result)) + expect_true("global_similarities" %in% names(result)) + expect_type(result$global_similarities, "logical") +}) + +test_that(".global_fisher cluster_list has expected columns when similarities found", { + skip_if_not_installed("immApex") + + d <- .make_global_fisher_data() + result <- immGLIPH:::.global_fisher( + seqs = d$sample_seqs, + motif_region = d$motif_region, + sequences = d$sequences, + refseqs = d$ref_seqs, + refseqs_motif_region = d$ref_motif_region, + structboundaries = TRUE, + boundary_size = 3, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + no_cores = 1, + verbose = FALSE + ) + + expect_s3_class(result$cluster_list, "data.frame") + if (nrow(result$cluster_list) > 0) { + expected_cols <- c("cluster_tag", "cluster_size", "unique_CDR3b", + "num_in_ref", "fisher.score", "aa_at_position", + "TRBV", "CDR3b") + expect_true(all(expected_cols %in% colnames(result$cluster_list))) + expect_true(is.numeric(result$cluster_list$fisher.score)) + } +}) + +# ---- No similarities -------------------------------------------------------- + +test_that(".global_fisher returns FALSE for no structural matches", { + skip_if_not_installed("immApex") + + # All very different sequences - no two share a struct + dissimilar_seqs <- c("CASSLAPGATNEKLFF", "CXXXXXXXXXXXXXXF", + "CYYYYYYYYYYYYYYYF") + motif_region <- substr(dissimilar_seqs, 4, nchar(dissimilar_seqs) - 3) + + sequences <- data.frame( + seq_ID = seq_along(dissimilar_seqs), + CDR3b = dissimilar_seqs, + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV7-2"), + stringsAsFactors = FALSE + ) + + ref_seqs <- c("CASSQQQQQQQQQQF", "CASSWWWWWWWWWWF") + + result <- immGLIPH:::.global_fisher( + seqs = dissimilar_seqs, + motif_region = motif_region, + sequences = sequences, + refseqs = ref_seqs, + refseqs_motif_region = substr(ref_seqs, 4, nchar(ref_seqs) - 3), + structboundaries = TRUE, + boundary_size = 3, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + no_cores = 1, + verbose = FALSE + ) + + expect_false(result$global_similarities) + expect_equal(nrow(result$cluster_list), 0) +}) + +# ---- V-gene restriction ----------------------------------------------------- + +test_that(".global_fisher with global_vgene restricts to same V-gene", { + skip_if_not_installed("immApex") + + # Two similar seqs but different V-genes + similar_seqs <- c("CASSLAPGATNEKLFF", "CASSLTPGATNEKLFF") + motif_region <- substr(similar_seqs, 4, nchar(similar_seqs) - 3) + + sequences <- data.frame( + seq_ID = 1:2, + CDR3b = similar_seqs, + TRBV = c("TRBV5-1", "TRBV6-2"), # different V-genes + stringsAsFactors = FALSE + ) + + ref_seqs <- c("CASSQQQQQQQQQQF", "CASSWWWWWWWWWWF") + + result_vgene <- immGLIPH:::.global_fisher( + seqs = similar_seqs, + motif_region = motif_region, + sequences = sequences, + refseqs = ref_seqs, + refseqs_motif_region = substr(ref_seqs, 4, nchar(ref_seqs) - 3), + structboundaries = TRUE, + boundary_size = 3, + global_vgene = TRUE, + all_aa_interchangeable = TRUE, + no_cores = 1, + verbose = FALSE + ) + + result_no_vgene <- immGLIPH:::.global_fisher( + seqs = similar_seqs, + motif_region = motif_region, + sequences = sequences, + refseqs = ref_seqs, + refseqs_motif_region = substr(ref_seqs, 4, nchar(ref_seqs) - 3), + structboundaries = TRUE, + boundary_size = 3, + global_vgene = FALSE, + all_aa_interchangeable = TRUE, + no_cores = 1, + verbose = FALSE + ) + + # V-gene restriction should yield fewer or equal clusters + n_vgene <- if (result_vgene$global_similarities) + nrow(result_vgene$cluster_list) else 0 + n_no_vgene <- if (result_no_vgene$global_similarities) + nrow(result_no_vgene$cluster_list) else 0 + expect_true(n_vgene <= n_no_vgene) +}) diff --git a/tests/testthat/test-globalCutoff.R b/tests/testthat/test-globalCutoff.R index 9862dc1..e601ff6 100644 --- a/tests/testthat/test-globalCutoff.R +++ b/tests/testthat/test-globalCutoff.R @@ -128,3 +128,99 @@ test_that("immApex buildNetwork backend matches stringdist backend", { expect_setequal(sd_pairs, apex_pairs) } }) + +# ---- Edge case tests ------------------------------------------------------- + +test_that(".global_cutoff_stringdist returns zero edges for single sequence", { + seqs <- c("CASSLAPGATNEKLFF") + motif_region <- substr(seqs, 4, nchar(seqs) - 3) + sequences <- data.frame( + CDR3b = seqs, + TRBV = "TRBV5-1", + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.global_cutoff_stringdist( + seqs = seqs, + motif_region = motif_region, + sequences = sequences, + gccutoff = 5, + global_vgene = FALSE, + no_cores = 1, + verbose = FALSE + ) + + expect_equal(nrow(result$edges), 0) +}) + +test_that(".global_cutoff_stringdist finds edges for identical sequences", { + seqs <- c("CASSLAPGATNEKLFF", "CASSLAPGATNEKLFF") + seqs_uniq <- unique(seqs) + motif_region <- substr(seqs_uniq, 4, nchar(seqs_uniq) - 3) + sequences <- data.frame( + CDR3b = seqs, + TRBV = c("TRBV5-1", "TRBV5-1"), + stringsAsFactors = FALSE + ) + + # Identical sequences have hamming distance 0, should be found with cutoff >= 0 + result <- immGLIPH:::.global_cutoff_stringdist( + seqs = seqs_uniq, + motif_region = motif_region, + sequences = sequences, + gccutoff = 0, + global_vgene = FALSE, + no_cores = 1, + verbose = FALSE + ) + + # With only 1 unique sequence, there's no pair to form + expect_equal(nrow(result$edges), 0) +}) + +test_that(".global_cutoff_stringdist with global_vgene restricts to same V-gene", { + seqs <- c("CASSLAPGATNEKLFF", "CASSLAPRATNEKLFF") + motif_region <- substr(seqs, 4, nchar(seqs) - 3) + + sequences_same <- data.frame( + CDR3b = seqs, + TRBV = c("TRBV5-1", "TRBV5-1"), + stringsAsFactors = FALSE + ) + sequences_diff <- data.frame( + CDR3b = seqs, + TRBV = c("TRBV5-1", "TRBV6-2"), + stringsAsFactors = FALSE + ) + + result_same <- immGLIPH:::.global_cutoff_stringdist( + seqs = seqs, motif_region = motif_region, sequences = sequences_same, + gccutoff = 5, global_vgene = TRUE, no_cores = 1, verbose = FALSE + ) + result_diff <- immGLIPH:::.global_cutoff_stringdist( + seqs = seqs, motif_region = motif_region, sequences = sequences_diff, + gccutoff = 5, global_vgene = TRUE, no_cores = 1, verbose = FALSE + ) + + # Same V-gene should find the edge; different V-genes should not + expect_true(nrow(result_same$edges) >= nrow(result_diff$edges)) +}) + +test_that(".global_cutoff not_in_global_ids tracks isolated sequences", { + seqs <- c("CASSLAPGATNEKLFF", "XXXXXXXXXXXXXXXF", "CASSLDRGEVFF") + motif_region <- substr(seqs, 4, nchar(seqs) - 3) + sequences <- data.frame( + CDR3b = seqs, + TRBV = c("TRBV5-1", "TRBV6-2", "TRBV7-2"), + stringsAsFactors = FALSE + ) + + result <- immGLIPH:::.global_cutoff_stringdist( + seqs = seqs, motif_region = motif_region, sequences = sequences, + gccutoff = 0, global_vgene = FALSE, no_cores = 1, verbose = FALSE + ) + + # not_in_global_ids should contain indices of sequences without global edges + expect_type(result$not_in_global_ids, "integer") + expect_true(length(result$not_in_global_ids) > 0) +}) diff --git a/tests/testthat/test-local-fisher.R b/tests/testthat/test-local-fisher.R new file mode 100644 index 0000000..8576b6f --- /dev/null +++ b/tests/testthat/test-local-fisher.R @@ -0,0 +1,218 @@ +# Tests for .local_fisher() + +# Register sequential backend for %dopar% +foreach::registerDoSEQ() + +# ---- Helper: small synthetic data ------------------------------------------ + +.make_fisher_data <- function() { + # Sample sequences that share motifs (e.g., "SLA" appears in several) + sample_seqs <- c( + "CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF", + "CASSLTGGEETQYF", "CASSLGGRETQYF", "CASSLGQAYEQYF", + "CASSFSTCSANYGYTF", "CASSPTGGYEQYF" + ) + sequences <- data.frame( + seq_ID = seq_along(sample_seqs), + CDR3b = sample_seqs, + TRBV = rep(c("TRBV5-1", "TRBV6-2"), length.out = length(sample_seqs)), + stringsAsFactors = FALSE + ) + motif_region <- substr(sample_seqs, 4, nchar(sample_seqs) - 3) + + # Reference sequences (larger, different distribution) + set.seed(42) + ref_seqs <- vapply(seq_len(100), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_seqs <- unique(ref_seqs) + ref_motif_region <- substr(ref_seqs, 4, nchar(ref_seqs) - 3) + + list( + sample_seqs = sample_seqs, + sequences = sequences, + motif_region = motif_region, + ref_seqs = ref_seqs, + ref_motif_region = ref_motif_region + ) +} + +# ---- Output structure ------------------------------------------------------- + +test_that(".local_fisher returns list with selected_motifs and all_motifs", { + d <- .make_fisher_data() + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = c(2, 3), + kmer_mindepth = 2, + lcminp = 1.0, + lcminove = c(1, 1), + discontinuous_motifs = FALSE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = FALSE + ) + + expect_type(result, "list") + expect_true("selected_motifs" %in% names(result)) + expect_true("all_motifs" %in% names(result)) +}) + +test_that(".local_fisher all_motifs has expected columns", { + d <- .make_fisher_data() + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = c(2, 3), + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = c(0, 0), + discontinuous_motifs = FALSE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = FALSE + ) + + expected_cols <- c("motif", "counts", "num_in_ref", "avgRef", + "topRef", "OvE", "p.value") + expect_true(all(expected_cols %in% colnames(result$all_motifs))) + expect_s3_class(result$all_motifs, "data.frame") +}) + +# ---- p-value and fold change ------------------------------------------------ + +test_that(".local_fisher p-values are numeric and in [0, 1]", { + d <- .make_fisher_data() + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = c(2, 3), + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = c(0, 0), + discontinuous_motifs = FALSE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = FALSE + ) + + pvals <- result$all_motifs$p.value + expect_true(is.numeric(pvals)) + expect_true(all(pvals >= 0 & pvals <= 1)) +}) + +test_that(".local_fisher OvE is numeric", { + d <- .make_fisher_data() + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + refseqs = d$ref_seqs, + sequences = d$sequences, + motif_length = c(2, 3), + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = c(0, 0), + discontinuous_motifs = FALSE, + motif_distance_cutoff = 1, + no_cores = 1, + verbose = FALSE + ) + + expect_true(is.numeric(result$all_motifs$OvE)) +}) + +# ---- Filtering parameters --------------------------------------------------- + +test_that(".local_fisher kmer_mindepth filters out rare motifs", { + d <- .make_fisher_data() + + result_low <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, refseqs = d$ref_seqs, sequences = d$sequences, + motif_length = 2, kmer_mindepth = 1, lcminp = 1.0, lcminove = 0, + discontinuous_motifs = FALSE, motif_distance_cutoff = 1, + no_cores = 1, verbose = FALSE + ) + result_high <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, refseqs = d$ref_seqs, sequences = d$sequences, + motif_length = 2, kmer_mindepth = 5, lcminp = 1.0, lcminove = 0, + discontinuous_motifs = FALSE, motif_distance_cutoff = 1, + no_cores = 1, verbose = FALSE + ) + + # Higher mindepth should yield fewer or equal selected motifs + expect_true(nrow(result_high$selected_motifs) <= + nrow(result_low$selected_motifs)) + # All selected should satisfy mindepth + if (nrow(result_high$selected_motifs) > 0) { + expect_true(all(result_high$selected_motifs$counts >= 5)) + } +}) + +test_that(".local_fisher lcminp filters out high p-value motifs", { + d <- .make_fisher_data() + + result_strict <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, refseqs = d$ref_seqs, sequences = d$sequences, + motif_length = 2, kmer_mindepth = 1, lcminp = 1e-10, lcminove = 0, + discontinuous_motifs = FALSE, motif_distance_cutoff = 1, + no_cores = 1, verbose = FALSE + ) + result_lenient <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, refseqs = d$ref_seqs, sequences = d$sequences, + motif_length = 2, kmer_mindepth = 1, lcminp = 1.0, lcminove = 0, + discontinuous_motifs = FALSE, motif_distance_cutoff = 1, + no_cores = 1, verbose = FALSE + ) + + expect_true(nrow(result_strict$selected_motifs) <= + nrow(result_lenient$selected_motifs)) +}) + +test_that(".local_fisher lcminove filters by fold change per motif length", { + d <- .make_fisher_data() + + # Very high OvE threshold should eliminate most motifs + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, refseqs = d$ref_seqs, sequences = d$sequences, + motif_length = c(2, 3), kmer_mindepth = 1, lcminp = 1.0, + lcminove = c(1e6, 1e6), + discontinuous_motifs = FALSE, motif_distance_cutoff = 1, + no_cores = 1, verbose = FALSE + ) + + expect_equal(nrow(result$selected_motifs), 0) +}) + +# ---- Discontinuous motifs --------------------------------------------------- + +test_that(".local_fisher includes discontinuous motifs when enabled", { + d <- .make_fisher_data() + result <- immGLIPH:::.local_fisher( + motif_region = d$motif_region, refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, refseqs = d$ref_seqs, sequences = d$sequences, + motif_length = 2, kmer_mindepth = 1, lcminp = 1.0, lcminove = 0, + discontinuous_motifs = TRUE, motif_distance_cutoff = 1, + no_cores = 1, verbose = FALSE + ) + + disc <- result$all_motifs[grep("\\.", result$all_motifs$motif), ] + expect_true(nrow(disc) > 0) +}) diff --git a/tests/testthat/test-local-rrs.R b/tests/testthat/test-local-rrs.R new file mode 100644 index 0000000..1617c39 --- /dev/null +++ b/tests/testthat/test-local-rrs.R @@ -0,0 +1,213 @@ +# Tests for .local_rrs() + +# Register sequential backend for %dopar% +foreach::registerDoSEQ() + +# ---- Helper: small synthetic data ------------------------------------------ + +.make_rrs_data <- function() { + sample_seqs <- c( + "CASSLAPGATNEKLFF", "CASSLDRGEVFF", "CASSYLAGGRNTLYF", + "CASSLTGGEETQYF", "CASSLGGRETQYF", "CASSLGQAYEQYF", + "CASSFSTCSANYGYTF", "CASSPTGGYEQYF" + ) + sequences <- data.frame( + seq_ID = seq_along(sample_seqs), + CDR3b = sample_seqs, + TRBV = rep(c("TRBV5-1", "TRBV6-2"), length.out = length(sample_seqs)), + stringsAsFactors = FALSE + ) + motif_region <- substr(sample_seqs, 4, nchar(sample_seqs) - 3) + + set.seed(42) + ref_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_seqs <- unique(ref_seqs) + ref_motif_region <- substr(ref_seqs, 4, nchar(ref_seqs) - 3) + + list( + sample_seqs = sample_seqs, + sequences = sequences, + motif_region = motif_region, + ref_seqs = ref_seqs, + ref_motif_region = ref_motif_region + ) +} + +# ---- Output structure ------------------------------------------------------- + +test_that(".local_rrs returns list with sample_log, selected_motifs, all_motifs", { + d <- .make_rrs_data() + result <- immGLIPH:::.local_rrs( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + sequences = d$sequences, + motif_length = 2, + sim_depth = 5, + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = 0, + discontinuous_motifs = FALSE, + cdr3_len_stratify = FALSE, + vgene_stratify = FALSE, + no_cores = 1, + verbose = FALSE, + motif_lengths_list = list(), + ref_motif_lengths_id_list = list(), + motif_region_vgenes_list = list(), + ref_motif_vgenes_id_list = list(), + lengths_vgenes_list = list(), + ref_lengths_vgenes_list = list() + ) + + expect_type(result, "list") + expect_true("sample_log" %in% names(result)) + expect_true("selected_motifs" %in% names(result)) + expect_true("all_motifs" %in% names(result)) +}) + +# ---- sample_log structure --------------------------------------------------- + +test_that(".local_rrs sample_log has sim_depth + 1 rows", { + d <- .make_rrs_data() + sim_depth <- 5 + result <- immGLIPH:::.local_rrs( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + sequences = d$sequences, + motif_length = 2, + sim_depth = sim_depth, + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = 0, + discontinuous_motifs = FALSE, + cdr3_len_stratify = FALSE, + vgene_stratify = FALSE, + no_cores = 1, + verbose = FALSE, + motif_lengths_list = list(), + ref_motif_lengths_id_list = list(), + motif_region_vgenes_list = list(), + ref_motif_vgenes_id_list = list(), + lengths_vgenes_list = list(), + ref_lengths_vgenes_list = list() + ) + + expect_equal(nrow(result$sample_log), sim_depth + 1) + expect_equal(rownames(result$sample_log)[1], "Discovery") +}) + +# ---- all_motifs structure --------------------------------------------------- + +test_that(".local_rrs all_motifs has expected columns", { + d <- .make_rrs_data() + result <- immGLIPH:::.local_rrs( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + sequences = d$sequences, + motif_length = 2, + sim_depth = 5, + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = 0, + discontinuous_motifs = FALSE, + cdr3_len_stratify = FALSE, + vgene_stratify = FALSE, + no_cores = 1, + verbose = FALSE, + motif_lengths_list = list(), + ref_motif_lengths_id_list = list(), + motif_region_vgenes_list = list(), + ref_motif_vgenes_id_list = list(), + lengths_vgenes_list = list(), + ref_lengths_vgenes_list = list() + ) + + expected_cols <- c("motif", "counts", "num_in_ref", "avgRef", + "topRef", "OvE", "p.value") + expect_true(all(expected_cols %in% colnames(result$all_motifs))) +}) + +# ---- p-values --------------------------------------------------------------- + +test_that(".local_rrs p-values are in (0, 1]", { + d <- .make_rrs_data() + result <- immGLIPH:::.local_rrs( + motif_region = d$motif_region, + refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, + sequences = d$sequences, + motif_length = 2, + sim_depth = 10, + kmer_mindepth = 1, + lcminp = 1.0, + lcminove = 0, + discontinuous_motifs = FALSE, + cdr3_len_stratify = FALSE, + vgene_stratify = FALSE, + no_cores = 1, + verbose = FALSE, + motif_lengths_list = list(), + ref_motif_lengths_id_list = list(), + motif_region_vgenes_list = list(), + ref_motif_vgenes_id_list = list(), + lengths_vgenes_list = list(), + ref_lengths_vgenes_list = list() + ) + + pvals <- result$all_motifs$p.value + expect_true(is.numeric(pvals)) + expect_true(all(pvals > 0 & pvals <= 1)) +}) + +# ---- Filtering -------------------------------------------------------------- + +test_that(".local_rrs kmer_mindepth filters correctly", { + d <- .make_rrs_data() + + result <- immGLIPH:::.local_rrs( + motif_region = d$motif_region, refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, sequences = d$sequences, + motif_length = 2, sim_depth = 5, kmer_mindepth = 5, + lcminp = 1.0, lcminove = 0, discontinuous_motifs = FALSE, + cdr3_len_stratify = FALSE, vgene_stratify = FALSE, + no_cores = 1, verbose = FALSE, + motif_lengths_list = list(), ref_motif_lengths_id_list = list(), + motif_region_vgenes_list = list(), ref_motif_vgenes_id_list = list(), + lengths_vgenes_list = list(), ref_lengths_vgenes_list = list() + ) + + if (nrow(result$selected_motifs) > 0) { + expect_true(all(result$selected_motifs$counts >= 5)) + } +}) + +test_that(".local_rrs strict lcminp yields fewer selected motifs", { + d <- .make_rrs_data() + + run_rrs <- function(lcminp) { + immGLIPH:::.local_rrs( + motif_region = d$motif_region, refseqs_motif_region = d$ref_motif_region, + seqs = d$sample_seqs, sequences = d$sequences, + motif_length = 2, sim_depth = 5, kmer_mindepth = 1, + lcminp = lcminp, lcminove = 0, discontinuous_motifs = FALSE, + cdr3_len_stratify = FALSE, vgene_stratify = FALSE, + no_cores = 1, verbose = FALSE, + motif_lengths_list = list(), ref_motif_lengths_id_list = list(), + motif_region_vgenes_list = list(), ref_motif_vgenes_id_list = list(), + lengths_vgenes_list = list(), ref_lengths_vgenes_list = list() + ) + } + + res_strict <- run_rrs(0.001) + res_lenient <- run_rrs(1.0) + + expect_true(nrow(res_strict$selected_motifs) <= + nrow(res_lenient$selected_motifs)) +}) diff --git a/tests/testthat/test-plotNetwork.R b/tests/testthat/test-plotNetwork.R index 0470813..67b5560 100644 --- a/tests/testthat/test-plotNetwork.R +++ b/tests/testthat/test-plotNetwork.R @@ -51,3 +51,114 @@ test_that("plotNetwork requires clustering output with clusters", { ) expect_error(plotNetwork(clustering_output = mock_output), "does not contain") }) + +test_that("plotNetwork rejects empty cluster_list", { + mock_output <- list( + cluster_list = list(), + cluster_properties = data.frame(), + parameters = list(clustering_method = "GLIPH2.0") + ) + expect_error(plotNetwork(clustering_output = mock_output), "does not contain") +}) + +test_that("plotNetwork rejects non-character size_info", { + expect_error(plotNetwork(clustering_output = list(), size_info = 123), + "character") +}) + +test_that("plotNetwork rejects non-character show_additional_columns", { + expect_error( + plotNetwork(clustering_output = list(), show_additional_columns = 42), + "character" + ) +}) + +# ---- Functional tests ------------------------------------------------------- + +test_that("plotNetwork produces visNetwork object from clustering output", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph1", + refdb_beta = ref_df, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + # Only run if there are clusters large enough + if (!is.null(res$cluster_properties) && + any(res$cluster_properties$cluster_size >= 2)) { + plot_obj <- plotNetwork( + clustering_output = res, + cluster_min_size = 2, + n_cores = 1 + ) + + expect_s3_class(plot_obj, "visNetwork") + } +}) + +test_that("plotNetwork with color_info = 'none' works", { + skip_on_cran() + skip_if_not_installed("immApex") + skip_if(!exists("calculateMotif", asNamespace("immApex")), + "immApex::calculateMotif not available") + + utils::data("gliph_input_data", package = "immGLIPH") + small_data <- gliph_input_data[seq_len(50), ] + + set.seed(42) + extra_seqs <- vapply(seq_len(200), function(i) { + paste0("C", paste0(sample(LETTERS[c(1, 3:9, 11:14, 16:20, 23, 25)], + sample(8:14, 1), replace = TRUE), + collapse = ""), "F") + }, character(1)) + ref_df <- data.frame( + CDR3b = extra_seqs, + TRBV = sample(c("TRBV5-1", "TRBV6-2", "TRBV7-2"), 200, replace = TRUE), + stringsAsFactors = FALSE + ) + ref_df <- ref_df[!duplicated(ref_df$CDR3b), ] + + res <- runGLIPH( + cdr3_sequences = small_data, + method = "gliph1", + refdb_beta = ref_df, + sim_depth = 10, + n_cores = 1, + verbose = FALSE + ) + + if (!is.null(res$cluster_properties) && + any(res$cluster_properties$cluster_size >= 2)) { + plot_obj <- plotNetwork( + clustering_output = res, + color_info = "none", + cluster_min_size = 2, + n_cores = 1 + ) + + expect_s3_class(plot_obj, "visNetwork") + } +}) diff --git a/tests/testthat/test-runGLIPH.R b/tests/testthat/test-runGLIPH.R index 9037bc3..1ee6b2a 100644 --- a/tests/testthat/test-runGLIPH.R +++ b/tests/testthat/test-runGLIPH.R @@ -84,6 +84,66 @@ test_that(".validate_params validates logical parameters", { "must be logical") }) +test_that(".validate_params rejects bad n_cores", { + expect_error(immGLIPH:::.validate_params(n_cores = 0), "n_cores") + expect_error(immGLIPH:::.validate_params(n_cores = -1), "n_cores") + expect_error(immGLIPH:::.validate_params(n_cores = "abc"), "n_cores") +}) + +test_that(".validate_params accepts NULL n_cores", { + result <- immGLIPH:::.validate_params(n_cores = NULL) + expect_null(result$n_cores) +}) + +test_that(".validate_params rejects bad motif_length", { + expect_error(immGLIPH:::.validate_params(motif_length = "abc"), + "motif_length") + expect_error(immGLIPH:::.validate_params(motif_length = 0), + "motif_length") + expect_error(immGLIPH:::.validate_params(motif_length = -1), + "motif_length") +}) + +test_that(".validate_params rounds motif_length", { + # Must also adjust lcminove to match motif_length length + result <- immGLIPH:::.validate_params(motif_length = c(2.7, 3.2), + lcminove = c(100, 10)) + expect_equal(result$motif_length, c(3, 3)) +}) + +test_that(".validate_params rejects bad cluster_min_size", { + expect_error(immGLIPH:::.validate_params(cluster_min_size = "abc"), + "cluster_min_size") + expect_error(immGLIPH:::.validate_params(cluster_min_size = 0), + "cluster_min_size") +}) + +test_that(".validate_params rejects bad boundary_size", { + expect_error(immGLIPH:::.validate_params(boundary_size = "abc"), + "boundary_size") + expect_error(immGLIPH:::.validate_params(boundary_size = -1), + "boundary_size") + # 0 is valid (>= 0) + result <- immGLIPH:::.validate_params(boundary_size = 0) + expect_equal(result$boundary_size, 0) +}) + +test_that(".validate_params validates ref_cluster_size", { + expect_error(immGLIPH:::.validate_params(ref_cluster_size = "invalid"), + "ref_cluster_size") + result <- immGLIPH:::.validate_params(ref_cluster_size = "original") + expect_equal(result$ref_cluster_size, "original") + result2 <- immGLIPH:::.validate_params(ref_cluster_size = "simulated") + expect_equal(result2$ref_cluster_size, "simulated") +}) + +test_that(".validate_params validates gccutoff", { + expect_error(immGLIPH:::.validate_params(gccutoff = -1), "gccutoff") + expect_error(immGLIPH:::.validate_params(gccutoff = "abc"), "gccutoff") + result <- immGLIPH:::.validate_params(gccutoff = NULL) + expect_null(result$gccutoff) +}) + # ---- Input validation -------------------------------------------------------- test_that("runGLIPH rejects invalid method", {