From 13c5af29a4eb6f6e59416986070c65fbb5fb7232 Mon Sep 17 00:00:00 2001 From: Philip Lijnzaad Date: Fri, 16 Feb 2018 15:06:34 +0100 Subject: [PATCH 01/11] added (back) coordinates for chrM --- inst/extdata/UCSC.HG38.Human.CytoBandIdeogram.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/extdata/UCSC.HG38.Human.CytoBandIdeogram.txt b/inst/extdata/UCSC.HG38.Human.CytoBandIdeogram.txt index 82abd9a..8da32e0 100644 --- a/inst/extdata/UCSC.HG38.Human.CytoBandIdeogram.txt +++ b/inst/extdata/UCSC.HG38.Human.CytoBandIdeogram.txt @@ -859,4 +859,5 @@ chrY 12400000 17100000 q11.221 gpos50 chrY 17100000 19600000 q11.222 gneg chrY 19600000 23800000 q11.223 gpos50 chrY 23800000 26600000 q11.23 gneg -chrY 26600000 57227415 q12 gvar \ No newline at end of file +chrY 26600000 57227415 q12 gvar +chrM 0 16569 m0 gneg From f43e76f65d0624a38703df0ea5951707db83c51d Mon Sep 17 00:00:00 2001 From: Philip Lijnzaad Date: Fri, 16 Feb 2018 15:57:00 +0100 Subject: [PATCH 02/11] don't leave out the mitochrondria leave this up to end user (e.g. MT-chr3 fusions display fine) --- R/plotCircle.R | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/R/plotCircle.R b/R/plotCircle.R index fc72d5f..9504cda 100644 --- a/R/plotCircle.R +++ b/R/plotCircle.R @@ -180,18 +180,18 @@ plotCircle <- function(fusionList) { stop("Invalid input. genomeVersion must be either \"hg19\", \"hg38\" or \"mm10\".") } - if (any(rapply(fusionList, function(fusion) fusion@geneA@chromosome == "chrM" || fusion@geneB@chromosome == "chrM"))) { - indexesMitochondrialGenes <- - rapply(fusionList, function(fusion) fusion@geneA@chromosome == "chrM" || fusion@geneB@chromosome == "chrM") - fusionList <- fusionList[!indexesMitochondrialGenes] - message( - paste0( - "Removing ", - length(fusionList[indexesMitochondrialGenes]), - " fusions involving mitochondrial genes as they cannot be plotted in the circle plot." - ) - ) - } +## if (any(rapply(fusionList, function(fusion) fusion@geneA@chromosome == "chrM" || fusion@geneB@chromosome == "chrM"))) { +## indexesMitochondrialGenes <- +## rapply(fusionList, function(fusion) fusion@geneA@chromosome == "chrM" || fusion@geneB@chromosome == "chrM") +## fusionList <- fusionList[!indexesMitochondrialGenes] +## message( +## paste0( +## "Removing ", +## length(fusionList[indexesMitochondrialGenes]), +## " fusions involving mitochondrial genes as they cannot be plotted in the circle plot." +## ) +## ) +## } # Read cytoband information depending on genome version if (fusionList[[1]]@genomeVersion == "hg19") { From b7e605efaf33ad1523f188b532f47a6d4a27a6de Mon Sep 17 00:00:00 2001 From: Philip Lijnzaad Date: Fri, 16 Feb 2018 16:16:12 +0100 Subject: [PATCH 03/11] report number of txpts per type (cherry picked from commit f17c89decb83c2bc97811231b97f546672767d03) --- R/utilities.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 60cd956..f07dc9e 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -847,15 +847,17 @@ selectTranscript <- function( # If the user has chosen one of the four transcript categories, then we want to check whether or not such # transcripts exist. If they exist, simply return them. If they don't exist, go on to try the other categories. Try - # the wanted category first: - if (length(genePartner@transcripts[mcols(genePartner@transcripts)$transcriptCategory == whichTranscripts[[1]] ]) > 0) { - message(paste0("..found transcripts of type ", whichTranscripts[[1]])) + # the wanted category first: + l <- length(genePartner@transcripts[mcols(genePartner@transcripts)$transcriptCategory == whichTranscripts[[1]] ]) + if (l > 0) { + message(paste0("... found ", l, " transcripts of type ", whichTranscripts[[1]])) return(genePartner@transcripts[mcols(genePartner@transcripts)$transcriptCategory == whichTranscripts[[1]] ]) } # Check the remaining categories for(transcriptCategory in transcriptCategories[transcriptCategories != whichTranscripts[[1]]]) { - if (length(genePartner@transcripts[mcols(genePartner@transcripts)$transcriptCategory == transcriptCategory ]) > 0) { - message(paste0("..found transcripts of type ", transcriptCategory)) + l <- length(genePartner@transcripts[mcols(genePartner@transcripts)$transcriptCategory == transcriptCategory ]) + if (l > 0) { + message(paste0(" ... found ",l, " transcripts of type ", transcriptCategory)) return(genePartner@transcripts[mcols(genePartner@transcripts)$transcriptCategory == transcriptCategory ]) } } From 3fb780c2fddc3a9c49c3e3495b69e544cce32af0 Mon Sep 17 00:00:00 2001 From: Philip Lijnzaad Date: Fri, 16 Feb 2018 17:24:45 +0100 Subject: [PATCH 04/11] added check on fusionJunctionSequenceLength (cherry picked from commit 8768ce180b2c7b324c980940e4457f7be0fa6835) --- R/utilities.R | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index f07dc9e..13b8057 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1156,13 +1156,19 @@ is.nucleotideAmount.valid <- function(argument_checker, nucleotideAmount, fusion length(fusion@geneA@junctionSequence) + length(fusion@geneB@junctionSequence) + if (fusionJunctionSequenceLength == 0 ) { + ArgumentCheck::addError( + msg = "length of junction sequence is 0, have they been determined?", + argcheck = argument_checker + ) + } + if (class(nucleotideAmount) != "numeric" || nucleotideAmount <= 0 || nucleotideAmount > fusionJunctionSequenceLength) { ArgumentCheck::addError( - msg = paste0("'nucleotideAmount' must be a numeric bigger than or equal ", - "to 0 and less than or equal to the fusion junction ", - "sequence length."), + msg = paste0("'nucleotideAmount' must be a numeric larger > 0 ", + "and <= fusion junction sequence length."), argcheck = argument_checker ) } From b3304409b60533f3d4dfdf9fa792a35db65a079e Mon Sep 17 00:00:00 2001 From: Philip Lijnzaad Date: Tue, 20 Feb 2018 12:38:56 +0100 Subject: [PATCH 05/11] start of plotFusionReadsSimple --- R/plotFusionReads.R | 174 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 173 insertions(+), 1 deletion(-) diff --git a/R/plotFusionReads.R b/R/plotFusionReads.R index dae8983..ad3a206 100644 --- a/R/plotFusionReads.R +++ b/R/plotFusionReads.R @@ -229,7 +229,179 @@ plotFusionReads <- function( grid::popViewport(1) } -} +} #plotFusionReads + +plotFusionReadsSimple <- function(fusion) { +### .validatePlotFusionReadsParams(fusion, showAllNucleotides, nucleotideAmount) + + # Create Biostrings::DNAStringSet of fusion sequence + fusionSequence <- Biostrings::DNAStringSet( + x = c(fusion@geneA@junctionSequence, + fusion@geneB@junctionSequence)) + names(fusionSequence) <- "chrNA" + # And also of a "zoomed-in" version only displaying some nucleotides on each + # end of the fusion junction + fusionSequenceShort <- Biostrings::DNAStringSet( + x = c( + substr( + fusion@geneA@junctionSequence, + nchar(fusion@geneA@junctionSequence)-nucleotideAmount+1, + nchar(fusion@geneA@junctionSequence)), + substr( + fusion@geneB@junctionSequence, + nchar(fusion@geneB@junctionSequence)-nucleotideAmount+1 -1, + # Added -1 again on the line above because Gviz does not show the last + # nucleotide. Ref: http://permalink.gmane.org/gmane.science.biology.informatics.conductor.devel/5726 + # Or if that link is down, google + # "Gviz doesn't provide enough horizontial space" + nchar(fusion@geneB@junctionSequence)))) + names(fusionSequenceShort) <- "chrNA" + + # Load fusion sequences to tracks + ref_chimera_short <- Gviz::SequenceTrack( + fusionSequenceShort, + genome = fusion@genomeVersion, + name = "Fusion Sequence") + ref_chimera <- Gviz::SequenceTrack( + fusionSequence, + genome = fusion@genomeVersion, + name = "Fusion Sequence") + + # DISPLAY PARAMETERS + + # Set display parameters for the fusion sequence + displayParameters <- list( + showTitle = FALSE, + col = "blue" # The color of the line when no indiviual letters can be plotted due to size limitations. + ) + Gviz::displayPars(ref_chimera) <- displayParameters + Gviz::displayPars(ref_chimera_short) <- displayParameters + + # Set display parameters for the fusion reads + Gviz::displayPars(fusion@fusionReadsAlignment) <- list( + showTitle = FALSE, + showMismatches = FALSE # Show mismatched reads? + ) + + # Do some plotting + + if (showAllNucleotides) { + # Create the grid we're gonna use for the plot + nf <- grid::grid.layout( + nrow = 1, + ncol = 1) + + # Open a new page to plot it + grid::grid.newpage() + + # Divide the page according to the grid we created + grid::pushViewport(grid::viewport(layout = nf)) + + # Open row 1, column 1 + grid::pushViewport(grid::viewport(layout.pos.row = 1, layout.pos.col = 1)) + # Plot the fusion read alignment + Gviz::plotTracks( + c(fusion@fusionReadsAlignment, ref_chimera), + from = 1, + to = nchar(fusionSequence), + chromosome = fusion@fusionReadsAlignment@chromosome, + type = "pileup", + sizes = c(6, 1), + add = TRUE + ) + # Calculate where in the alignment the breakpoint is + breakpoint <- nchar(fusion@geneA@junctionSequence)/nchar(fusionSequence) + # Plot line indicating fusion breakpoint + grid::grid.lines(x = grid::unit(breakpoint, "npc"), + y = grid::unit(c(0, 1), "npc"), + default.units = "npc", + arrow = NULL, + name = NULL, + gp = grid::gpar( + col = "black", + lty = "dotted"), + draw = TRUE, + vp = NULL) + # Close row 1, column 1 + grid::popViewport(1) + } else { + # Create the grid we're gonna use for the plot + nf <- grid::grid.layout( + nrow = 2, + ncol = 1, + heights = grid::unit(c(6, 1), "null"), + widths = grid::unit(c(1, 1), "null")) + + # Open a new page to plot it + grid::grid.newpage() + + # Divide the page according to the grid we created + grid::pushViewport(grid::viewport(layout = nf)) + + # Open row 1, column 1 + grid::pushViewport(grid::viewport(layout.pos.row = 1, layout.pos.col = 1)) + # Plot the fusion read alignment + Gviz::plotTracks( + fusion@fusionReadsAlignment, + from = 1, + to = nchar(fusionSequence), + chromosome = fusion@fusionReadsAlignment@chromosome, + type = "pileup", + add = TRUE + ) + # Calculate where in the alignment the breakpoint is + breakpoint <- nchar(fusion@geneA@junctionSequence)/nchar(fusionSequence) + # Plot line indicating fusion breakpoint + grid::grid.lines(x = grid::unit(breakpoint, "npc"), + y = grid::unit(c(0, 1), "npc"), + default.units = "npc", + arrow = NULL, + name = NULL, + gp = grid::gpar( + col = "black", + lty = "dotted"), + draw = TRUE, + vp = NULL) + # Close row 1, column 1 + grid::popViewport(1) + + # Open row 2, column 1 + grid::pushViewport(grid::viewport(layout.pos.row = 2, layout.pos.col = 1)) + # Plot a part of the fusion junction sequence + Gviz::plotTracks( + ref_chimera_short, + from = 1, + to = nchar(fusionSequenceShort), + chromosome = fusion@fusionReadsAlignment@chromosome, + add = TRUE + ) + # Plot line indicating fusion breakpoint + grid::grid.lines(x = grid::unit(0.5, "npc"), + y = grid::unit(c(0, 1), "npc"), + default.units = "npc", + arrow = NULL, + name = NULL, + gp = grid::gpar( + col = "black", + lty = "dotted"), + draw = TRUE, + vp = NULL) + # Plot line between lines + grid::grid.lines(x = grid::unit(c(0.5, breakpoint), "npc"), + y = grid::unit(1, "npc"), + default.units = "npc", + arrow = NULL, + name = NULL, + gp = grid::gpar( + col = "black", + lty = "dotted"), + draw = TRUE, + vp = NULL) + # Close row 1, column 2 + grid::popViewport(1) + } + +} #plotFusionReadsSimple .validatePlotFusionReadsParams <- function( fusion, From 134d8e73b6d2478db05d1cbfe3b8f27892c8a207 Mon Sep 17 00:00:00 2001 From: Philip Lijnzaad Date: Tue, 20 Feb 2018 12:40:36 +0100 Subject: [PATCH 06/11] arg check (for now) warning, not dying on missing reads --- R/utilities.R | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 60cd956..ef57e33 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1154,13 +1154,19 @@ is.nucleotideAmount.valid <- function(argument_checker, nucleotideAmount, fusion length(fusion@geneA@junctionSequence) + length(fusion@geneB@junctionSequence) + if (fusionJunctionSequenceLength == 0 ) { + ArgumentCheck::addWarning( + msg = "length of junction sequence is 0, have they been determined?", + argcheck = argument_checker + ) + } + if (class(nucleotideAmount) != "numeric" || nucleotideAmount <= 0 || nucleotideAmount > fusionJunctionSequenceLength) { - ArgumentCheck::addError( - msg = paste0("'nucleotideAmount' must be a numeric bigger than or equal ", - "to 0 and less than or equal to the fusion junction ", - "sequence length."), + ArgumentCheck::addWarning( + msg = paste0("'nucleotideAmount' must be a numeric larger > 0 ", + "and <= fusion junction sequence length."), argcheck = argument_checker ) } From 541da57f8967e4d111d153d8615cde8a0f770372 Mon Sep 17 00:00:00 2001 From: Philip Lijnzaad Date: Tue, 20 Feb 2018 13:44:00 +0100 Subject: [PATCH 07/11] now also setting fusion@inframe ... ... but only if column PROT_FUSION_TYPE is available (i.e. when reading star-fusion.fusion_predictions.abridged.annotated.coding_effect.tsv ) If the (default) "star-fusion.fusion_predictions.abridged.tsv" file is used, inframe remains NA (and a warning is issued) now also setting fusion@inframe ... --- R/importStarfusion.R | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/R/importStarfusion.R b/R/importStarfusion.R index 10ff00e..bd73c02 100644 --- a/R/importStarfusion.R +++ b/R/importStarfusion.R @@ -52,7 +52,8 @@ importStarfusion <- function (filename, genomeVersion, limit) { "LeftBreakEntropy" = col_number(), "RightBreakDinuc" = col_character(), "RightBreakEntropy" = col_number(), - "FFPM" = col_number() + "FFPM" = col_number(), + "PROT_FUSION_TYPE" = col_character() ) if (missing(limit)) { # Read all lines @@ -103,6 +104,17 @@ importStarfusion <- function (filename, genomeVersion, limit) { fusionToolSpecificData[["RightBreakEntropy"]] = report[[i, "RightBreakEntropy"]] fusionToolSpecificData[["FFPM"]] = report[[i, "FFPM"]] + if(!is.null(report$PROT_FUSION_TYPE)) { + ## only available when loading from coding_effect file (i.e. + ## star-fusion.fusion_predictions.abridged.annotated.coding_effect.tsv) + ft <- report[[i, "PROT_FUSION_TYPE"]] + fusionToolSpecificData[["inframe"]] <- ft + if (ft == 'INFRAME') + inframe <- TRUE + if (ft == 'FRAMESHIFT') + inframe <- FALSE + } + # id for this fusion id <- as.character(i) From 99a78a344fddeefffe37ab64dd4cf3644b87af79 Mon Sep 17 00:00:00 2001 From: Philip Lijnzaad Date: Mon, 26 Feb 2018 16:11:21 +0100 Subject: [PATCH 08/11] keep '#FusionName' column in fusionToolSpecificData --- R/importStarfusion.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/importStarfusion.R b/R/importStarfusion.R index bd73c02..454a35b 100644 --- a/R/importStarfusion.R +++ b/R/importStarfusion.R @@ -39,7 +39,8 @@ importStarfusion <- function (filename, genomeVersion, limit) { report <- withCallingHandlers( { col_types_starfusion = readr::cols_only( - "#FusionName" = col_skip(), +## "#FusionName" = col_skip(), + "#FusionName" = col_character(), "JunctionReadCount" = col_integer(), "SpanningFragCount" = col_integer(), "SpliceType" = col_skip(), @@ -97,6 +98,7 @@ importStarfusion <- function (filename, genomeVersion, limit) { # Import starfusion-specific fields fusionToolSpecificData <- list() + fusionToolSpecificData[["FusionName"]] = report[[i, "#FusionName"]] fusionToolSpecificData[["LargeAnchorSupport"]] = report[[i, "LargeAnchorSupport"]] fusionToolSpecificData[["LeftBreakDinuc"]] = report[[i, "LeftBreakDinuc"]] fusionToolSpecificData[["LeftBreakEntropy"]] = report[[i, "LeftBreakEntropy"]] From 78799c21611cd38a05b4cc320b4d0eae8f2cafaa Mon Sep 17 00:00:00 2001 From: Philip Lijnzaad Date: Mon, 26 Feb 2018 20:51:53 +0100 Subject: [PATCH 09/11] added author and maintainer field ... ... to make R CMD check shut up --- DESCRIPTION | 2 ++ 1 file changed, 2 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 0695c58..727272a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,6 +2,8 @@ Package: chimeraviz Type: Package Title: Visualization tools for gene fusions Version: 1.5.4 +Author: Stian Lågstad +Maintainer: Stian Lågstad Authors@R: c( person( given = "Stian", From 1ea0bed92ed967a448aac164984a060eafd894c3 Mon Sep 17 00:00:00 2001 From: Philip Lijnzaad Date: Mon, 26 Feb 2018 16:05:56 +0100 Subject: [PATCH 10/11] chrNA can now be overriden (needed for FI) --- R/utilities.R | 4 ++-- man/addFusionReadsAlignment.Rd | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 2ba5253..5c95e91 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -701,7 +701,7 @@ getTranscriptsEnsembldb <- function(fusion, edb) { #' fusion <- addFusionReadsAlignment(fusion, bamfile5267) #' #' @export -addFusionReadsAlignment <- function(fusion, bamfile) { +addFusionReadsAlignment <- function(fusion, bamfile, chromosome="chrNA") { # Check if we got a fusion object if (class(fusion) != "Fusion") { @@ -713,7 +713,7 @@ addFusionReadsAlignment <- function(fusion, bamfile) { isPaired = TRUE, # Setting chromosome to chrNA because this is a fusion sequence not found in # any reference genome. - chromosome = "chrNA", + chromosome = chromosome, name="Fusion Reads", genome = fusion@genomeVersion) diff --git a/man/addFusionReadsAlignment.Rd b/man/addFusionReadsAlignment.Rd index 2855213..2b2b02d 100644 --- a/man/addFusionReadsAlignment.Rd +++ b/man/addFusionReadsAlignment.Rd @@ -4,7 +4,7 @@ \alias{addFusionReadsAlignment} \title{Add fusion reads alignment to fusion object} \usage{ -addFusionReadsAlignment(fusion, bamfile) +addFusionReadsAlignment(fusion, bamfile, chromosome = "chrNA") } \arguments{ \item{fusion}{The fusion object to add a genomic alignment to.} From dfdbbcebde28388fa7411fdb29a94a764c7c10cb Mon Sep 17 00:00:00 2001 From: Philip Lijnzaad Date: Tue, 27 Feb 2018 12:40:38 +0100 Subject: [PATCH 11/11] limit=Inf as default for importStarFusion --- R/importStarfusion.R | 3 +-- man/importStarfusion.Rd | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/R/importStarfusion.R b/R/importStarfusion.R index 454a35b..91a5be9 100644 --- a/R/importStarfusion.R +++ b/R/importStarfusion.R @@ -20,8 +20,7 @@ #' # This should import a list of 3 fusions described in Fusion objects. #' #' @export -importStarfusion <- function (filename, genomeVersion, limit) { - +importStarfusion <- function (filename, genomeVersion, limit=Inf) { # Is the genome version valid? validGenomes <- c("hg19", "hg38", "mm10") if (is.na(match(tolower(genomeVersion), tolower(validGenomes)))) { diff --git a/man/importStarfusion.Rd b/man/importStarfusion.Rd index 3f225ae..ebaa697 100644 --- a/man/importStarfusion.Rd +++ b/man/importStarfusion.Rd @@ -4,7 +4,7 @@ \alias{importStarfusion} \title{Import results from a STAR-Fusion run into a list of Fusion objects.} \usage{ -importStarfusion(filename, genomeVersion, limit) +importStarfusion(filename, genomeVersion, limit = Inf) } \arguments{ \item{filename}{Filename for the STAR-Fusion