diff --git a/R/functions_CHIPIN.R b/R/functions_CHIPIN.R index 14f4135..340900e 100644 --- a/R/functions_CHIPIN.R +++ b/R/functions_CHIPIN.R @@ -403,7 +403,7 @@ find_gene_not_moving <- function(RPKM, raw_read_count, sample_name, output_dir, ####### #run deeptools using genes not moving determined by find_gene_not_moving or supplied by the user then perform linear normalization with non zero intercept ####### -linear_normalization <- function(constant_genes_file, path_to_bw, beforeRegionStartLength, afterRegionStartLength, regionBodyLength, binSize, output_dir){ +linear_normalization <- function(constant_genes_file, path_to_bw, beforeRegionStartLength, afterRegionStartLength, regionBodyLength, binSize, output_dir, nThreads){ cat("\n") cat("*****************************************") cat("\n") @@ -460,7 +460,7 @@ linear_normalization <- function(constant_genes_file, path_to_bw, beforeRegionSt cat("\n") cat("Run deeptools for reference sample") cat("\n") - system(paste("computeMatrix scale-regions -S", bw1, "-R", constant_genes_file, "--beforeRegionStartLength", beforeRegionStartLength, "--afterRegionStartLength", afterRegionStartLength, "--regionBodyLength", regionBodyLength, "--binSize", binSize, "-o", output_name)) + system(paste("computeMatrix scale-regions -S", bw1, "-R", constant_genes_file, "--beforeRegionStartLength", beforeRegionStartLength, "--afterRegionStartLength", afterRegionStartLength, "--regionBodyLength", regionBodyLength, "--binSize", binSize, "-o", output_name, "-p", nThreads)) matrix <- read.delim(output_name, header=FALSE, comment.char="@") val=data.frame(matrix[,7:ncol(matrix)]) moyC1=apply(val, 2, function(x){return(mean(na.rm=TRUE, as.numeric(as.character(unlist(x)))))}) @@ -483,7 +483,7 @@ linear_normalization <- function(constant_genes_file, path_to_bw, beforeRegionSt cat(sample_name) cat("\n") - system(paste("computeMatrix scale-regions -S", bw1, "-R", constant_genes_file, "--beforeRegionStartLength", beforeRegionStartLength, "--afterRegionStartLength", afterRegionStartLength, "--regionBodyLength", regionBodyLength, "--binSize", binSize, "-o", output_name)) + system(paste("computeMatrix scale-regions -S", bw1, "-R", constant_genes_file, "--beforeRegionStartLength", beforeRegionStartLength, "--afterRegionStartLength", afterRegionStartLength, "--regionBodyLength", regionBodyLength, "--binSize", binSize, "-o", output_name, "-p", nThreads)) cat("deeptools done") #Now all matrices are computed. @@ -542,7 +542,7 @@ linear_normalization <- function(constant_genes_file, path_to_bw, beforeRegionSt ####### #run deeptools using genes not moving determined by find_gene_not_moving or supplied by the user then perform quantile normalization ####### -quantile_norm <- function(path_to_bw, constant_genes_file, nGroup, output_folder, beforeRegionStartLength, afterRegionStartLength, regionBodyLength, binSize){ +quantile_norm <- function(path_to_bw, constant_genes_file, nGroup, output_folder, beforeRegionStartLength, afterRegionStartLength, regionBodyLength, binSize, nThreads){ cat("\n") cat("*****************************************") cat("\n") @@ -565,7 +565,7 @@ quantile_norm <- function(path_to_bw, constant_genes_file, nGroup, output_folder cat("\n") cat(sample_name) cat("\n") - system(paste("computeMatrix scale-regions -S", current_bw, "-R", constant_genes_file, "--beforeRegionStartLength", beforeRegionStartLength, "--afterRegionStartLength", afterRegionStartLength, "--regionBodyLength", regionBodyLength, "--binSize", binSize, "-o", output_name)) + system(paste("computeMatrix scale-regions -S", current_bw, "-R", constant_genes_file, "--beforeRegionStartLength", beforeRegionStartLength, "--afterRegionStartLength", afterRegionStartLength, "--regionBodyLength", regionBodyLength, "--binSize", binSize, "-o", output_name, "-p", nThreads)) matrix <- read.delim(output_name, header=FALSE, comment.char="@") current_values=unlist(matrix[,7:ncol(matrix)], use.names=FALSE) current_values[which(is.nan(current_values)==TRUE)]=0 @@ -697,25 +697,28 @@ plot_after_quantile<-function(path_to_bw, output_folder, path_to_file_with_const sample_nametmp2_ref=sample_nametmp1_ref[length(sample_nametmp1_ref)] sample_name_ref_tmp=paste(c(strsplit(sample_nametmp2_ref, "[.]")[[1]][1:length(strsplit(sample_nametmp2_ref, "[.]")[[1]])-1]), collapse="") #sample_name_ref_tmp=strsplit(sample_nametmp2_ref, ".bw")[[1]] - sample_name_ref_tpm=strsplit(sample_name_ref_tmp, "_")[[1]][2] - if(is.na(sample_name_ref_tpm)){sample_name_ref_tpm=sample_name_ref_tmp} + #sample_name_ref_tpm=strsplit(sample_name_ref_tmp, "_")[[1]][2] + #if(is.na(sample_name_ref_tpm)){sample_name_ref_tpm=sample_name_ref_tmp} #sample_name_ref=paste(sample_name_ref_tpm[1], sample_name_ref_tpm[2], sep="_") - mylegend=c(mylegend, sample_name_ref_tpm) + mylegend=c(mylegend, sample_name_ref_tmp) + #mylegend=c(mylegend, sample_name_ref_tpm) } for (k in 1:length(bw_current)){ if (i!= nbGraph){ if ((i != 1) & (k==1)){ #The first sample in legend is always the reference - mylegend=c(mylegend, sample_name_ref_tpm) + mylegend=c(mylegend, sample_name_ref_tmp) } sample_nametmp1=strsplit(bw_current[k], "/")[[1]] sample_nametmp2=sample_nametmp1[length(sample_nametmp1)] sample_name=paste(c(strsplit(sample_nametmp2, "[.]")[[1]][1:length(strsplit(sample_nametmp2, "[.]")[[1]])-1]), collapse="") + sample_name=strsplit(sample_nametmp2, split = "[.]")[[1]][1] #sample_name=strsplit(sample_nametmp2, ".bw")[[1]] - sample_name_final=strsplit(sample_name, "_")[[1]][2] - if (is.na(sample_name_final)){sample_name_final=sample_name} - mylegend=c(mylegend, sample_name_final) + #sample_name_final=strsplit(sample_name, "_")[[1]][2] + #if (is.na(sample_name_final)){sample_name_final=sample_name} + mylegend=c(mylegend, sample_name) + #mylegend=c(mylegend, sample_name_final) #sample_name_final=paste(sample_name_tmp[1], sample_name_tmp[2], sep="_") @@ -760,7 +763,7 @@ plot_after_quantile<-function(path_to_bw, output_folder, path_to_file_with_const mylegend=c() if ((nResteToPlot-4)<=0){ listToPlot=c(listToPlot, getDatabw_woRemoveNoise(as.character(unlist(bw_ref)))) - mylegend=c(mylegend, sample_name_ref_tpm) + mylegend=c(mylegend, sample_name_ref_tmp) for (j in 1:nResteToPlot){ listToPlot=c(listToPlot, getDatabw_woRemoveNoise(path_to_bw[(nPlotted+j)])) @@ -769,14 +772,15 @@ plot_after_quantile<-function(path_to_bw, output_folder, path_to_file_with_const sample_name=paste(c(strsplit(sample_nametmp2, "[.]")[[1]][1:length(strsplit(sample_nametmp2, "[.]")[[1]])-1]), collapse="") #sample_name=strsplit(sample_nametmp2, ".bw")[[1]] - sample_name_final=strsplit(sample_name, "_")[[1]][2] - if(is.na(sample_name_final)){sample_name_final=sample_name} + #sample_name_final=strsplit(sample_name, "_")[[1]][2] + #if(is.na(sample_name_final)){sample_name_final=sample_name} - mylegend=c(mylegend, sample_name_final) + #mylegend=c(mylegend, sample_name_final) + mylegend=c(mylegend, sample_name) } }else if (nResteToPlot-4>0){ listToPlot=c(listToPlot, getDatabw_woRemoveNoise(as.character(unlist(bw_ref)))) - mylegend=c(mylegend, sample_name_ref_tpm) + mylegend=c(mylegend, sample_name_ref_tmp) for (j in 1:nResteToPlot){ listToPlot=c(listToPlot, getDatabw_woRemoveNoise(path_to_bw[(nPlotted+j)])) @@ -786,10 +790,11 @@ plot_after_quantile<-function(path_to_bw, output_folder, path_to_file_with_const sample_name=paste(c(strsplit(sample_nametmp2, "[.]")[[1]][1:length(strsplit(sample_nametmp2, "[.]")[[1]])-1]), collapse="") #sample_name=strsplit(sample_nametmp2, ".bw")[[1]] - sample_name_final=strsplit(sample_name, "_")[[1]][2] - if(is.na(sample_name_final)){sample_name_final=sample_name} + #sample_name_final=strsplit(sample_name, "_")[[1]][2] + #if(is.na(sample_name_final)){sample_name_final=sample_name} #sample_name_final=paste(sample_name_tmp[1], sample_name_tmp[2], sep="_") - mylegend=c(mylegend, sample_name_final) + mylegend=c(mylegend, sample_name) + #mylegend=c(mylegend, sample_name_final) } } @@ -826,13 +831,15 @@ plot_after_quantile<-function(path_to_bw, output_folder, path_to_file_with_const sample_nametmp2=sample_nametmp1[length(sample_nametmp1)] sample_name=paste(c(strsplit(sample_nametmp2, "[.]")[[1]][1:length(strsplit(sample_nametmp2, "[.]")[[1]])-1]), collapse="") + sample_name= strsplit(sample_nametmp2, split = "[.]")[[1]][1] + #sample_name=strsplit(sample_nametmp2, ".bw")[[1]] - sample_name_final=strsplit(sample_name, "_")[[1]][2] - if(is.na(sample_name_final)){sample_name_final=sample_name} + #sample_name_final=strsplit(sample_name, "_")[[1]][2] + #if(is.na(sample_name_final)){sample_name_final=sample_name} #sample_name_final=paste(sample_name_tmp[1], sample_name_tmp[2], sep="_") - mylegend=c(mylegend, sample_name_final) + mylegend=c(mylegend, sample_name) } #Given the number of samples to plot we choose the appropriate function if (nSamples==2){ @@ -919,7 +926,9 @@ plot_after_linear<-function(path_to_bw, output_folder, path_to_file_with_constan sample_name=paste(c(strsplit(sample_nametmp1, "[.]")[[1]][1:length(strsplit(sample_nametmp1, "[.]")[[1]])-1]), collapse="") #sample_name=strsplit(sample_nametmp2, ".bw")[[1]] - + + sample_name= strsplit(sample_nametmp2, split = "[.]")[[1]][1] + mylegend=c(mylegend, sample_name) #sample_name_final=strsplit(sample_name, "_")[[1]][2] #sample_name_final=paste(sample_name_tmp[1], sample_name_tmp[2], sep="_") @@ -951,7 +960,13 @@ plot_after_linear<-function(path_to_bw, output_folder, path_to_file_with_constan # bw4=getDatabw_woRemoveNoise(as.character(unlist(bw_current[4]))) # bw5=getDatabw_woRemoveNoise(as.character(unlist(bw_current[5]))) } - + + #TEST + print("\n") + print(mylegend) + print("\n") + #TEST + p=plot_before_afternorm_several(bw1, bw2, bw3, bw4, bw5, NotMoving, mylegend, seq(-4000, 4000, by=step), "After normalization", 4000, step, c("indianred4", "steelblue4", "darkorchid3", "forestgreen", "lightsalmon2"), "Distance from TSS [bp]", paste("Average density of", histone_mark, sep=" ")) DF_after=list.append(fill_statsAfter(bw1, bw2, bw3, bw4, bw5, NotMoving, mylegend, seq(-4000, 4000, by=step), "After normalization", 4000, step, c("indianred4", "steelblue4", "darkorchid3", "forestgreen", "lightsalmon2"), "Distance from TSS [bp]", paste("Average density of", histone_mark, sep=" "), DF_after, ref)) plist=list.append(plist, p) @@ -976,10 +991,10 @@ plot_after_linear<-function(path_to_bw, output_folder, path_to_file_with_constan sample_nametmp2=sample_nametmp1[length(sample_nametmp1)] sample_name=paste(c(strsplit(sample_nametmp2, "[.]")[[1]][1:length(strsplit(sample_nametmp2, "[.]")[[1]])-1]), collapse="") + sample_name= strsplit(sample_nametmp2, split = "[.]")[[1]][1] #sample_name=strsplit(sample_nametmp2, ".bw")[[1]] mylegend=c(mylegend, sample_name) - print(mylegend) } }else if (nResteToPlot-4>0){ listToPlot=c(listToPlot, getDatabw_woRemoveNoise(bw_ref)) @@ -991,12 +1006,20 @@ plot_after_linear<-function(path_to_bw, output_folder, path_to_file_with_constan sample_nametmp2=sample_nametmp1[length(sample_nametmp1)] sample_name=paste(c(strsplit(sample_nametmp2, "[.]")[[1]][1:length(strsplit(sample_nametmp2, "[.]")[[1]])-1]), collapse="") + sample_name= strsplit(sample_nametmp2, split = "[.]")[[1]][1] + #sample_name=strsplit(sample_nametmp2, ".bw")[[1]] mylegend=c(mylegend, sample_name) } } + #TEST + print("\n") + print(mylegend) + print("\n") + #TEST + if (length(listToPlot)==2){ p=plot_before_afternorm_2profiles(listToPlot[[1]], listToPlot[[2]], NotMoving, mylegend, seq(-4000, 4000, by=step), "After Normalization", 4000, step, c("indianred4", "steelblue4"), "Distance from TSS [bp]", paste("Average density of", histone_mark, sep=" ")) DF_after=list.append(fill_statsAfter_1profiles(listToPlot[[2]], NotMoving, mylegend[2], seq(-4000, 4000, by=step), "After Normalization", 4000, step, c("indianred4"), "Distance from TSS [bp]", paste("Average density of", histone_mark, sep=" "), DF_after)) @@ -1097,19 +1120,33 @@ plot_before_quantile<-function(path_to_bw, output_folder, path_to_file_with_cons } mylegend=c() if (i==1){ - #Define the reference + bw_ref=path_to_bw[1] - sample_nametmp1_ref=strsplit(bw_ref, "/")[[1]] + sample_nametmp1_ref=strsplit(as.character(unlist(bw_ref)), "/")[[1]] sample_nametmp2_ref=sample_nametmp1_ref[length(sample_nametmp1_ref)] - sample_name_ref_tmp=paste(c(strsplit(sample_nametmp2_ref, "[.]")[[1]][1:length(strsplit(sample_nametmp2_ref, "[.]")[[1]])-1]), collapse="") #sample_name_ref_tmp=strsplit(sample_nametmp2_ref, ".bw")[[1]] + #sample_name_ref_tpm=strsplit(sample_name_ref_tmp, "_")[[1]][2] + #if(is.na(sample_name_ref_tpm)){sample_name_ref_tpm=sample_name_ref_tmp} + #sample_name_ref=paste(sample_name_ref_tpm[1], sample_name_ref_tpm[2], sep="_") + mylegend=c(mylegend, sample_name_ref_tmp) + #mylegend=c(mylegend, sample_name_ref_tpm) + + #OLD START + #Define the reference + #bw_ref=path_to_bw[1] + #sample_nametmp1_ref=strsplit(bw_ref, "/")[[1]] + #sample_nametmp2_ref=sample_nametmp1_ref[length(sample_nametmp1_ref)] + + #sample_name_ref_tmp=paste(c(strsplit(sample_nametmp2_ref, "[.]")[[1]][1:length(strsplit(sample_nametmp2_ref, "[.]")[[1]])-1]), collapse="") + #sample_name_ref_tmp=strsplit(sample_nametmp2_ref, ".bw")[[1]] - sample_name_ref=sample_name_ref_tmp + #sample_name_ref=sample_name_ref_tmp #sample_name_ref_tpm=strsplit(sample_name_ref_tmp, "_")[[1]][2] #sample_name_ref=paste(sample_name_ref_tpm[1], sample_name_ref_tpm[2], sep="_") - mylegend=c(mylegend, sample_name_ref_tmp) + #mylegend=c(mylegend, sample_name_ref_tmp) + #OLD END } for (k in 1:length(bw_current)){ if (i!= nbGraph){ @@ -1122,6 +1159,9 @@ plot_before_quantile<-function(path_to_bw, output_folder, path_to_file_with_cons sample_name=paste(c(strsplit(sample_nametmp2, "[.]")[[1]][1:length(strsplit(sample_nametmp2, "[.]")[[1]])-1]), collapse="") + #TEST START + sample_name=strsplit(sample_nametmp2, split = "[.]")[[1]][1] + #TEST END #sample_name=strsplit(sample_nametmp2, ".bw")[[1]] mylegend=c(mylegend, sample_name) @@ -1181,6 +1221,7 @@ plot_before_quantile<-function(path_to_bw, output_folder, path_to_file_with_cons sample_name=paste(c(strsplit(sample_nametmp2, "[.]")[[1]][1:length(strsplit(sample_nametmp2, "[.]")[[1]])-1]), collapse="") + sample_name=strsplit(sample_nametmp2, split = "[.]")[[1]][1] #sample_name=strsplit(sample_nametmp2, ".bw")[[1]] mylegend=c(mylegend, sample_name) @@ -1278,7 +1319,7 @@ plot_ggplot_4curves <- function(matrixC1, matrixC2, matrixC3, matrixC4, mylegend plot_data=gather(G1, condition, valeur, mylegend, factor_key=TRUE) return(ggplot(plot_data) + geom_line(data=plot_data, aes(x=abscisse, y=valeur, group=condition, colour=condition )) + geom_ribbon(aes(ymin = plot_data$valeur - std.error(matrixC1), ymax = plot_data$valeur + std.error(matrixC1), x=plot_data$abscisse, fill=condition), alpha = 0.2, stat="identity") - + scale_color_manual(values=color) + scale_fill_manual(values=color) + + + scale_color_manual(values=color) + scale_fill_colorblind() + labs(x=xlab, y=ylab, title=title) + theme_bw() + ylim(0, xlimite)) } @@ -1288,7 +1329,7 @@ plot_ggplot_3curves <- function(matrixC1, matrixC2, matrixC3, mylegend, x, title plot_data=gather(G1, condition, valeur, mylegend, factor_key=TRUE) return(ggplot(plot_data) + geom_line(data=plot_data, aes(x=abscisse, y=valeur, group=condition, colour=condition )) + geom_ribbon(aes(ymin = plot_data$valeur - std.error(matrixC1), ymax = plot_data$valeur + std.error(matrixC1), x=plot_data$abscisse, fill=condition), alpha = 0.2, stat="identity") + - scale_color_manual(values=color) +scale_fill_manual(values=color) + + scale_color_manual(values=color) + scale_fill_colorblind() + labs(x=xlab, y=ylab, title=title) + theme_bw() + ylim(0, xlimite)) } @@ -1298,7 +1339,7 @@ plot_ggplot_2curves <- function(matrixC1, matrixC2, mylegend, x, title, color, y plot_data=gather(G1, condition, valeur, mylegend, factor_key=TRUE) return(ggplot(plot_data) + geom_line(data=plot_data, aes(x=abscisse, y=valeur, group=condition, colour=condition )) + geom_ribbon(aes(ymin = plot_data$valeur - std.error(matrixC1), ymax = plot_data$valeur + std.error(matrixC1), x=plot_data$abscisse, fill=condition), alpha = 0.2, stat="identity") + - scale_color_manual(values=color) +scale_fill_manual(values=color) + + scale_color_manual(values=color) +scale_fill_colorblind() + labs(x=xlab, y=ylab, title=title) + theme_bw() + ylim(0, xlimite)) } @@ -1308,8 +1349,9 @@ plot_ggplot_fivecurves <- function(matrixC1, matrixC2, matrixC3, matrixC4, matri plot_data=gather(G1, condition, valeur, mylegend, factor_key=TRUE) return(ggplot(plot_data) + geom_line(data=plot_data, aes(x=abscisse, y=valeur, group=condition, colour=condition )) + geom_ribbon(aes(ymin = plot_data$valeur - std.error(matrixC1), ymax = plot_data$valeur + std.error(matrixC1), x=plot_data$abscisse, fill=condition), alpha = 0.2, stat="identity") + - scale_color_manual(values=color) + scale_fill_manual(values=color) + + scale_color_manual(values=color) + scale_fill_colorblind() + labs(x=xlab, y=ylab, title=title) + theme_bw() + ylim(0, xlimite)) + #scale_fill_manual(values=color) } @@ -1781,35 +1823,13 @@ computeStats<-function(fileBefore, fileAfter, nSamples){ #################################################### plot_expression <- function(RPKM=NULL, raw_read_count=NULL, path_to_bw, output_dir=".", organism, histone_mark="ChIP-seq signal"){ - if (organism == "hg19"){ - data("A_hg19") - data("TSS_hg19") - data("allgenes_hg19") - exon_lengths=A_hg19 - D_TSS=TSS_hg19 - Allgenes=allgenes_hg19 - }else if(organism == "hg38"){ - data("A_hg38") - data("TSS_hg38") - data("allgenes_hg38") - exon_lengths=A_hg38 - D_TSS=TSS_hg38 - Allgenes=allgenes_hg38 - }else if(organism == "mm10"){ - data("A_mm10") - data("TSS_mm10") - data("allgenes_mm10") - exon_lengths=A_mm10 - D_TSS=TSS_mm10 - Allgenes=allgenes_mm10 - }else if(organism == "mm9"){ - data("A_mm9") - data("TSS_mm9") - data("allgenes_mm9") - exon_lengths=A_mm9 - D_TSS=TSS_mm9 - Allgenes=allgenes_mm9 - } + + data(list = paste0("A_", organism)) + data(list = paste0("TSS_", organism)) + data(list = paste0("allgenes_", organism)) + exon_lengths=eval(parse(text = paste0("A_", organism))) + D_TSS=eval(parse(text = paste0("TSS_", organism))) + Allgenes=eval(parse(text = paste0("allgenes_", organism))) cat("\n") cat("*****************************************") @@ -1873,6 +1893,7 @@ plot_expression <- function(RPKM=NULL, raw_read_count=NULL, path_to_bw, output_d RPKM_rep=read.table(RPKM, header=TRUE) } + tmpb <- RPKM_rep[,2:ncol(RPKM_rep)] #rownames(tmpb) <- RPKM_rep$V1 tmp_apply=apply(data.matrix(tmpb), 1, mean) @@ -1881,7 +1902,11 @@ plot_expression <- function(RPKM=NULL, raw_read_count=NULL, path_to_bw, output_d #Clustering rep_clusters=build_clusters_expression(tmp_apply) nSamples=length(path_to_bw) + for (i in 1:nSamples){ + + start.time <- Sys.time() + bw1=path_to_bw[i] sample_nametmp1=strsplit(bw1, "/")[[1]] @@ -1889,6 +1914,8 @@ plot_expression <- function(RPKM=NULL, raw_read_count=NULL, path_to_bw, output_d sample_name=paste(strsplit(sample_nametmp2, "[.]")[[1]][1:length(strsplit(sample_nametmp2, "[.]")[[1]])-1], collapse = ".") bw1_read=getDatabw_woRemoveNoise(bw1) + + print(sample_name) rep_profiles=profiles_gene_expression(rep_clusters, bw1_read, D_TSS, radius, step) @@ -1897,6 +1924,12 @@ plot_expression <- function(RPKM=NULL, raw_read_count=NULL, path_to_bw, output_d p=plot_ggplot_threecurves_expression(rep_profiles[[1]], rep_profiles[[2]], rep_profiles[[3]], x, sample_name, color, paste("Average density of", histone_mark, sep=" "), "Distance from TSS [bp]") #p=plot_ggplot_several(rep_profiles[[1]], rep_profiles[[2]], rep_profiles[[3]], x, title, color) plist_expression=list.append(plist_expression, p) + + end.time <- Sys.time() + time.taken <- end.time - start.time + print("Time taken for building clusters : ") + print(time.taken) + } #pdf('expression2.pdf',width=5, height=5) main=marrangeGrob(grobs=plist_expression,ncol=2, nrow=2) @@ -1913,43 +1946,21 @@ plot_expression <- function(RPKM=NULL, raw_read_count=NULL, path_to_bw, output_d CHIPIN_normalize <- function(path_to_bw, type_norm="linear", RPKM=NULL, raw_read_count=NULL, path_to_file_with_constant_genes=NULL, sample_name="sample", output_dir=".", organism, beforeRegionStartLength=4000, afterRegionStartLength=4000, regionBodyLength=40000, binSize=10, expression_plot=FALSE, - compute_stat=FALSE, percentage=0.1, nGroup=20, histone_mark="ChIP-seq signal"){ + compute_stat=FALSE, percentage=0.1, nGroup=20, histone_mark="ChIP-seq signal", nThreads = 1){ radius=4000 step=50 DF_before=list() DF_after=list() - + nSamples=length(path_to_bw) - if (organism == "hg19"){ - data("A_hg19") - data("TSS_hg19") - data("allgenes_hg19") - exon_lengths=A_hg19 - D_TSS=TSS_hg19 - Allgenes=allgenes_hg19 - }else if(organism == "hg38"){ - data("A_hg38") - data("TSS_hg38") - data("allgenes_hg38") - exon_lengths=A_hg38 - D_TSS=TSS_hg38 - Allgenes=allgenes_hg38 - }else if(organism == "mm10"){ - data("A_mm10") - data("TSS_mm10") - data("allgenes_mm10") - exon_lengths=A_mm10 - D_TSS=TSS_mm10 - Allgenes=allgenes_mm10 - }else if(organism == "mm9"){ - data("A_mm9") - data("TSS_mm9") - data("allgenes_mm9") - exon_lengths=A_mm9 - D_TSS=TSS_mm9 - Allgenes=allgenes_mm9 - } + data(list = paste0("A_", organism)) + data(list = paste0("TSS_", organism)) + data(list = paste0("allgenes_", organism)) + exon_lengths=eval(parse(text = paste0("A_", organism))) + D_TSS=eval(parse(text = paste0("TSS_", organism))) + Allgenes=eval(parse(text = paste0("allgenes_", organism))) + if (is.null(output_dir)) {output_dir="."} #should never happen #add / to output_dir if needed: if (!(str_sub(output_dir, start= -1)=="/")) { @@ -1977,14 +1988,14 @@ CHIPIN_normalize <- function(path_to_bw, type_norm="linear", RPKM=NULL, raw_read } if (type_norm=="linear" & is.null(D_TSS)==FALSE){ - pathRenorm=linear_normalization(path_to_file_with_constant_genes, path_to_bw, beforeRegionStartLength, afterRegionStartLength, regionBodyLength, binSize, output_dir) + pathRenorm=linear_normalization(path_to_file_with_constant_genes, path_to_bw, beforeRegionStartLength, afterRegionStartLength, regionBodyLength, binSize, output_dir, nThreads) rep_stats_after=plot_after_linear(unlist(pathRenorm), output_dir, path_to_file_with_constant_genes, step, DF_after,histone_mark) rep_stats_before=plot_before_quantile(path_to_bw, output_dir, path_to_file_with_constant_genes, step, DF_before, histone_mark) }else if (type_norm=="quantile" & is.null(D_TSS)==FALSE){ - pathRenorm=quantile_norm(path_to_bw, path_to_file_with_constant_genes, nGroup, output_dir, beforeRegionStartLength, afterRegionStartLength, regionBodyLength, binSize) + pathRenorm=quantile_norm(path_to_bw, path_to_file_with_constant_genes, nGroup, output_dir, beforeRegionStartLength, afterRegionStartLength, regionBodyLength, binSize, nThreads) rep_stats_after=plot_after_quantile(unlist(pathRenorm), output_dir, path_to_file_with_constant_genes, step, DF_after, histone_mark) diff --git a/data/TSS_hg38.RData b/data/TSS_hg38.RData index 9783830..0136a51 100644 Binary files a/data/TSS_hg38.RData and b/data/TSS_hg38.RData differ