diff --git a/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala index 78bf5f309f77ea2916715a0681640b4d1400cd72..67240e7ed0dc80ac33eee75c9fb0c8f25ba0f6f4 100644 --- a/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala +++ b/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala @@ -17,15 +17,16 @@ package nl.lumc.sasc.biopet.pipelines.bammetrics import java.io.File -import nl.lumc.sasc.biopet.core.annotations.{ RibosomalRefFlat, AnnotationRefFlat } +import nl.lumc.sasc.biopet.core.annotations.{ AnnotationRefFlat, RibosomalRefFlat } import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.summary.SummaryQScript -import nl.lumc.sasc.biopet.core.{ Reference, BiopetFifoPipe, PipelineCommand, SampleLibraryTag } +import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag } import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect } import nl.lumc.sasc.biopet.extensions.picard._ import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat +import nl.lumc.sasc.biopet.utils.intervals.BedCheck import org.broadinstitute.gatk.queue.QScript class BamMetrics(val root: Configurable) extends QScript @@ -72,6 +73,8 @@ class BamMetrics(val root: Configurable) extends QScript /** executed before script */ def init(): Unit = { inputFiles :+= new InputFile(inputBam) + ampliconBedFile.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true)) + roiBedFiles.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true)) } /** Script to add jobs */ diff --git a/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala b/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala index 4a75d99e00d3a21324c8d5459d891e6435691185..6da49168e2166189dd05fb481ded00ccff66b43a 100644 --- a/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala +++ b/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala @@ -61,9 +61,9 @@ class BamMetricsTest extends TestNGSuite with Matchers { def testBamMetrics(rois: Int, amplicon: Boolean, rna: Boolean, wgs: Boolean) = { val map = ConfigUtils.mergeMaps(Map("output_dir" -> BamMetricsTest.outputDir, "rna_metrics" -> rna, "wgs_metrics" -> wgs), Map(BamMetricsTest.executables.toSeq: _*)) ++ - (if (amplicon) Map("amplicon_bed" -> "amplicon.bed") else Map()) ++ + (if (amplicon) Map("amplicon_bed" -> BamMetricsTest.ampliconBed.getAbsolutePath) else Map()) ++ (if (rna) Map("annotation_refflat" -> "transcripts.refFlat") else Map()) ++ - Map("regions_of_interest" -> (1 to rois).map("roi_" + _ + ".bed").toList) + Map("regions_of_interest" -> (1 to rois).map(BamMetricsTest.roi(_).getAbsolutePath).toList) val bammetrics: BamMetrics = initPipeline(map) bammetrics.inputBam = BamMetricsTest.bam @@ -94,6 +94,14 @@ object BamMetricsTest { val bam = new File(outputDir, "input" + File.separator + "bla.bam") Files.touch(bam) + val ampliconBed = new File(outputDir, "input" + File.separator + "amplicon_bed.bed") + Files.touch(ampliconBed) + + def roi(i: Int): File = { + val roi = new File(outputDir, "input" + File.separator + s"roi${i}.bed") + 
Files.touch(roi) + roi + } private def copyFile(name: String): Unit = { val is = getClass.getResourceAsStream("/" + name) diff --git a/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala b/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala index 3c03ebe2cd0839f9955623ce3f03da29e290b113..d0162ea58d23d9410913f3d3992cfdbf751b4bde 100644 --- a/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala +++ b/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala @@ -26,7 +26,6 @@ import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand } import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins } import nl.lumc.sasc.biopet.pipelines.shiva.Shiva import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta -import nl.lumc.sasc.biopet.utils.ConfigUtils import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript diff --git a/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala index 16626532ce11401652fef73d9320b689a8dc138d..8c044f719aa4516f519705ee37476ff7c0c7c4b9 100644 --- a/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala +++ b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala @@ -73,6 +73,8 @@ class BiopetFifoPipe(val root: Configurable, _pipesJobs :::= commands _pipesJobs = _pipesJobs.distinct + + analysisName = commands.map(_.analysisName).mkString("_") } override def beforeCmd(): Unit = { diff --git a/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala index 43af927eb622b8831a1a0343b9eecb761de433d8..8365cc946ef6395409744f612eec2d9e3b6b52af 100644 --- a/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala +++ b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala @@ -132,7 +132,7 @@ trait ReportBuilder extends ToolCommand { logger.info("Start") val argsParser = new OptParser - val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) require(cmdArgs.outputDir.exists(), "Output dir does not exist") require(cmdArgs.outputDir.isDirectory, "Output dir is not a directory") diff --git a/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/cnmops.R b/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/cnmops.R new file mode 100644 index 0000000000000000000000000000000000000000..b2a4be982f897a9555da631a9602d0c9ac37cc45 --- /dev/null +++ b/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/cnmops.R @@ -0,0 +1,68 @@ +#!/usr/bin/env Rscript +suppressPackageStartupMessages(library('cn.mops')) +suppressPackageStartupMessages(library('optparse')) + +# Script from https://git.lumc.nl/lgtc-bioinformatics/gapss3/blob/master/src/CNV/makeCnmops.sh +# modified to take arguments + +option_list <- list( + make_option(c("--rawoutput"), dest="rawoutput"), + make_option(c("--cnv"), dest="cnv"), + make_option(c("--cnr"), dest="cnr"), + make_option(c("--chr"), dest="chr"), + make_option(c("--threads"), dest="threads", default=8, type="integer") + ) + +parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) +arguments = parse_args(parser, positional_arguments=TRUE) +opt = arguments$options +args = arguments$args + +chromosome 
<- opt$chr +CNVoutput <- opt$cnv +CNRoutput <- opt$cnr +bamFile <- args + +BAMFiles <- c(bamFile) +bamDataRanges <- getReadCountsFromBAM(BAMFiles, mode="paired", refSeqName=chromosome, WL=1000, parallel=opt$threads) + +write.table(as.data.frame( bamDataRanges ), quote = FALSE, opt$rawoutput, row.names=FALSE) + +res <- cn.mops(bamDataRanges) +res <- calcIntegerCopyNumbers(res) + +write.table(as.data.frame(cnvs(res)), quote = FALSE, CNVoutput, row.names=FALSE) +write.table(as.data.frame(cnvr(res)), quote = FALSE, CNRoutput, row.names=FALSE) + + +ppi <- 300 +plot_margins <- c(3,4,1,2)+0.1 +label_positions <- c(2,0.5,0) + +dir.create(chromosome, showWarnings=FALSE, recursive=TRUE, mode="0744") + +# Plot chromosome per sample. +for ( i in 1:length(BAMFiles)){ + png(file=paste(chromosome,"/",chromosome,"-segplot-",i,".png", sep=""), + width = 16 * ppi, height = 10 * ppi, + res=ppi, bg = "white" + ) + par(mfrow = c(1,1)) + par(mar=plot_margins) + par(mgp=label_positions) + segplot(res,sampleIdx=i) + dev.off() +} + +# Plot cnvr regions. +for ( i in 1:nrow(as.data.frame(cnvr(res)))) { + png(file=paste(chromosome,"/",chromosome,"-cnv-",i,".png",sep=""), + width = 16 * ppi, height = 10 * ppi, + res=ppi, bg = "white") + par(mfrow = c(1,1)) + par(mar=plot_margins) + par(mgp=label_positions) + plot(res,which=i,toFile=TRUE) + dev.off() +} + diff --git a/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/cnmops_wes.R b/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/cnmops_wes.R new file mode 100644 index 0000000000000000000000000000000000000000..3869cf8a1254d9fd0ff1e32ad1e74bceab4cb705 --- /dev/null +++ b/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/cnmops_wes.R @@ -0,0 +1,77 @@ +#!/usr/bin/env Rscript +suppressPackageStartupMessages(library('cn.mops')) +suppressPackageStartupMessages(library('optparse')) + +# Script from https://git.lumc.nl/lgtc-bioinformatics/gapss3/blob/master/src/CNV/makeCnmops.sh +# modified to take arguments + +option_list <- list( + make_option(c("--rawoutput"), dest="rawoutput"), + make_option(c("--cnv"), dest="cnv"), + make_option(c("--cnr"), dest="cnr"), + make_option(c("--chr"), dest="chr"), + make_option(c("--targetBed"), dest="targetBed"), + make_option(c("--threads"), dest="threads", default=8, type="integer") + ) + +parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) +arguments = parse_args(parser, positional_arguments=TRUE) +opt = arguments$options +args = arguments$args + +chromosome <- opt$chr +CNVoutput <- opt$cnv +CNRoutput <- opt$cnr +bamFile <- args + +BAMFiles <- c(bamFile) + +### WES Specific code +segments <- read.table(opt$targetBed, sep="\t", as.is=TRUE) +# filter the segments by the requested chromosome +segments <- segments[ segments[,1] == chromosome, ] +gr <- GRanges(segments[,1],IRanges(segments[,2],segments[,3])) +### END WES Specific code + +bamDataRanges <- getSegmentReadCountsFromBAM(BAMFiles, GR=gr, mode="paired", parallel=opt$threads) + +write.table(as.data.frame( bamDataRanges ), quote = FALSE, opt$rawoutput, row.names=FALSE) + +res <- exomecn.mops(bamDataRanges) +res <- calcIntegerCopyNumbers(res) + +write.table(as.data.frame(cnvs(res)), quote = FALSE, CNVoutput, row.names=FALSE) +write.table(as.data.frame(cnvr(res)), quote = FALSE, CNRoutput, row.names=FALSE) + + +ppi <- 300 +plot_margins <- c(3,4,1,2)+0.1 +label_positions <- c(2,0.5,0) + +dir.create(chromosome, showWarnings=FALSE, recursive=TRUE, mode="0744") + +# Plot chromosome per sample. 
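+# segplot() from cn.mops plots the normalised read counts together with the
+# detected segments for one sample; one PNG per input BAM is written into the
+# per-chromosome directory created above.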
+for ( i in 1:length(BAMFiles)){ + png(file=paste(chromosome,"/",chromosome,"-segplot-",i,".png", sep=""), + width = 16 * ppi, height = 10 * ppi, + res=ppi, bg = "white" + ) + par(mfrow = c(1,1)) + par(mar=plot_margins) + par(mgp=label_positions) + segplot(res,sampleIdx=i) + dev.off() +} + +# Plot cnvr regions. +for ( i in 1:nrow(as.data.frame(cnvr(res)))) { + png(file=paste(chromosome,"/",chromosome,"-cnv-",i,".png",sep=""), + width = 16 * ppi, height = 10 * ppi, + res=ppi, bg = "white") + par(mfrow = c(1,1)) + par(mar=plot_margins) + par(mgp=label_positions) + plot(res,which=i,toFile=TRUE) + dev.off() +} + diff --git a/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/freec/freec_BAFPlot.R b/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/freec/freec_BAFPlot.R new file mode 100644 index 0000000000000000000000000000000000000000..1a464e9476a0bc2e7e0ca03b7524f92bb0b7dd71 --- /dev/null +++ b/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/freec/freec_BAFPlot.R @@ -0,0 +1,76 @@ +library('optparse') +# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet + +option_list <- list( + make_option(c("-i", "--input"), dest="input"), + make_option(c("-o", "--output"), dest="output") + ) + +parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) +opt = parse_args(parser) + + +# +# Load Data +# + +dataTable <-read.table(opt$input, header=TRUE); +BAF<-data.frame(dataTable) +chromosomes <- levels(dataTable$Chromosome) +ppi <- 300 +plot_margins <- c(3,4,1,2)+0.1 +label_positions <- c(2,0.5,0) + + +png(filename = opt$output, width = 16 * ppi, height = 10 * ppi, + res=ppi, bg = "white") +par(mfrow = c(6,4)) +par(mar=plot_margins) +par(mgp=label_positions) + + +for (i in chromosomes) { + tt <- which(BAF$Chromosome==i) + if (length(tt)>0){ + lBAF <-BAF[tt,] + plot(lBAF$Position, + lBAF$BAF, + ylim = c(-0.1,1.1), + xlab = paste ("position, chr",i), + ylab = "BAF", + pch = ".", + col = colors()[1]) + + tt <- which(lBAF$A==0.5) + points(lBAF$Position[tt],lBAF$BAF[tt],pch = ".",col = colors()[92]) + tt <- which(lBAF$A!=0.5 & lBAF$A>=0) + points(lBAF$Position[tt],lBAF$BAF[tt],pch = ".",col = colors()[62]) + tt <- 1 + pres <- 1 + + if (length(lBAF$A)>4) { + for (j in c(2:(length(lBAF$A)-pres-1))) { + if (lBAF$A[j]==lBAF$A[j+pres]) { + tt[length(tt)+1] <- j + } + } + points(lBAF$Position[tt],lBAF$A[tt],pch = ".",col = colors()[24],cex=4) + points(lBAF$Position[tt],lBAF$B[tt],pch = ".",col = colors()[24],cex=4) + } + + tt <- 1 + pres <- 1 + if (length(lBAF$FittedA)>4) { + for (j in c(2:(length(lBAF$FittedA)-pres-1))) { + if (lBAF$FittedA[j]==lBAF$FittedA[j+pres]) { + tt[length(tt)+1] <- j + } + } + points(lBAF$Position[tt],lBAF$FittedA[tt],pch = ".",col = colors()[463],cex=4) + points(lBAF$Position[tt],lBAF$FittedB[tt],pch = ".",col = colors()[463],cex=4) + } + + } + +} +dev.off() diff --git a/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/freec/freec_CNVPlot.R b/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/freec/freec_CNVPlot.R new file mode 100644 index 0000000000000000000000000000000000000000..a0061bc68b46953a34ba06923ee0d7478dd6618b --- /dev/null +++ b/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/freec/freec_CNVPlot.R @@ -0,0 +1,211 @@ +library('optparse') +library('naturalsort') + +# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet + +option_list <- list( + 
make_option(c("-m", "--mappability"), dest="mappability"), + make_option(c("-p", "--ploidy"), default=2, type="integer", dest="ploidy"), + make_option(c("-i", "--input"), dest="input"), + make_option(c("-o", "--output"), dest="output") + ) + +parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) +opt = parse_args(parser) + + +# +# Load mappability track +# + +mappabilityFile <- opt$mappability +mappabilityTrack <- read.table(mappabilityFile, header=FALSE, col.names=c("chrom", "start", "end", "score")) + +mappabilityTrack$Start <- mappabilityTrack$start+1 +mappabilityTrack$Chromosome <- gsub("chr", "", mappabilityTrack$chrom) + + +# +# Load Data +# + + +dataTable <- read.table( opt$input , header=TRUE) +input_ratio <- data.frame(dataTable) + +chromosomes <- naturalsort(levels(input_ratio$Chromosome)) +input_ratio$Chromosome <- factor(input_ratio$Chromosome, levels=chromosomes, ordered=T) + +sorted_ratio <- input_ratio[order(input_ratio$Chromosome),] +ratio <- merge(sorted_ratio, mappabilityTrack, sort=TRUE) +ratio <- ratio[order(ratio$Chromosome, ratio$Start),] + + +ploidy <- opt$ploidy +ppi <- 300 +plot_margins <- c(3,4,1,2)+0.1 +label_positions <- c(2,0.5,0) + + +maxLevelToPlot <- 3 +for (i in c(1:length(ratio$Ratio))) { + if (ratio$Ratio[i]>maxLevelToPlot) { + ratio$Ratio[i]=maxLevelToPlot + } +} + +# +# Plot the graphs per chromosome +# + +for (i in chromosomes) { + + png(filename = paste(opt$output, ".", i,".png",sep=""), width = 4 * ppi, height = 2.5 * ppi, + res=ppi, bg = "white") + par(mfrow = c(1,1)) + par(mar=plot_margins) + par(mgp=label_positions) + + + tt <- which(ratio$Chromosome==i) + if (length(tt)>0) { + plot(ratio$Start[tt], + ratio$Ratio[tt]*ploidy, + ylim = c(0,maxLevelToPlot*ploidy), + xlab = paste ("position, chr",i), + ylab = "normalized CN", + pch = ".", + col = colors()[88]) + + title(outer=TRUE) + tt <- which(ratio$Chromosome==i & ratio$CopyNumber>ploidy ) + points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136]) + + tt <- which(ratio$Chromosome==i & ratio$Ratio==maxLevelToPlot & ratio$CopyNumber>ploidy) + points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136],cex=4) + + tt <- which(ratio$Chromosome==i & ratio$CopyNumber<ploidy & ratio$CopyNumber!= -1) + points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[461]) + tt <- which(ratio$Chromosome==i) + + #UNCOMMENT HERE TO SEE THE PREDICTED COPY NUMBER LEVEL: + #points(ratio$Start[tt],ratio$CopyNumber[tt], pch = ".", col = colors()[24],cex=4) + } + #tt <- which(ratio$Chromosome==i) + + #UNCOMMENT HERE TO SEE THE EVALUATED MEDIAN LEVEL PER SEGMENT: + #points(ratio$Start[tt],ratio$MedianRatio[tt]*ploidy, pch = ".", col = colors()[463],cex=4) + + dev.off() +} + + + + + + + + + + +png(filename = paste(opt$output, ".png",sep=""), width = 16 * ppi, height = 10 * ppi, + res=ppi, bg = "white") +par(mfrow = c(6,4)) +par(mar=plot_margins) +par(mgp=label_positions) + +for (i in chromosomes) { + tt <- which(ratio$Chromosome==i) + if (length(tt)>0) { + plot(ratio$Start[tt], + ratio$Ratio[tt]*ploidy, + ylim = c(0,maxLevelToPlot*ploidy), + xlab = paste ("position, chr",i), + ylab = "normalized CN", + pch = ".", + col = colors()[88]) + + tt <- which(ratio$Chromosome==i & ratio$CopyNumber>ploidy ) + points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136]) + + tt <- which(ratio$Chromosome==i & ratio$Ratio==maxLevelToPlot & ratio$CopyNumber>ploidy) + points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136],cex=4) + 
+ tt <- which(ratio$Chromosome==i & ratio$CopyNumber<ploidy & ratio$CopyNumber!= -1) + points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[461]) + tt <- which(ratio$Chromosome==i) + + #UNCOMMENT HERE TO SEE THE PREDICTED COPY NUMBER LEVEL: + #points(ratio$Start[tt],ratio$CopyNumber[tt], pch = ".", col = colors()[24],cex=4) + } + #tt <- which(ratio$Chromosome==i) + + #UNCOMMENT HERE TO SEE THE EVALUATED MEDIAN LEVEL PER SEGMENT: + #points(ratio$Start[tt],ratio$MedianRatio[tt]*ploidy, pch = ".", col = colors()[463],cex=4) + +} + +dev.off() + + + +# Export the whole genome graph + +png(filename = paste(opt$output, ".wg.png",sep=""), width = 16 * ppi, height = 10 * ppi, +res=ppi, bg = "white") + +plot_margins <- c(3,4,2,2)+0.1 +label_positions <- c(2,0.5,0) + +par(mfrow = c(1,1)) +par(mar=plot_margins) +par(mgp=label_positions) +par(xaxs="i", yaxs="i") + + +maxLevelToPlot <- 3 +for (i in c(1:length(ratio$Ratio))) { + if (ratio$Ratio[i]>maxLevelToPlot) { + ratio$Ratio[i]=maxLevelToPlot + } +} + +for (i in c(1:length(ratio$Start))) { + ratio$Position[i] = (i-1) *5000 +1 +} + + +plotRatioLT <- 0.10 + +filteredSet <- ratio[ ratio$score > plotRatioLT, ] + +plot(filteredSet$Position, +filteredSet$Ratio*ploidy, +ylim = c(0,maxLevelToPlot*ploidy), +xlab = paste ("Chr. on genome"), +ylab = "normalized CN", +pch = ".", +col = colors()[88]) + + +title(outer=TRUE) +tt <- which(filteredSet$CopyNumber>ploidy) +points(filteredSet$Position[tt],filteredSet$Ratio[tt]*ploidy,pch = ".",col = colors()[136]) + +tt <- which(filteredSet$Ratio==maxLevelToPlot & filteredSet$CopyNumber>ploidy) +points(filteredSet$Position[tt],filteredSet$Ratio[tt]*ploidy,pch = ".",col = colors()[136],cex=4) + +tt <- which(filteredSet$CopyNumber<ploidy & filteredSet$CopyNumber!= -1) +points(filteredSet$Position[tt],filteredSet$Ratio[tt]*ploidy,pch = ".",col = colors()[461], bg="black") + + +for (chrom in chromosomes) { + tt <- which(filteredSet$Chromosome == chrom) + print(filteredSet[tt[1],]) + xpos <- filteredSet$Position[tt][1] + abline(v=xpos, col="grey") + axis(3, at=xpos, labels=chrom , las=2) +} + + +dev.off() diff --git a/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/freec/freec_assess_significance.R b/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/freec/freec_assess_significance.R new file mode 100644 index 0000000000000000000000000000000000000000..197c24fba9c855caabd7d5527d11af2871629ed1 --- /dev/null +++ b/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/freec/freec_assess_significance.R @@ -0,0 +1,71 @@ +library(rtracklayer) +library('optparse') +library(GenomicRanges); +library(IRanges); +# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet + +option_list <- list( + make_option(c("-c", "--cnv"), dest="cnv"), + make_option(c("-r", "--ratios"), dest="ratios"), + make_option(c("-o", "--output"), dest="output") + ) + +parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) +opt = parse_args(parser) + +dataTable <-read.table(opt$ratios, header=TRUE); +ratio<-data.frame(dataTable) + +dataTable <- read.table(opt$cnv, header=FALSE) +cnvs<- data.frame(dataTable) + +ratio$Ratio[which(ratio$Ratio==-1)]=NA + +cnvs.bed=GRanges(cnvs[,1],IRanges(cnvs[,2],cnvs[,3])) +ratio.bed=GRanges(ratio$Chromosome,IRanges(ratio$Start,ratio$Start),score=ratio$Ratio) + +overlaps <- subsetByOverlaps(ratio.bed,cnvs.bed) +normals <- setdiff(ratio.bed,cnvs.bed) +normals <- subsetByOverlaps(ratio.bed,normals) 
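+# "normals" now holds the ratio windows outside all predicted CNVs; they serve as
+# the reference distribution for the Wilcoxon rank-sum and Kolmogorov-Smirnov
+# tests computed per CNV call further down.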
+ +#mu <- mean(score(normals),na.rm=TRUE) +#sigma<- sd(score(normals),na.rm=TRUE) + +#hist(score(normals),n=500,xlim=c(0,2)) +#hist(log(score(normals)),n=500,xlim=c(-1,1)) + +#shapiro.test(score(normals)[which(!is.na(score(normals)))][5001:10000]) +#qqnorm (score(normals)[which(!is.na(score(normals)))],ylim=(c(0,10))) +#qqline(score(normals)[which(!is.na(score(normals)))], col = 2) + +#shapiro.test(log(score(normals))[which(!is.na(score(normals)))][5001:10000]) +#qqnorm (log(score(normals))[which(!is.na(score(normals)))],ylim=(c(-6,10))) +#qqline(log(score(normals))[which(!is.na(score(normals)))], col = 2) + +numberOfCol=length(cnvs) + +for (i in c(1:length(cnvs[,1]))) { + values <- score(subsetByOverlaps(ratio.bed,cnvs.bed[i])) + #wilcox.test(values,mu=mu) + W <- function(values,normals){resultw <- try(wilcox.test(values,score(normals)), silent = TRUE) + if(class(resultw)=="try-error") return(list("statistic"=NA,"parameter"=NA,"p.value"=NA,"null.value"=NA,"alternative"=NA,"method"=NA,"data.name"=NA)) else resultw} + KS <- function(values,normals){resultks <- try(ks.test(values,score(normals)), silent = TRUE) + if(class(resultks)=="try-error") return(list("statistic"=NA,"p.value"=NA,"alternative"=NA,"method"=NA,"data.name"=NA)) else resultks} + #resultks <- try(KS <- ks.test(values,score(normals)), silent = TRUE) + # if(class(resultks)=="try-error") NA) else resultks + cnvs[i,numberOfCol+1]=W(values,normals)$p.value + cnvs[i,numberOfCol+2]=KS(values,normals)$p.value + } + +if (numberOfCol==5) { + names(cnvs)=c("chr","start","end","copy number","status","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue") +} +if (numberOfCol==7) { + names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue") +} +if (numberOfCol==9) { + names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","somatic/germline","precentageOfGermline","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue") +} +write.table(cnvs, file=opt$output, sep="\t",quote=F,row.names=F) + + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cnmops.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cnmops.scala new file mode 100644 index 0000000000000000000000000000000000000000..aa6998058bf7d49e3172d23bdd4799fe20da5077 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cnmops.scala @@ -0,0 +1,59 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2015 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.extensions + +import java.io.File + +import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +/** + * Wrapper for the Cnmops command line tool. 
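+ * The CNV calling itself is done by the bundled cnmops.R resource script; this
+ * wrapper only builds the Rscript command line and derives the output paths.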
+ * Written based on Cnmops version v2.2.1.
+ */
+class Cnmops(val root: Configurable) extends RscriptCommandLineFunction {
+
+  protected var script: File = new File("/nl/lumc/sasc/biopet/extensions/cnmops.R")
+
+  @Input(doc = "Input file BAM", required = true)
+  var input: List[File] = List()
+
+  // output files, computed automatically from output directory
+  @Output(doc = "Output CNV file")
+  private lazy val outputCnv: File = {
+    require(outputDir != null, "Unexpected error when trying to set cn.MOPS CNV output")
+    new File(outputDir, "cnv.txt")
+  }
+
+  @Output(doc = "Output CNR file")
+  private lazy val outputCnr: File = {
+    require(outputDir != null, "Unexpected error when trying to set cn.MOPS CNR output")
+    new File(outputDir, "cnr.txt")
+  }
+
+  /** write all output files to this directory [./] */
+  var outputDir: String = _
+
+  override def beforeGraph = {
+    super.beforeGraph
+    require(!outputDir.isEmpty, "Output directory for cn.MOPS should not be empty")
+    require(input.length >= 2, "Please supply at least 2 BAM files for cn.MOPS")
+  }
+
+  override def cmdLine = super.cmdLine +
+    required(input.map(f => f.getAbsolutePath).mkString(" "))
+}
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala
index 20b4ae8422936bfc5a8b3b8310074b01b46445a6..281712a2969578652ba11c9ff269771d2d54cfb5 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala
@@ -18,10 +18,9 @@ package nl.lumc.sasc.biopet.extensions
 import java.io.File
 
 import nl.lumc.sasc.biopet.core.summary.Summarizable
-import nl.lumc.sasc.biopet.utils.Logging
+import nl.lumc.sasc.biopet.utils.{ Logging, VcfUtils, tryToParseNumber }
 import nl.lumc.sasc.biopet.utils.config.Configurable
-import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, Reference }
-import nl.lumc.sasc.biopet.utils.tryToParseNumber
+import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference, Version }
 import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
 
 import scala.io.Source
@@ -164,147 +163,149 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
   }
 
   /** Returns command to execute */
-  def cmdLine = required(executable) +
-    required(vepScript) +
-    required("-i", input) +
-    required("-o", output) +
-    conditional(v, "-v") +
-    conditional(q, "-q") +
-    conditional(offline, "--offline") +
-    conditional(noProgress, "--no_progress") +
-    conditional(everything, "--everything") +
-    conditional(force, "--force_overwrite") +
-    conditional(noStats, "--no_stats") +
-    conditional(statsText, "--stats_text") +
-    conditional(html, "--html") +
-    conditional(cache, "--cache") +
-    conditional(humdiv, "--humdiv") +
-    conditional(regulatory, "--regulatory") +
-    conditional(cellType, "--cel_type") +
-    conditional(phased, "--phased") +
-    conditional(alleleNumber, "--allele_number") +
-    conditional(numbers, "--numbers") +
-    conditional(domains, "--domains") +
-    conditional(noEscape, "--no_escape") +
-    conditional(hgvs, "--hgvs") +
-    conditional(protein, "--protein") +
-    conditional(symbol, "--symbol") +
-    conditional(ccds, "--ccds") +
-    conditional(uniprot, "--uniprot") +
-    conditional(tsl, "--tsl") +
-    conditional(canonical, "--canonical") +
-    conditional(biotype, "--biotype") +
-    conditional(xrefRefseq, "--xref_refseq") +
- conditional(checkExisting, "--check_existing") + - conditional(checkAlleles, "--check_alleles") + - conditional(checkSvs, "--check_svs") + - conditional(gmaf, "--gmaf") + - conditional(maf1kg, "--maf_1kg") + - conditional(mafEsp, "--maf_esp") + - conditional(pubmed, "--pubmed") + - conditional(vcf, "--vcf") + - conditional(json, "--json") + - conditional(gvf, "--gvf") + - conditional(checkRef, "--check_ref") + - conditional(codingOnly, "--coding_only") + - conditional(noIntergenic, "--no_intergenic") + - conditional(pick, "--pick") + - conditional(pickAllele, "--pick_allele") + - conditional(flagPick, "--flag_pick") + - conditional(flagPickAllele, "--flag_pick_allele") + - conditional(perGene, "--per_gene") + - conditional(mostSevere, "--most_severe") + - conditional(summary, "--summary") + - conditional(filterCommon, "--filter_common") + - conditional(checkFrequency, "--check_frequency") + - conditional(allowNonVariant, "--allow_non_variant") + - conditional(database, "--database") + - conditional(genomes, "--genomes") + - conditional(gencodeBasic, "--gencode_basic") + - conditional(refseq, "--refseq") + - conditional(merged, "--merged") + - conditional(allRefseq, "--all_refseq") + - conditional(lrg, "--lrg") + - conditional(noWholeGenome, "--no_whole_genome") + - conditional(skibDbCheck, "--skip_db_check") + - optional("--config", vepConfig) + - optional("--species", species) + - optional("--assembly", assembly) + - optional("--format", format) + - optional("--dir", dir) + - optional("--dir_cache", dirCache) + - optional("--dir_plugins", dirPlugins) + - optional("--fasta", fasta) + - optional("--sift", sift) + - optional("--polyphen", polyphen) + - repeat("--custom", custom) + - repeat("--plugin", plugin) + - optional("--individual", individual) + - optional("--fields", fields) + - optional("--convert", convert) + - optional("--terms", terms) + - optional("--chr", chr) + - optional("--pick_order", pickOrder) + - optional("--freq_pop", freqPop) + - optional("--freq_gt_lt", freqGtLt) + - optional("--freq_filter", freqFilter) + - optional("--filter", filter) + - optional("--host", host) + - optional("--user", user) + - optional("--password", password) + - optional("--registry", registry) + - optional("--build", build) + - optional("--compress", compress) + - optional("--cache_region_size", cacheRegionSize) + - optional("--fork", threads) + - optional("--cache_version", cacheVersion) + - optional("--freq_freq", freqFreq) + - optional("--port", port) + - optional("--db_version", dbVersion) + - optional("--buffer_size", bufferSize) + - optional("--failed", failed) + def cmdLine = { + if (input.exists() && VcfUtils.vcfFileIsEmpty(input)) { + val zcat = Zcat(this, input, output) + zcat.cmdLine + } else required(executable) + + required(vepScript) + + required("-i", input) + + required("-o", output) + + conditional(v, "-v") + + conditional(q, "-q") + + conditional(offline, "--offline") + + conditional(noProgress, "--no_progress") + + conditional(everything, "--everything") + + conditional(force, "--force_overwrite") + + conditional(noStats, "--no_stats") + + conditional(statsText, "--stats_text") + + conditional(html, "--html") + + conditional(cache, "--cache") + + conditional(humdiv, "--humdiv") + + conditional(regulatory, "--regulatory") + + conditional(cellType, "--cel_type") + + conditional(phased, "--phased") + + conditional(alleleNumber, "--allele_number") + + conditional(numbers, "--numbers") + + conditional(domains, "--domains") + + conditional(noEscape, "--no_escape") + + 
conditional(hgvs, "--hgvs") + + conditional(protein, "--protein") + + conditional(symbol, "--symbol") + + conditional(ccds, "--ccds") + + conditional(uniprot, "--uniprot") + + conditional(tsl, "--tsl") + + conditional(canonical, "--canonical") + + conditional(biotype, "--biotype") + + conditional(xrefRefseq, "--xref_refseq") + + conditional(checkExisting, "--check_existing") + + conditional(checkAlleles, "--check_alleles") + + conditional(checkSvs, "--check_svs") + + conditional(gmaf, "--gmaf") + + conditional(maf1kg, "--maf_1kg") + + conditional(mafEsp, "--maf_esp") + + conditional(pubmed, "--pubmed") + + conditional(vcf, "--vcf") + + conditional(json, "--json") + + conditional(gvf, "--gvf") + + conditional(checkRef, "--check_ref") + + conditional(codingOnly, "--coding_only") + + conditional(noIntergenic, "--no_intergenic") + + conditional(pick, "--pick") + + conditional(pickAllele, "--pick_allele") + + conditional(flagPick, "--flag_pick") + + conditional(flagPickAllele, "--flag_pick_allele") + + conditional(perGene, "--per_gene") + + conditional(mostSevere, "--most_severe") + + conditional(summary, "--summary") + + conditional(filterCommon, "--filter_common") + + conditional(checkFrequency, "--check_frequency") + + conditional(allowNonVariant, "--allow_non_variant") + + conditional(database, "--database") + + conditional(genomes, "--genomes") + + conditional(gencodeBasic, "--gencode_basic") + + conditional(refseq, "--refseq") + + conditional(merged, "--merged") + + conditional(allRefseq, "--all_refseq") + + conditional(lrg, "--lrg") + + conditional(noWholeGenome, "--no_whole_genome") + + conditional(skibDbCheck, "--skip_db_check") + + optional("--config", vepConfig) + + optional("--species", species) + + optional("--assembly", assembly) + + optional("--format", format) + + optional("--dir", dir) + + optional("--dir_cache", dirCache) + + optional("--dir_plugins", dirPlugins) + + optional("--fasta", fasta) + + optional("--sift", sift) + + optional("--polyphen", polyphen) + + repeat("--custom", custom) + + repeat("--plugin", plugin) + + optional("--individual", individual) + + optional("--fields", fields) + + optional("--convert", convert) + + optional("--terms", terms) + + optional("--chr", chr) + + optional("--pick_order", pickOrder) + + optional("--freq_pop", freqPop) + + optional("--freq_gt_lt", freqGtLt) + + optional("--freq_filter", freqFilter) + + optional("--filter", filter) + + optional("--host", host) + + optional("--user", user) + + optional("--password", password) + + optional("--registry", registry) + + optional("--build", build) + + optional("--compress", compress) + + optional("--cache_region_size", cacheRegionSize) + + optional("--fork", threads) + + optional("--cache_version", cacheVersion) + + optional("--freq_freq", freqFreq) + + optional("--port", port) + + optional("--db_version", dbVersion) + + optional("--buffer_size", bufferSize) + + optional("--failed", failed) + } def summaryFiles: Map[String, File] = Map() def summaryStats: Map[String, Any] = { - if (statsText) { - val statsFile: File = new File(output.getAbsolutePath + "_summary.txt") - parseStatsFile(statsFile) - } else { - Map() - } + val statsFile = new File(output.getAbsolutePath + "_summary.txt") + if (statsText && statsFile.exists()) parseStatsFile(statsFile) + else Map() } - def parseStatsFile(file: File): Map[String, Any] = { - val contents = Source.fromFile(file).getLines().toList - val headers = getHeadersFromStatsFile(contents) - headers.foldLeft(Map.empty[String, Any])((acc, x) => acc + (x.replace(" 
", "_") -> getBlockFromStatsFile(contents, x))) - } + protected val removeOnConflict = Set("Output_file", "Command_line_options", "Run_time", "Start_time", "End_time", "Input_file_(format)", "Novel_/_existing_variants") + protected val nonNumber = Set("VEP_version_(API)", "Cache/Database", "Species") - def getBlockFromStatsFile(contents: List[String], header: String): Map[String, Any] = { - var inBlock = false - var theMap: Map[String, Any] = Map() - for (x <- contents) { - val stripped = x.stripPrefix("[").stripSuffix("]") - if (stripped == header) { - inBlock = true - } else { - if (inBlock) { - val key = stripped.split('\t').head.replace(" ", "_") - val value = stripped.split('\t').last - theMap ++= Map(key -> tryToParseNumber(value, fallBack = true).getOrElse(value)) - } - } - if (stripped == "") { - inBlock = false + override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = { + if (removeOnConflict.contains(key)) None + else if (nonNumber.contains(key)) v1 + else { + (v1, v2) match { + case (x1: Int, x2: Int) => x1 + x2 + case _ => throw new IllegalStateException(s"Value are not Int's, unable to sum them up, key: $key, v1: $v1, v2: $v2") } } - theMap } - def getHeadersFromStatsFile(contents: List[String]): List[String] = { - // block headers are of format '[block]' - contents.filter(_.startsWith("[")).filter(_.endsWith("]")).map(_.stripPrefix("[")).map(_.stripSuffix("]")) - } + def parseStatsFile(file: File): Map[String, Any] = { + val reader = Source.fromFile(file) + val contents = reader.getLines().filter(_ != "").toArray + reader.close() + + def isHeader(line: String) = line.startsWith("[") && line.endsWith("]") + val headers = contents.zipWithIndex + .filter(x => x._1.startsWith("[") && x._1.endsWith("]")) + + (for ((header, headerIndex) <- headers) yield { + val name = header.stripPrefix("[").stripSuffix("]") + name.replaceAll(" ", "_") -> (contents.drop(headerIndex + 1).takeWhile(!isHeader(_)).map { line => + val values = line.split("\t", 2) + values.head.replaceAll(" ", "_") -> tryToParseNumber(values.last).getOrElse(0) + }.toMap) + }).toMap + } } diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/Conifer.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/Conifer.scala index f241c576ee1fa2ff7b7169f5e7cee7abc265045e..e4bfcfbe956b00027edd426559d68dc639ee4d34 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/Conifer.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/Conifer.scala @@ -21,7 +21,7 @@ import nl.lumc.sasc.biopet.core.extensions.PythonCommandLineFunction abstract class Conifer extends PythonCommandLineFunction with Version { override def subPath = "conifer" :: super.subPath // executable = config("exe", default = "conifer") - setPythonScript(config("script", default = "conifer")) + setPythonScript(config("script", default = "conifer.py", namespace = "conifer")) def versionRegex = """(.*)""".r override def versionExitcode = List(0) def versionCommand = executable + " " + pythonScript + " --version" diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/freec/FreeC.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/freec/FreeC.scala new file mode 100644 index 0000000000000000000000000000000000000000..365cde8e49ecdcfa888480d3b1c4b21bea8769b4 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/freec/FreeC.scala @@ -0,0 +1,214 @@ +/** + * Biopet is built on 
top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.extensions.freec + +import java.io.{ File, PrintWriter } + +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference, Version } +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline._ + +class FreeC(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version { + + @Input(doc = "BAMfile", required = true) + var input: File = _ + + var inputFormat: Option[String] = config("inputFormat") + + var outputPath: File = _ + + @Output(doc = "Output", shortName = "out") + protected var output: File = _ + + @Output(doc = "FreeC GC_profile") + private var _gcProfile: File = _ + def gcProfile = new File(outputPath, "GC_profile.cnp") + + @Output(doc = "FreeC Read numbers per bin") + private var _sampleBins: File = _ + def sampleBins = new File(outputPath, input.getName + "_sample.cpn") + + @Output + private var _cnvOutput: File = _ + def cnvOutput = new File(outputPath, input.getName + "_CNVs") + + @Output + private var _bafOutput: File = _ + def bafOutput = new File(outputPath, input.getName + "_BAF.txt") + + @Output + private var _ratioOutput: File = _ + def ratioOutput = new File(outputPath, input.getName + "_ratio.txt") + + @Output + private var _ratioBedGraph: File = _ + def ratioBedGraph = new File(outputPath, input.getName + "_ratio.BedGraph") + + executable = config("exe", default = "freec") + var bedGraphOutput: Boolean = config("BedGraphOutput", default = false) + var _bedtools: File = config("exe", default = "bedtools", namespace = "bedtools") + var bedtools: Option[String] = config("bedtools", default = _bedtools, freeVar = false) + var breakPointThreshold: Option[Double] = config("breakPointThreshold") + var breakPointType: Option[Int] = config("breakPointType") + + var chrFiles: File = config("chrFiles") + var chrLenFile: File = config("chrLenFile") + + var coefficientOfVariation: Option[Double] = config("coefficientOfVariation") + var contamination: Option[Double] = config("contamination") + var contaminationAdjustment: Boolean = config("contaminationAdjustment", default = false) + + var degree: Option[String] = config("degree") + var forceGCcontentNormalization: Option[Int] = config("forceGCcontentNormalization") + + var gcContentProfile: Option[File] = config("GCcontentProfile") + var gemMappabilityFile: Option[String] = config("gemMappabilityFile") + + var intercept: Option[Int] = config("intercept") + var minCNAlength: Option[Int] = config("minCNAlength") + var minMappabilityPerWindow: Option[Double] = config("minMappabilityPerWindow") + var minExpectedGC: Option[Double] = config("minExpectedGC") + var maxExpectedGC: Option[Double] = config("maxExpectedGC") + var minimalSubclonePresence: Option[Int] = config("minimalSubclonePresence") + var maxThreads: Int = getThreads 
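+  // These config-backed fields map onto keys in the FREEC configuration file
+  // that createConfigFile writes just before the tool is run.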
+ + var noisyData: Boolean = config("noisyData", default = false) + //var outputDir: File + var ploidy: Option[String] = config("ploidy") + var printNA: Boolean = config("printNA", default = false) + var readCountThreshold: Option[Int] = config("readCountThreshold") + + var _sambamba: File = config("exe", namespace = "sambamba", default = "sambamba") + var sambamba: File = config("sambamba", default = _sambamba, freeVar = false) + var sambambaThreads: Option[Int] = config("SambambaThreads") + + var _samtools: File = config("exe", namespace = "samtools", default = "samtools") + var samtools: File = config("samtools", default = _samtools, freeVar = false) + + var sex: Option[String] = config("sex") + var step: Option[Int] = config("step") + var telocentromeric: Option[Int] = config("telocentromeric") + + var uniqueMatch: Boolean = config("uniqueMatch", default = false) + var window: Option[Int] = config("window") + + /** [sample] options */ + // var mateFile: File = input + var mateCopyNumberFile: Option[File] = config("mateCopyNumberFile") + // var inputFormat: Option[String] = config("inputFormat") + var mateOrientation: Option[String] = config("mateOrientation") + + /** [BAF] options */ + var snpFile: Option[File] = config("snpFile") + var minimalCoveragePerPosition: Option[Int] = config("minimalCoveragePerPosition") + var makePileup: Option[File] = config("makePileup") + var fastaFile: Option[File] = config("fastaFile") + var minimalQualityPerPosition: Option[Int] = config("minimalQualityPerPosition") + var shiftInQuality: Option[Int] = config("shiftInQuality") + + /** [target] */ + var captureRegions: Option[File] = config("captureRegions") + + // Control-FREEC v8.7 : calling copy number alterations and LOH regions using deep-sequencing data + override def versionCommand = executable + override def versionRegex = """Control-FREEC v([0-9\.]+) : .*""".r + override def defaultThreads = 4 + override def defaultCoreMemory = 4.0 + + private var configFile: File = _ + + override def beforeGraph { + super.beforeGraph + + _gcProfile = gcProfile + _sampleBins = sampleBins + _cnvOutput = cnvOutput + _bafOutput = bafOutput + _ratioOutput = ratioOutput + _ratioBedGraph = ratioBedGraph + + configFile = new File(outputPath, input.getName + ".freec_config.txt") + output = cnvOutput + } + + override def beforeCmd: Unit = { + super.beforeCmd + + outputPath.mkdirs() + + logger.info("Creating FREEC config file: " + configFile.getAbsolutePath) + createConfigFile + } + + protected def createConfigFile = { + val writer = new PrintWriter(configFile) + + val conf: String = "[general]" + "\n" + + conditional(bedGraphOutput, "BedGraphOutput=TRUE", escape = false) + "\n" + + required("bedtools=", bedtools, spaceSeparated = false, escape = false) + "\n" + + optional("breakPointThreshold=", breakPointThreshold, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("breakPointType=", breakPointType, suffix = "", spaceSeparated = false, escape = false) + "\n" + + required("chrFiles=", chrFiles, spaceSeparated = false, escape = false) + "\n" + + required("chrLenFile=", chrLenFile, spaceSeparated = false, escape = false) + "\n" + + optional("coefficientOfVariation=", coefficientOfVariation, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("contamination=", contamination, suffix = "", spaceSeparated = false, escape = false) + "\n" + + conditional(contaminationAdjustment, "contaminationAdjustment=TRUE", escape = false) + "\n" + + optional("degree=", degree, suffix = "", 
spaceSeparated = false, escape = false) + "\n" + + optional("forceGCcontentNormalization=", forceGCcontentNormalization, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("GCcontentProfile=", gcContentProfile, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("gemMappabilityFile=", gemMappabilityFile, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("intercept=", intercept, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("minCNAlength=", minCNAlength, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("minMappabilityPerWindow=", minMappabilityPerWindow, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("minExpectedGC=", minExpectedGC, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("maxExpectedGC=", maxExpectedGC, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("minimalSubclonePresence=", minimalSubclonePresence, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("maxThreads=", getThreads, suffix = "", spaceSeparated = false, escape = false) + "\n" + + conditional(noisyData, "noisyData=TRUE", escape = false) + "\n" + + required("outputDir=", outputPath, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("ploidy=", ploidy, suffix = "", spaceSeparated = false, escape = false) + "\n" + + conditional(printNA, "printNA=TRUE", escape = false) + "\n" + + optional("readCountThreshold=", readCountThreshold, suffix = "", spaceSeparated = false, escape = false) + "\n" + + required("sambamba=", sambamba, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("SambambaThreads=", sambambaThreads, suffix = "", spaceSeparated = false, escape = false) + "\n" + + required("samtools=", samtools, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("sex=", sex, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("step=", step, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("telocentromeric=", telocentromeric, suffix = "", spaceSeparated = false, escape = false) + "\n" + + conditional(uniqueMatch, "uniqueMatch=TRUE", escape = false) + "\n" + + optional("window=", window, suffix = "", spaceSeparated = false, escape = false) + "\n" + + "[sample]" + "\n" + + required("mateFile=", input, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("mateCopyNumberFile=", mateCopyNumberFile, suffix = "", spaceSeparated = false, escape = false) + "\n" + + required("inputFormat=", inputFormat, suffix = "", spaceSeparated = false, escape = false) + "\n" + + required("mateOrientation=", mateOrientation, suffix = "", spaceSeparated = false, escape = false) + "\n" + + "[BAF]" + "\n" + + optional("SNPfile=", snpFile, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("minimalCoveragePerPosition=", minimalCoveragePerPosition, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("makePileup=", makePileup, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("fastaFile=", fastaFile, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("minimalQualityPerPosition=", minimalQualityPerPosition, suffix = "", spaceSeparated = false, escape = false) + "\n" + + optional("shiftInQuality=", shiftInQuality, suffix = "", spaceSeparated = false, escape = false) + "\n" + + "[target]" + "\n" + + optional("captureRegions=", 
captureRegions, suffix = "", spaceSeparated = false, escape = false) + "\n" + + writer.write(conf) + writer.close() + } + + def cmdLine = required(executable) + + required("--conf", configFile) +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/freec/FreeCAssessSignificancePlot.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/freec/FreeCAssessSignificancePlot.scala new file mode 100644 index 0000000000000000000000000000000000000000..57f5420c8676e4cc275017ef4af080933b551457 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/freec/FreeCAssessSignificancePlot.scala @@ -0,0 +1,32 @@ +package nl.lumc.sasc.biopet.extensions.freec + +import java.io.File + +import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +class FreeCAssessSignificancePlot(val root: Configurable) extends RscriptCommandLineFunction { + protected var script: File = new File("/nl/lumc/sasc/biopet/extensions/freec/freec_assess_significance.R") + + @Input(doc = "Output file from FreeC. *_CNV", required = true) + var cnv: File = null + + @Input(doc = "Output file from FreeC. *_ratio.txt", required = true) + var ratios: File = null + + @Output(doc = "Destination for the PNG file", required = true) + var output: File = null + + /* cmdLine to execute R-script and with arguments + * Arguments should be pasted in the same order as the script is expecting it. + * Unless some R library is used for named arguments + * */ + override def cmdLine: String = { + super.cmdLine + + required("--cnv", cnv) + + required("--ratios", ratios) + + required("--output", output) + + } +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/freec/FreeCBAFPlot.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/freec/FreeCBAFPlot.scala new file mode 100644 index 0000000000000000000000000000000000000000..2dbbf2470fbee22dfd526c4da6a9a48231c806c5 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/freec/FreeCBAFPlot.scala @@ -0,0 +1,28 @@ +package nl.lumc.sasc.biopet.extensions.freec + +import java.io.File + +import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +class FreeCBAFPlot(val root: Configurable) extends RscriptCommandLineFunction { + protected var script: File = new File("/nl/lumc/sasc/biopet/extensions/freec/freec_BAFPlot.R") + + @Input(doc = "Output file from FreeC. *_BAF.txt") + var input: File = null + + @Output(doc = "Destination for the PNG file") + var output: File = null + + /* cmdLine to execute R-script and with arguments + * Arguments should be pasted in the same order as the script is expecting it. 
+ * Unless some R library is used for named arguments + * */ + override def cmdLine: String = { + super.cmdLine + + required("-i", input) + + required("-o", output) + } + +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/freec/FreeCCNVPlot.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/freec/FreeCCNVPlot.scala new file mode 100644 index 0000000000000000000000000000000000000000..087bc3b79cad2b4f40408fb0980fc64ea61806d9 --- /dev/null +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/freec/FreeCCNVPlot.scala @@ -0,0 +1,26 @@ +package nl.lumc.sasc.biopet.extensions.freec + +import java.io.File + +import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +class FreeCCNVPlot(val root: Configurable) extends RscriptCommandLineFunction { + protected var script: File = new File("/nl/lumc/sasc/biopet/extensions/freec/freec_CNVPlot.R") + + @Input(doc = "Output file from FreeC. *_CNV", required = true) + var input: File = null + + @Output(doc = "Destination for the PNG file", required = true) + var output: File = null + + /** + * cmdLine to execute R-script and with arguments + * Arguments should be pasted in the same order as the script is expecting it. + * Unless some R library is used for named arguments + */ + override def cmdLine = super.cmdLine + + required("-i", input) + + required("-o", output) +} diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala index 4d712a8407abb8f09b8e6e7fdcceaba8d11d2bb4..904a0fc08069a53954b7d0525e86d18f199ed37b 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala @@ -2,11 +2,11 @@ package nl.lumc.sasc.biopet.extensions.gatk import java.io.File -import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction +import nl.lumc.sasc.biopet.core.{ BiopetJavaCommandLineFunction, Reference } import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output } -class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction { +class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction with Reference { analysisName = "CatVariants" javaMainClass = "org.broadinstitute.gatk.tools.CatVariants" @@ -44,6 +44,11 @@ class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) var log_to_file: File = _ + override def beforeGraph() = { + super.beforeGraph() + if (reference == null) reference = referenceFasta() + } + override def cmdLine = super.cmdLine + required("-R", reference, spaceSeparated = true, escape = true, format = "%s") + repeat("-V", variant, spaceSeparated = true, escape = true, format = "%s") + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMpileup.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMpileup.scala new file mode 100644 index 0000000000000000000000000000000000000000..a9029b334aabfea047502ac7cc00a23371678dab --- /dev/null +++ 
b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMpileup.scala @@ -0,0 +1,43 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.extensions.sambamba + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +class SambambaMpileup(val root: Configurable) extends Sambamba { + override val defaultThreads = 4 + + @Input(doc = "Bam File") + var input: List[File] = Nil + + @Output(doc = "Output file", required = false) + var output: File = null + + val buffer: Option[Int] = config("buffer", default = 8 * 1024 * 1024) + + def cmdLine = { + required(executable) + + required("mpileup") + + optional("-t", threads) + + optional("-b", buffer) + + repeat(input) + " | " + + "pigz -9 -p " + threads + " -i -c > " + + output.getAbsolutePath + } +} diff --git a/biopet-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableMain.scala b/biopet-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableMain.scala index 9867191f5136fa350a840740f1d89ef075943cf0..e58cd9456b328d9fe1786df6c5840491f7878b77 100644 --- a/biopet-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableMain.scala +++ b/biopet-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableMain.scala @@ -28,6 +28,7 @@ object BiopetExecutableMain extends BiopetExecutable { nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics, nl.lumc.sasc.biopet.pipelines.sage.Sage, nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig, + nl.lumc.sasc.biopet.pipelines.kopisu.Kopisu, nl.lumc.sasc.biopet.pipelines.carp.Carp, nl.lumc.sasc.biopet.pipelines.toucan.Toucan, nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling, diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBed.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBed.scala index 5eedb3db7c718a6b5293fab25bff182c629e3909..e80887e8edcb5e8f2c73cfa268964fb39cf326c8 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBed.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/AnnotateVcfWithBed.scala @@ -80,7 +80,7 @@ object AnnotateVcfWithBed extends ToolCommand { logger.info("Start") val argsParser = new OptParser - val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) val fieldType = cmdArgs.fieldType match { case "Integer" => VCFHeaderLineType.Integer diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BaseCounter.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BaseCounter.scala index 0add274bfbc477ab771466f9b4b5cdfc2024a1f1..6bba497ca162ca63574fb99f048cba119f9e9cf6 100644 --- 
a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BaseCounter.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BaseCounter.scala @@ -53,10 +53,7 @@ object BaseCounter extends ToolCommand { def main(args: Array[String]): Unit = { val argsParser = new OptParser - val cmdArgs: Args = argsParser.parse(args, Args()) match { - case Some(x) => x - case _ => throw new IllegalArgumentException - } + val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) //Sets picard logging level htsjdk.samtools.util.Log.setGlobalLogLevel(htsjdk.samtools.util.Log.LogLevel.valueOf(logger.getLevel.toString)) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala index 2ad247900d7a1537e93c3f20b039cf1962d1fa42..100209b1d266bd326c91c543631a8b4dfd78c10d 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala @@ -113,7 +113,7 @@ object BastyGenerateFasta extends ToolCommand { */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - cmdArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1) + cmdArgs = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) if (cmdArgs.outputVariants != null) { writeVariantsOnly() diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala index f9646c4c9ebe7473e5ccb39e833d8ae87176b84a..b82b7b74286258ac5bdd11a796c1924bd1c3d4af 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala @@ -47,7 +47,7 @@ object BedToInterval extends ToolCommand { */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) val writer = new PrintWriter(commandArgs.outputFile) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala index b9ef718165ff3a7f2b67853c83c3e7d0fbbbe297..40ad0b93b3ad8404382be17ff8fa862851db961a 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala @@ -39,7 +39,7 @@ object BedtoolsCoverageToCounts extends ToolCommand { */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala index 4d98bda24b5c4eb1072cd5d1739efbf242d4a1bf..51f1c1cbecba4e079534eaa380f8be072950c3d1 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala +++ 
b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala @@ -50,7 +50,7 @@ object BiopetFlagstat extends ToolCommand { */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) val inputSam = SamReaderFactory.makeDefault.open(commandArgs.inputFile) val iterSam = if (commandArgs.region.isEmpty) inputSam.iterator else { diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala index 141633c5358a21c931555d5efbd18c615b7c6f77..fccebd08aa15ea28c90f6b64af57e196e6091e55 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala @@ -71,7 +71,7 @@ object CheckAllelesVcfInBam extends ToolCommand { def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) if (commandArgs.bamFiles.size != commandArgs.samples.size) logger.warn("Number of samples is different from number of bam files: additional samples or bam files will not be used") diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala index b02074875b2e27021e123139da7ef42d1bce8700..622c4a9c4cc9807e67a15719561d94eba3970355 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala @@ -242,7 +242,7 @@ object ExtractAlignedFastq extends ToolCommand { def parseArgs(args: Array[String]): Args = new OptParser() .parse(args, Args()) - .getOrElse(sys.exit(1)) + .getOrElse(throw new IllegalArgumentException) def main(args: Array[String]): Unit = { diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala index 46a4abd0cac604dedc239845565f5d1acdd85b72..f0214b3392255f2c4e81de571c35a152343a86d5 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala @@ -45,7 +45,7 @@ object FastqSplitter extends ToolCommand { */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) val groupSize = 100 val output = for (file <- commandArgs.outputFile) yield new AsyncFastqWriter(new BasicFastqWriter(file), groupSize) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala index e56393b38eb62499c39469b452baefc818812e28..7b364d1ffb36ab8879236d125196119813cbadb8 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala @@ -151,7 +151,7 @@ object FastqSync extends ToolCommand { */ def parseArgs(args: 
Array[String]): Args = new OptParser() .parse(args, Args()) - .getOrElse(sys.exit(1)) + .getOrElse(throw new IllegalArgumentException) def main(args: Array[String]): Unit = { diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala index f752e3be6863928122bf233b9b911b2d50b8a432..92c8782a75707b646e9cc10e3a97f822a2f53ae0 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala @@ -46,7 +46,7 @@ object FindRepeatsPacBio extends ToolCommand { def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) val bamReader = SamReaderFactory.makeDefault .validationStringency(ValidationStringency.SILENT) .open(commandArgs.inputBam) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GvcfToBed.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GvcfToBed.scala index e9b24a378eef5b06a0a1e10832943b07852985c1..fcb302c3c7b9e551d9a7190d14c4d9f29ce48478 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GvcfToBed.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GvcfToBed.scala @@ -57,7 +57,7 @@ object GvcfToBed extends ToolCommand { def main(args: Array[String]): Unit = { val argsParser = new OptParser - val cmdArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val cmdArgs = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) logger.debug("Opening reader") val reader = new VCFFileReader(cmdArgs.inputVcf, false) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala index ee59a22aaab5a1b21ad8565a74f41d432cde096b..ce47b649de633d0db2cdf497421c972d43fe32bb 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala @@ -90,7 +90,7 @@ object KrakenReportToJson extends ToolCommand { */ def parseArgs(args: Array[String]): Args = new OptParser() .parse(args, Args()) - .getOrElse(sys.exit(1)) + .getOrElse(throw new IllegalArgumentException) /** * Takes a line from the kraken report, converts into Map with taxonID and diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala index 02f4b4709398e244a40ba13139993d2230d54fd3..8187064d10ddbfda5a4642a821b30b23e56bfbe4 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala @@ -50,7 +50,7 @@ object MergeAlleles extends ToolCommand { */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) val readers = commandArgs.inputFiles.map(new VCFFileReader(_, true)) val referenceFile = new FastaSequenceFile(commandArgs.reference, true) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeOtuMaps.scala 
b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeOtuMaps.scala index 15b76aa44063bbeeb3f79606225d88b0d4666ed3..f853d66a5018559569a02b5ded85a04e19d57711 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeOtuMaps.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeOtuMaps.scala @@ -41,7 +41,7 @@ object MergeOtuMaps extends ToolCommand { */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) var map: Map[Long, String] = Map() diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala index 426043c4b92ff49a1f987c418658ffd4180ba702..07de8a2b895e9c6b079087dae3844a084b5380a3 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala @@ -188,7 +188,7 @@ object MergeTables extends ToolCommand { /** Parses the command line argument */ def parseArgs(args: Array[String]): Args = new OptParser() .parse(args, Args()) - .getOrElse(sys.exit(1)) + .getOrElse(throw new IllegalArgumentException) /** Transforms the input file seq into a seq of [[InputTable]] objects */ def prepInput(inFiles: Seq[File], ext: String, columnNames: Option[Seq[String]]): Seq[InputTable] = { diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala index 91761c1965510f240af328bb54f4408d6388a75b..df159c33dd4c05af042ea8f6cfc5a1735cac6013 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala @@ -62,7 +62,7 @@ object MpileupToVcf extends ToolCommand { */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) if (commandArgs.input != null && !commandArgs.input.exists) throw new IllegalStateException("Input file does not exist") val writer = new PrintWriter(commandArgs.output) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/PrefixFastq.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/PrefixFastq.scala index a5040a24f735e88d0c0877b7e2e0923c1e5de2c8..6be7f11fb878af4e6679f661327f9b85cc3ab4a3 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/PrefixFastq.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/PrefixFastq.scala @@ -52,7 +52,7 @@ object PrefixFastq extends ToolCommand { logger.info("Start") val argsParser = new OptParser - val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) val writer = new AsyncFastqWriter(new BasicFastqWriter(cmdArgs.output), 3000) val reader = new FastqReader(cmdArgs.input) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala index bdfb0be7f8e51495fed176ac7ab386104a04948e..4af0fc390709c0e7e5ac7f3fc7e3fdeb0c48e8a1 100644 --- 
a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala @@ -49,7 +49,7 @@ object RegionAfCount extends ToolCommand { def main(args: Array[String]): Unit = { val argsParser = new OptParser - val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) logger.info("Start") logger.info("Reading bed file") diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala index e559f22216b5f08fd625b24931251694733a81c4..0ea5fcc73ff7f1c5caed520ce8360d1d2c1dd820 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala @@ -41,7 +41,7 @@ object SageCountFastq extends ToolCommand { */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala index f5ffa5e5a43e63c95d6610ca9e58c1170ae44369..71dfbbacb788829a98821e00fe51d28f47830423 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala @@ -74,7 +74,7 @@ object SageCreateLibrary extends ToolCommand { */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala index 9ff037eae73d81cd39345d6028e903af220ae35c..2ae201ea18512c2f3ee5a6b7c05efad3015b3cba 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala @@ -52,7 +52,7 @@ object SageCreateTagCounts extends ToolCommand { */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala index d9b1d25fd67b51b76e52d62bca776dd3773417e4..4d1f2b82d865cbed58ee329c6f985646df6ed5e3 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala 
@@ -46,7 +46,7 @@ object SamplesTsvToJson extends ToolCommand { /** Executes SamplesTsvToJson */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) val jsonString = stringFromInputs(cmdArgs.inputFiles, cmdArgs.tagFiles) cmdArgs.outputFile match { diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala index fe59522374391b9ed5af74df61ed34d58db15339..3c39667cbff6ce005b9e45d411c410cd3bed4eae 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala @@ -76,7 +76,7 @@ object SeqStat extends ToolCommand { */ def parseArgs(args: Array[String]): Args = new OptParser() .parse(args, Args()) - .getOrElse(sys.exit(1)) + .getOrElse(throw new IllegalArgumentException) /** * diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala index 0941da9e512949c19ebcd10991d29ad56d30ca85..413d335ab01b3897e7d4a2eff2205923cd06fc6b 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala @@ -46,7 +46,7 @@ object SquishBed extends ToolCommand { */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) if (!cmdArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + cmdArgs.input) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala index 9c4629bd45d0111657f871e9587ed09faa6dd611..084a8cf693a0dbb9fd9708dd624df60c3c2629ed 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala @@ -61,7 +61,7 @@ object SummaryToTsv extends ToolCommand { def main(args: Array[String]): Unit = { val argsParser = new OptParser - val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) val summary = new Summary(cmdArgs.summary) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ValidateFastq.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ValidateFastq.scala index b2e1aeb9651cc9690ad7926ecb76f4f4a5627d8c..975a6dbd13e4741e93a1709f8f02e01283f216b1 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ValidateFastq.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/ValidateFastq.scala @@ -51,7 +51,7 @@ object ValidateFastq extends ToolCommand { //parse all possible options into OptParser val argsParser = new OptParser - val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) //read in fastq file 1 and if present fastq file 2 val readFq1 = new FastqReader(cmdArgs.input) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala 
b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala index cffe91f625699a517d0a7b07a60cd171d805917f..3897c3c9e577e9bb81e0075ba3729b8aa52d7ce3 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala @@ -137,7 +137,7 @@ object VcfFilter extends ToolCommand { def main(args: Array[String]): Unit = { logger.info("Start") val argsParser = new OptParser - val cmdArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val cmdArgs = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) val reader = new VCFFileReader(cmdArgs.inputVcf, false) val header = reader.getFileHeader diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala index 62b04375f630ab59fea18d9c1d974bdf038cb767..677b4d739621f718b0d4c1d1f9f5531ca465ee7f 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala @@ -206,7 +206,7 @@ object VcfStats extends ToolCommand { def main(args: Array[String]): Unit = { logger.info("Started") val argsParser = new OptParser - cmdArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1) + cmdArgs = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) val reader = new VCFFileReader(cmdArgs.inputFile, true) val header = reader.getFileHeader diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala index 20a15dacacba466b41235f32e77062179ad5f0a4..a86eb7c630f4a605c9f4c96a56f8c8e2ea7e89e0 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala @@ -72,7 +72,7 @@ object VcfToTsv extends ToolCommand { def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) // Throw exception if separator and listSeparator are identical if (commandArgs.separator == commandArgs.listSeparator) throw new IllegalArgumentException( diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala index 71f7e2b12f831e699e991af51792692e43abf68a..6c046a2b4002336fed7f031d2ab1d5e3295a4b31 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala @@ -24,6 +24,7 @@ import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, Variant import htsjdk.variant.vcf._ import nl.lumc.sasc.biopet.utils.ToolCommand import nl.lumc.sasc.biopet.utils.VcfUtils.scalaListToJavaObjectArrayList +import nl.lumc.sasc.biopet.utils.BamUtils.SamDictCheck import scala.collection.JavaConversions._ @@ -89,14 +90,14 @@ object VcfWithVcf extends ToolCommand { val header = reader.getFileHeader val vcfDict = header.getSequenceDictionary match { case r if r != null => - r.assertSameDictionary(referenceDict) + r.assertSameDictionary(referenceDict, true) r case _ => referenceDict } val secondHeader = secondaryReader.getFileHeader secondHeader.getSequenceDictionary match { - case r if r != null => 
r.assertSameDictionary(referenceDict) + case r if r != null => r.assertSameDictionary(referenceDict, true) case _ => } @@ -123,6 +124,7 @@ object VcfWithVcf extends ToolCommand { var counter = 0 for (record <- reader) { + require(vcfDict.getSequence(record.getContig) != null, s"Contig ${record.getContig} does not exist on reference") val secondaryRecords = getSecondaryRecords(secondaryReader, record, commandArgs.matchAllele) val fieldMap = createFieldMap(commandArgs.fields, secondaryRecords) @@ -201,7 +203,6 @@ object VcfWithVcf extends ToolCommand { } case FieldMethod.unique => scalaListToJavaObjectArrayList(attribute._2.distinct) case _ => { - print(attribute._2.getClass.toString) scalaListToJavaObjectArrayList(attribute._2) } }) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala index 50893046b949792ddccedf00752b34c632dc00ee..feee8e280c1a9314da92dd183e6d23560395228a 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala @@ -24,7 +24,6 @@ import htsjdk.variant.vcf._ import nl.lumc.sasc.biopet.utils.ToolCommand import scala.collection.JavaConversions._ -import scala.collection.mutable.{ Map => MMap } /** * This tool parses a VEP annotated VCF into a standard VCF file. @@ -40,7 +39,7 @@ object VepNormalizer extends ToolCommand { def main(args: Array[String]): Unit = { val commandArgs: Args = new OptParser() .parse(args, Args()) - .getOrElse(sys.exit(1)) + .getOrElse(throw new IllegalArgumentException) val input = commandArgs.inputVCF val output = commandArgs.outputVCF @@ -57,31 +56,37 @@ object VepNormalizer extends ToolCommand { } val header = reader.getFileHeader - logger.debug("Checking for CSQ tag") - csqCheck(header) - logger.debug("CSQ tag OK") - logger.debug("Checkion VCF version") - versionCheck(header) - logger.debug("VCF version OK") - logger.debug("Parsing header") - val newInfos = parseCsq(header) - header.setWriteCommandLine(true) val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder(). 
setOutputFile(output).setReferenceDictionary(header.getSequenceDictionary) build ()) - for (info <- newInfos) { - val tmpheaderline = new VCFInfoHeaderLine(info, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "A VEP annotation") - header.addMetaDataLine(tmpheaderline) - } - logger.debug("Header parsing done") + if (reader.iterator().hasNext) { + logger.debug("Checking for CSQ tag") + csqCheck(header) + logger.debug("CSQ tag OK") + logger.debug("Checkion VCF version") + versionCheck(header) + logger.debug("VCF version OK") + logger.debug("Parsing header") + val newInfos = parseCsq(header) + header.setWriteCommandLine(true) + + for (info <- newInfos) { + val tmpheaderline = new VCFInfoHeaderLine(info, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "A VEP annotation") + header.addMetaDataLine(tmpheaderline) + } + logger.debug("Header parsing done") - logger.debug("Writing header to file") + logger.debug("Writing header to file") - writer.writeHeader(header) - logger.debug("Wrote header to file") + writer.writeHeader(header) + logger.debug("Wrote header to file") - normalize(reader, writer, newInfos, commandArgs.mode, commandArgs.removeCSQ) + normalize(reader, writer, newInfos, commandArgs.mode, commandArgs.removeCSQ) + } else { + logger.debug("No variants found, skipping normalize step") + writer.writeHeader(header) + } writer.close() logger.debug("Closed writer") reader.close() @@ -91,6 +96,7 @@ object VepNormalizer extends ToolCommand { /** * Normalizer + * * @param reader input VCF VCFFileReader * @param writer output VCF AsyncVariantContextWriter * @param newInfos array of string containing names of new info fields @@ -118,6 +124,7 @@ object VepNormalizer extends ToolCommand { /** * Checks whether header has a CSQ tag + * * @param header VCF header */ def csqCheck(header: VCFHeader) = { @@ -131,6 +138,7 @@ object VepNormalizer extends ToolCommand { * Checks whether version of input VCF is at least 4.0 * VEP is known to cause issues below 4.0 * Throws exception if not + * * @param header VCFHeader of input VCF */ def versionCheck(header: VCFHeader) = { @@ -149,6 +157,7 @@ object VepNormalizer extends ToolCommand { /** * Parses the CSQ tag in the header + * * @param header the VCF header * @return list of strings with new info fields */ @@ -160,6 +169,7 @@ object VepNormalizer extends ToolCommand { /** * Explode a single VEP-annotated record to multiple normal records * Based on the number of annotated transcripts in the CSQ tag + * * @param record the record as a VariantContext object * @param csqInfos An array with names of new info tags * @return An array with the new records diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala index 082ad2626b1667d4438857b64f6579cfc725f4af..882cb649df599b4ef492a9653dd7f1cb99c58cb9 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala @@ -399,7 +399,7 @@ object WipeReads extends ToolCommand { /** Parses the command line argument */ def parseArgs(args: Array[String]): Args = new OptParser() .parse(args, Args()) - .getOrElse(sys.exit(1)) + .getOrElse(throw new IllegalArgumentException) def main(args: Array[String]): Unit = { diff --git a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala index 
df8068c0c7e28b57e3344d736aa7cc04abe2b8f2..2cedc0ef3bddbb81d634c25c466d22843ccd1ed2 100644 --- a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala +++ b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala @@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.utils import java.io.File -import htsjdk.samtools.{ SamReader, SamReaderFactory } +import htsjdk.samtools.{ SAMSequenceDictionary, SamReader, SamReaderFactory } import nl.lumc.sasc.biopet.utils.intervals.{ BedRecord, BedRecordList } import scala.collection.JavaConversions._ @@ -129,11 +129,8 @@ object BamUtils { val counts = bamInsertSizes.flatMap(x => x) // avoid division by zero - if (counts.size != 0) { - counts.sum / counts.size - } else { - 0 - } + if (counts.size != 0) counts.sum / counts.size + else 0 } /** @@ -146,4 +143,21 @@ object BamUtils { bamFile -> sampleBamInsertSize(bamFile, samplingSize, binSize) }.toMap + /** This class will add functionality to [[SAMSequenceDictionary]] */ + implicit class SamDictCheck(samDics: SAMSequenceDictionary) { + /** + * This method will check if all contig and sizes are the same without looking at the order of the contigs + * + * @throws AssertionError + * @param that Dict to compare to + * @param ignoreOrder When true the order of the contig does not matter + */ + def assertSameDictionary(that: SAMSequenceDictionary, ignoreOrder: Boolean): Unit = { + if (ignoreOrder) { + assert(samDics.getReferenceLength == that.getReferenceLength) + val thisContigNames = samDics.getSequences.map(x => (x.getSequenceName, x.getSequenceLength)).sorted.toSet + assert(thisContigNames == that.getSequences.map(x => (x.getSequenceName, x.getSequenceLength)).sorted.toSet) + } else samDics.assertSameDictionary(that) + } + } } diff --git a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BiopetExecutable.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BiopetExecutable.scala index 43522e36dbe93abe174483953caa343b67ca572b..cfc7ac1e12a1022501eb8eb9e517e93d1c05627d 100644 --- a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BiopetExecutable.scala +++ b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BiopetExecutable.scala @@ -33,8 +33,7 @@ trait BiopetExecutable extends Logging { val modules: Map[String, List[MainCommand]] = Map( "pipeline" -> pipelines, - "tool" -> tools - ) + "tool" -> tools) /** * @param args the command line arguments diff --git a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala index 5b100606f0dc00bcd637c667871cb5c2d8da99cd..d4a05a7fd9beb48d01040f6188cf67bdef080043 100644 --- a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala +++ b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala @@ -134,4 +134,11 @@ object VcfUtils { else if (name.endsWith(".vcf.gz")) new File(name + ".tbi") else throw new IllegalArgumentException(s"File given is no vcf file: $vcfFile") } + + def vcfFileIsEmpty(file: File): Boolean = { + val reader = new VCFFileReader(file, false) + val hasNext = reader.iterator().hasNext + reader.close() + !hasNext + } } diff --git a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedCheck.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedCheck.scala new file mode 100644 index 0000000000000000000000000000000000000000..c432fbab3c20a28ed6f3dc7f759b07feb248e03e --- /dev/null +++ b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedCheck.scala @@ -0,0 +1,27 
@@ +package nl.lumc.sasc.biopet.utils.intervals + +import java.io.File +import scala.collection.mutable.Set + +import nl.lumc.sasc.biopet.utils.Logging + +/** + * Created by pjvanthof on 14/05/16. + */ +object BedCheck { + private val cache: Set[(File, File)] = Set() + + def checkBedFileToReference(bedFile: File, reference: File, biopetError: Boolean = false, ignoreCache: Boolean = false): Unit = { + if (ignoreCache || !cache.contains((bedFile, reference))) { + cache.add((bedFile, reference)) + val bedrecords = BedRecordList.fromFile(bedFile) + if (biopetError) { + try { + bedrecords.validateContigs(reference) + } catch { + case e: IllegalArgumentException => Logging.addError(e.getMessage + s", Bedfile: $bedFile") + } + } else bedrecords.validateContigs(reference) + } + } +} diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/AdapterSequence.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/AdapterSequence.scala new file mode 100644 index 0000000000000000000000000000000000000000..3e9cce9b24353d7a96711835d5896812085c8629 --- /dev/null +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/AdapterSequence.scala @@ -0,0 +1,4 @@ +package nl.lumc.sasc.biopet.pipelines.flexiprep + +/** Case class representing a known adapter sequence */ +case class AdapterSequence(name: String, seq: String) diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala index 3cb06df0e160cb97b98710de74f7ca9fa31ce919..54d80126bfcd1e6a3f6ef60187ed8bff4d6ffd58 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala @@ -16,7 +16,6 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep import nl.lumc.sasc.biopet.utils.config.Configurable -import scala.collection.JavaConversions._ /** * Cutadapt wrapper specific for Flexiprep. 
@@ -30,9 +29,29 @@ import scala.collection.JavaConversions._ */ class Cutadapt(root: Configurable, fastqc: Fastqc) extends nl.lumc.sasc.biopet.extensions.Cutadapt(root) { + val ignoreFastqcAdapters: Boolean = config("ignore_fastqc_adapters", default = false) + val customAdaptersConfig: Map[String, Any] = config("custom_adapters", default = Map.empty) + /** Clipped adapter names from FastQC */ - protected def seqToName: Map[String, String] = fastqc.foundAdapters - .map(adapter => adapter.seq -> adapter.name).toMap + protected def seqToName: Map[String, String] = { + if (!ignoreFastqcAdapters) { + (fastqc.foundAdapters ++ customAdapters) + .map(adapter => adapter.seq -> adapter.name).toMap + } else { + customAdapters.map(adapter => adapter.seq -> adapter.name).toMap + } + } + + def customAdapters: Set[AdapterSequence] = { + customAdaptersConfig.flatMap(adapter => { + adapter match { + case (adapterName: String, sequence: String) => + Some(AdapterSequence(adapterName, sequence)) + case _ => + throw new IllegalStateException(s"Custom adapter was setup wrong for: $adapter") + } + }).toSet + } override def summaryStats: Map[String, Any] = { val initStats = super.summaryStats diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala index bfcbf1d543a37d2e17d2897a1989183d2dfcfa58..2dadd164ab0a8b9e22372fa1525467f3924bcc8d 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala @@ -35,6 +35,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r /** Allow reporting of all found (potentially adapter) sequences in the FastQC */ var sensitiveAdapterSearch: Boolean = config("sensitiveAdapterSearch", default = false) + var enableRCtrimming: Boolean = config("enableRCtrimming", default = false) /** Class for storing a single FastQC module result */ protected case class FastQCModule(name: String, status: String, lines: Seq[String]) @@ -149,9 +150,6 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r } } else Map() - /** Case class representing a known adapter sequence */ - protected case class AdapterSequence(name: String, seq: String) - /** * Retrieves overrepresented sequences found by FastQ. 
* @@ -188,9 +186,10 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r val fromKnownList: Set[AdapterSequence] = (adapterSet ++ contaminantSet) .filter(x => foundAdapterNames.exists(_.startsWith(x.name))) - val fromKnownListRC: Set[AdapterSequence] = fromKnownList.map { + val fromKnownListRC: Set[AdapterSequence] = if (enableRCtrimming) fromKnownList.map { x => AdapterSequence(x.name + "_RC", reverseComplement(x.seq)) } + else Set.empty // list all sequences found by FastQC val fastQCFoundSequences: Seq[AdapterSequence] = if (sensitiveAdapterSearch) { diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala index c343be2297474025f70224024d827c1f21cfab78..2e9a1fa1967000ef88530546c41de93e7f9a39d8 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala @@ -102,9 +102,15 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman addPipeJob(seqtk) clip = if (!flexiprep.skipClip) { - val foundAdapters = fastqc.foundAdapters.map(_.seq) + val cutadapt = clip.getOrElse(new Cutadapt(root, fastqc)) + + val foundAdapters = if (!cutadapt.ignoreFastqcAdapters) { + fastqc.foundAdapters.map(_.seq) ++ cutadapt.customAdapters.map(_.seq) + } else { + cutadapt.customAdapters.map(_.seq) + } + if (foundAdapters.nonEmpty) { - val cutadapt = clip.getOrElse(new Cutadapt(root, fastqc)) cutadapt.fastqInput = seqtk.output cutadapt.fastqOutput = new File(output.getParentFile, input.getName + ".cutadapt.fq") cutadapt.statsOutput = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.clip.stats") diff --git a/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CutadaptTest.scala b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CutadaptTest.scala index 2b537d9767cbc1ddbd9f2e528a1c122dfe973d7c..b9f85239a5e7460708d47dd049d44dcab9878e41 100644 --- a/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CutadaptTest.scala +++ b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CutadaptTest.scala @@ -27,7 +27,7 @@ class CutadaptTest extends FastqcV0101Test { val fqc = new Fastqc(null) fqc.output = outputv0101 fqc.contaminants = Option(resourceFile("fqc_contaminants_v0112.txt")) - // fqc.beforeGraph() + fqc.enableRCtrimming = true fqc } diff --git a/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala index 3cf24e8c60a570e8e51fe528ece4f81d0b66a01a..b519dc2f04a8ca38b3340d259c323b7d15747078 100644 --- a/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala +++ b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala @@ -73,11 +73,15 @@ class FastqcV0101Test extends TestNGSuite with Matchers { fqc.contaminants = Option(resourceFile("fqc_contaminants_v0101.txt")) // found adapters also contain the adapters in reverse complement (done within flexiprep/fastqc only) val adapters = fqc.foundAdapters - // we find 1 adapter which comes with the Reverse Complement counterpart - adapters.size shouldBe 2 - adapters.head.name shouldEqual "TruSeq Adapter, Index 1_RC" - adapters.head.seq shouldEqual 
"CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC" + if (fqc.enableRCtrimming) { + // we find 1 adapter which comes with the Reverse Complement counterpart + adapters.size shouldBe 2 + adapters.head.name shouldEqual "TruSeq Adapter, Index 1_RC" + adapters.head.seq shouldEqual "CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC" + } else { + adapters.size shouldBe 1 + } adapters.last.name shouldEqual "TruSeq Adapter, Index 1" adapters.last.seq shouldEqual "GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG" diff --git a/kopisu/pom.xml b/kopisu/pom.xml index ecbd27e5c70a608daf228fe8c6a60d9bbeae4a82..c2da4ea954d1019e4f7dad9f0114cef7fb514b1c 100644 --- a/kopisu/pom.xml +++ b/kopisu/pom.xml @@ -43,5 +43,17 @@ <artifactId>BiopetExtensions</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.testng</groupId> + <artifactId>testng</artifactId> + <version>6.8</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_2.10</artifactId> + <version>2.2.1</version> + <scope>test</scope> + </dependency> </dependencies> </project> diff --git a/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferPipeline.scala b/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferPipeline.scala deleted file mode 100644 index 7265bee9a1ddeab64cfec191caa9958aa40c2076..0000000000000000000000000000000000000000 --- a/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferPipeline.scala +++ /dev/null @@ -1,125 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. 
- */ -package nl.lumc.sasc.biopet.pipelines.kopisu - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config._ -import nl.lumc.sasc.biopet.core.{ PipelineCommand, _ } -import nl.lumc.sasc.biopet.extensions.Ln -import nl.lumc.sasc.biopet.extensions.conifer.{ ConiferAnalyze, ConiferCall, ConiferRPKM } -import org.broadinstitute.gatk.queue.QScript - -class ConiferPipeline(val root: Configurable) extends QScript with BiopetQScript { - //* - // Kopisu - Coniferpipeline is a pipeline that can run standalone - // */ - def this() = this(null) - - /** Input bamfile */ - @Input(doc = "Bamfile to start from", fullName = "bam", shortName = "bam", required = true) - var inputBam: File = _ - - @Argument(doc = "Label this sample with a name/ID [0-9a-zA-Z] and [-_]", - fullName = "label", - shortName = "label", required = false) - var sampleLabel: String = _ - - /** Exon definitions in bed format */ - @Input(doc = "Exon definition file in bed format", fullName = "exon_bed", shortName = "bed", required = false) - var probeFile: File = config("probeFile") - - @Input(doc = "Previous RPKM files (controls)", fullName = "rpkm_controls", shortName = "rc", required = false) - var controlsDir: File = config("controlsDir") - - @Argument(doc = "Enable RPKM only mode, generate files for reference db", shortName = "rpkmonly", required = false) - var RPKMonly: Boolean = false - - val summary = new ConiferSummary(this) - - def init() { - - } - - def input2RPKM(inputBam: File): String = { - if (!sampleLabel.isEmpty) sampleLabel ++ ".txt" - else swapExt(inputBam.getName, ".bam", ".txt") - } - - def input2HDF5(inputBam: File): String = { - if (!sampleLabel.isEmpty) sampleLabel ++ ".hdf5" - else swapExt(inputBam.getName, ".bam", ".hdf5") - } - def input2Calls(inputBam: File): String = { - if (!sampleLabel.isEmpty) sampleLabel ++ ".calls.txt" - else swapExt(inputBam.getName, ".bam", "calls.txt") - } - - def biopetScript(): Unit = { - - /** Setup RPKM directory */ - val sampleDir: String = outputDir - val RPKMdir: File = new File(sampleDir + File.separator + "RPKM" + File.separator) - RPKMdir.mkdir() - - val coniferRPKM = new ConiferRPKM(this) - coniferRPKM.bamFile = this.inputBam.getAbsoluteFile - coniferRPKM.probes = this.probeFile - coniferRPKM.output = new File(RPKMdir, input2RPKM(inputBam)) - add(coniferRPKM) - - if (!RPKMonly) { - /** Collect the rpkm_output to a temp directory, where we merge with the control files */ - var refRPKMlist: List[File] = Nil - // Sync the .txt only, these files contain the RPKM Values - for (controlRPKMfile <- controlsDir.list.filter(_.toLowerCase.endsWith(".txt"))) { - val target = new File(RPKMdir, controlRPKMfile) - val source = new File(controlsDir, controlRPKMfile) - - if (!target.exists) { - add(Ln(this, source, target, relative = false)) - refRPKMlist :+= target - } else if (!target.equals(source)) { - target.delete() - add(Ln(this, source, target, relative = false)) - refRPKMlist :+= target - } - } - - val coniferAnalyze = new ConiferAnalyze(this) - coniferAnalyze.deps = List(coniferRPKM.output) ++ refRPKMlist - coniferAnalyze.probes = this.probeFile - coniferAnalyze.rpkmDir = RPKMdir - coniferAnalyze.output = new File(sampleDir, input2HDF5(inputBam)) - add(coniferAnalyze) - - val coniferCall = new ConiferCall(this) - coniferCall.input = coniferAnalyze.output - coniferCall.output = new File(sampleDir, "calls.txt") - add(coniferCall) - - summary.deps = List(coniferCall.output) - summary.label = sampleLabel - summary.calls = coniferCall.output - summary.out = new 
File(sampleDir, input2Calls(inputBam)) - - add(summary) - } - - } -} - -object ConiferPipeline extends PipelineCommand diff --git a/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferSummary.scala b/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferSummary.scala deleted file mode 100644 index 6eddbad30c61e741a63687aa1a77df3604017f1a..0000000000000000000000000000000000000000 --- a/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/ConiferSummary.scala +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.pipelines.kopisu - -import java.io.{ BufferedWriter, File, FileWriter } - -import argonaut._ -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.function.InProcessFunction -import org.broadinstitute.gatk.utils.commandline.{ Input, Output } - -import scala.io.Source - -class ConiferSummary(val root: Configurable) extends InProcessFunction with Configurable { - def filterCalls(callFile: File, outFile: File, sampleName: String): Unit = { - // val filename = callFile.getAbsolutePath - val writer = new BufferedWriter(new FileWriter(outFile)) - - for (line <- Source.fromFile(callFile).getLines()) { - line.startsWith(sampleName) || line.startsWith("sampleID") match { - case true => writer.write(line + "\n"); - case _ => - } - } - writer.close() - } - - this.analysisName = getClass.getSimpleName - - @Input(doc = "deps") - var deps: List[File] = Nil - - @Output(doc = "Summary output", required = true) - var out: File = _ - - @Input(doc = "calls") - var calls: File = _ - - var label: String = _ - - var coniferPipeline: ConiferPipeline = root match { - case pipeline: ConiferPipeline => pipeline - case _ => - throw new IllegalStateException("Root is no instance of ConiferPipeline") - } - - var resources: Map[String, Json] = Map() - - override def run() { - logger.debug("Start") - filterCalls(calls, out, label) - logger.debug("Stop") - } -} diff --git a/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/Kopisu.scala b/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/Kopisu.scala index 9a6f002710a35203f9bff63dff9d776f2c95e246..31bc12e71a94305ddf185ac03882455f4a49fb95 100644 --- a/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/Kopisu.scala +++ b/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/Kopisu.scala @@ -15,60 +15,67 @@ */ package nl.lumc.sasc.biopet.pipelines.kopisu +import nl.lumc.sasc.biopet.core.summary.SummaryQScript +import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference } +import nl.lumc.sasc.biopet.pipelines.kopisu.methods.{ ConiferMethod, FreecMethod } +import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging } import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand } 
import org.broadinstitute.gatk.queue.QScript -class Kopisu(val root: Configurable) extends QScript with MultiSampleQScript { +import scala.language.reflectiveCalls + +class Kopisu(val root: Configurable) extends QScript with SummaryQScript with Reference { + qscript => def this() = this(null) - @Input(doc = "Input bamfile", required = true) - var bamFile: File = config("bam") + @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true) + protected[kopisu] var inputBamsArg: List[File] = Nil - def init() { - if (!outputDir.endsWith("/")) outputDir += "/" - } + var inputBams: Map[String, File] = Map() - def biopetScript() { - addSamplesJobs() + def init(): Unit = { + if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg) + if (inputBams.isEmpty) Logging.addError("No input bams found") } - def summaryFile: File = new File(outputDir, "Kopisu.summary.json") - - //TODO: Add summary - def summaryFiles: Map[String, File] = Map() - - //TODO: Add summary - def summarySettings: Map[String, Any] = Map() - - def makeSample(id: String) = new Sample(id) - class Sample(sampleId: String) extends AbstractSample(sampleId) { - //TODO: Add summary - def summaryFiles: Map[String, File] = Map() - - //TODO: Add summary - def summaryStats: Map[String, Any] = Map() + lazy val freecMethod = if (config("use_freec_method", default = true)) { + Some(new FreecMethod(this)) + } else None - def makeLibrary(id: String) = new Library(id) - class Library(libId: String) extends AbstractLibrary(libId) { - //TODO: Add summary - def summaryFiles: Map[String, File] = Map() + lazy val coniferMethod = if (config("use_conifer_method", default = false)) { + Some(new ConiferMethod(this)) + } else None - //TODO: Add summary - def summaryStats: Map[String, Any] = Map() - - def addJobs(): Unit = { + // By default only the FreeC method is enabled. + def biopetScript() { + if (freecMethod.isEmpty && coniferMethod.isEmpty) Logging.addError("No method selected") - } + freecMethod.foreach { method => + method.inputBams = inputBams + method.outputDir = new File(outputDir, "freec_method") + add(method) } - def addJobs(): Unit = { - + coniferMethod.foreach { method => + method.inputBams = inputBams + method.outputDir = new File(outputDir, "conifer_method") + add(method) } - } - def addMultiSampleJobs(): Unit = { + addSummaryJobs() } + + /** Must return a map with used settings for this pipeline */ + def summarySettings: Map[String, Any] = Map( + "reference" -> referenceSummary, + "freec_method" -> freecMethod.isDefined + ) + + /** Files to put in the summary for this pipeline */ + def summaryFiles: Map[String, File] = inputBams.map(x => s"inputbam_${x._1}" -> x._2) + + /** Name of summary output file */ + def summaryFile: File = new File(outputDir, "kopisu.summary.json") } object Kopisu extends PipelineCommand diff --git a/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/methods/CnvMethod.scala b/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/methods/CnvMethod.scala new file mode 100644 index 0000000000000000000000000000000000000000..c81ad12cfac2f5f8ab5fc7ffc78be7766abf661f --- /dev/null +++ b/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/methods/CnvMethod.scala @@ -0,0 +1,29 @@ +package nl.lumc.sasc.biopet.pipelines.kopisu.methods + +import nl.lumc.sasc.biopet.core.summary.SummaryQScript +import nl.lumc.sasc.biopet.core.Reference +import org.broadinstitute.gatk.queue.QScript + +/** + * Created by pjvanthof on 10/05/16. 
+ */ +trait CnvMethod extends QScript with SummaryQScript with Reference { + + /** Name of mode, this should also be used in the config */ + def name: String + + var namePrefix: String = name + + var inputBams: Map[String, File] = Map.empty + + /** Name of summary output file */ + def summaryFile: File = new File(outputDir, s"$name.summary.json") + + /** Must return a map with used settings for this pipeline */ + def summarySettings: Map[String, Any] = Map() + + /** Files to put in the summary for this pipeline */ + def summaryFiles: Map[String, File] = inputBams.map(x => s"inputbam_${x._1}" -> x._2) + + def init() = {} +} diff --git a/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/methods/ConiferMethod.scala b/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/methods/ConiferMethod.scala new file mode 100644 index 0000000000000000000000000000000000000000..b3364e8e093f07740a4c75fe055ece8ab87e54fe --- /dev/null +++ b/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/methods/ConiferMethod.scala @@ -0,0 +1,62 @@ +package nl.lumc.sasc.biopet.pipelines.kopisu.methods + +import java.io.File + +import nl.lumc.sasc.biopet.extensions.Ln +import nl.lumc.sasc.biopet.extensions.conifer.{ ConiferAnalyze, ConiferCall, ConiferRPKM } +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** + * Created by pjvanthof on 10/05/16. + */ +class ConiferMethod(val root: Configurable) extends CnvMethod { + def name = "conifer" + + /** Exon definitions in bed format */ + var probeFile: File = config("probe_file") + + var controlsDir: File = config("controls_dir") + + /** Enable RPKM only mode, generate files for reference db */ + var RPKMonly: Boolean = false + + def biopetScript: Unit = { + val RPKMdir = new File(outputDir, "rpkm") + + val rpkmFiles: List[File] = inputBams.map { + case (sampleName, bamFile) => + val coniferRPKM = new ConiferRPKM(this) + coniferRPKM.bamFile = bamFile.getAbsoluteFile + coniferRPKM.probes = this.probeFile + coniferRPKM.output = new File(RPKMdir, s"$sampleName.rpkm.txt") + add(coniferRPKM) + coniferRPKM.output + }.toList ++ controlsDir.list.filter(_.toLowerCase.endsWith(".txt")).map { path => + val oldFile = new File(path) + val newFile = new File(RPKMdir, s"control.${oldFile.getName}") + add(Ln(this, oldFile, newFile)) + newFile + } + + inputBams.foreach { + case (sampleName, bamFile) => + val sampleDir = new File(outputDir, "samples" + File.separator + sampleName) + + val coniferAnalyze = new ConiferAnalyze(this) + coniferAnalyze.deps ++= rpkmFiles + coniferAnalyze.probes = probeFile + coniferAnalyze.rpkmDir = RPKMdir + coniferAnalyze.output = new File(sampleDir, s"$sampleName.hdf5") + add(coniferAnalyze) + + val coniferCall = new ConiferCall(this) + coniferCall.input = coniferAnalyze.output + coniferCall.output = new File(sampleDir, s"${sampleName}.calls.txt") + add(coniferCall) + } + + addSummaryJobs() + } + + override def summaryFiles = super.summaryFiles ++ Map("probe_file" -> probeFile) +} diff --git a/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/methods/FreecMethod.scala b/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/methods/FreecMethod.scala new file mode 100644 index 0000000000000000000000000000000000000000..44229df3f141005589a82860de907ba0af431f7d --- /dev/null +++ b/kopisu/src/main/scala/nl/lumc/sasc/biopet/pipelines/kopisu/methods/FreecMethod.scala @@ -0,0 +1,57 @@ +package nl.lumc.sasc.biopet.pipelines.kopisu.methods + +import java.io.File + +import nl.lumc.sasc.biopet.extensions.freec.{ FreeC, 
FreeCAssessSignificancePlot, FreeCBAFPlot, FreeCCNVPlot } +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** + * Created by pjvanthof on 10/05/16. + */ +class FreecMethod(val root: Configurable) extends CnvMethod { + def name = "freec" + + var snpFile: Option[File] = config("snp_file", freeVar = false) + + def biopetScript: Unit = { + inputBams.foreach { + case (sampleName, bamFile) => + + val sampleOutput = new File(outputDir, sampleName) + + val freec = new FreeC(this) + freec.input = bamFile + freec.inputFormat = Some("BAM") + freec.outputPath = sampleOutput + freec.snpFile = snpFile + add(freec) + + /* + * These scripts will wait for FreeC to Finish + * + * R-scripts to plot FreeC results + * */ + val fcAssessSignificancePlot = new FreeCAssessSignificancePlot(this) + fcAssessSignificancePlot.cnv = freec.cnvOutput + fcAssessSignificancePlot.ratios = freec.ratioOutput + fcAssessSignificancePlot.output = new File(sampleOutput, sampleName + ".freec_significant_calls.txt") + add(fcAssessSignificancePlot) + + val fcCnvPlot = new FreeCCNVPlot(this) + fcCnvPlot.input = freec.ratioOutput + fcCnvPlot.output = new File(sampleOutput, sampleName + ".freec_cnv") + add(fcCnvPlot) + + snpFile.foreach { _ => + val fcBAFPlot = new FreeCBAFPlot(this) + fcBAFPlot.input = freec.bafOutput + fcBAFPlot.output = new File(sampleOutput, sampleName + ".freec_baf") + add(fcBAFPlot) + } + } + + addSummaryJobs() + } + + override def summaryFiles = super.summaryFiles ++ snpFile.map("snp_file" -> _) +} diff --git a/kopisu/src/test/resources/paired01.bam b/kopisu/src/test/resources/paired01.bam new file mode 100644 index 0000000000000000000000000000000000000000..5d70b4512cb0f8ab620cd6d64c0efd9016432715 Binary files /dev/null and b/kopisu/src/test/resources/paired01.bam differ diff --git a/kopisu/src/test/resources/paired01.bam.bai b/kopisu/src/test/resources/paired01.bam.bai new file mode 100644 index 0000000000000000000000000000000000000000..a64a2d9734ce39938b3408e8be3bb580f05374fa Binary files /dev/null and b/kopisu/src/test/resources/paired01.bam.bai differ diff --git a/kopisu/src/test/resources/ref.dict b/kopisu/src/test/resources/ref.dict new file mode 100644 index 0000000000000000000000000000000000000000..ab5ceac32357f56f2b7c4ddbfdb17197187c9da8 --- /dev/null +++ b/kopisu/src/test/resources/ref.dict @@ -0,0 +1,2 @@ +@HD VN:1.4 SO:unsorted +@SQ SN:chr1 LN:9 UR:file:/home/pjvan_thof/pipelines/biopet/public/mapping/src/test/resources/ref.fa M5:fe15dbbd0900310caf32827f6da57550 diff --git a/kopisu/src/test/resources/ref.fa b/kopisu/src/test/resources/ref.fa new file mode 100644 index 0000000000000000000000000000000000000000..0c51751ffe1eee84552290f10f8cdb6c84625131 --- /dev/null +++ b/kopisu/src/test/resources/ref.fa @@ -0,0 +1,2 @@ +>chr1 +AGTAGTAGT diff --git a/kopisu/src/test/resources/ref.fa.fai b/kopisu/src/test/resources/ref.fa.fai new file mode 100644 index 0000000000000000000000000000000000000000..17ecf802afe8cf33c2e35f72e604bb039114d77c --- /dev/null +++ b/kopisu/src/test/resources/ref.fa.fai @@ -0,0 +1 @@ +chr1 9 6 9 10 diff --git a/kopisu/src/test/scala/nl/lumc/sasc/biopet/pipelines/kopisu/KopisuTest.scala b/kopisu/src/test/scala/nl/lumc/sasc/biopet/pipelines/kopisu/KopisuTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..8fbc637db0111cc1189cc36d4ca654891965734e --- /dev/null +++ b/kopisu/src/test/scala/nl/lumc/sasc/biopet/pipelines/kopisu/KopisuTest.scala @@ -0,0 +1,130 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. 
It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.pipelines.kopisu + +import java.io.{ File, FileOutputStream } + +import com.google.common.io.Files +import nl.lumc.sasc.biopet.extensions.freec.{ FreeC, FreeCAssessSignificancePlot, FreeCCNVPlot } +import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.utils.config.Config +import org.broadinstitute.gatk.queue.QSettings +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.{ DataProvider, Test } + +import scala.collection.mutable.ListBuffer + +/** + * Test class for [[Kopisu]] + * + * Created by pjvan_thof on 3/2/15. + */ +class KopisuTest extends TestNGSuite with Matchers { + def initPipeline(map: Map[String, Any]): Kopisu = { + new Kopisu() { + override def configNamespace = "kopisu" + override def globalConfig = new Config(ConfigUtils.mergeMaps(map, KopisuTest.config)) + qSettings = new QSettings + qSettings.runName = "test" + } + } + + @DataProvider(name = "shivaSvCallingOptions") + def shivaSvCallingOptions = { + val bool = Array(true, false) + (for ( + bams <- 0 to 3; + freec <- bool; + conifer <- bool + ) yield Array(bams, freec, conifer)).toArray + } + + @Test(dataProvider = "shivaSvCallingOptions") + def testShivaSvCalling(bams: Int, + freec: Boolean, + conifer: Boolean) = { + val callers: ListBuffer[String] = ListBuffer() + val map = Map("sv_callers" -> callers.toList) + val pipeline = initPipeline(map ++ Map( + "use_freec_method" -> freec, + "use_conifer_method" -> conifer + )) + + pipeline.inputBams = (for (n <- 1 to bams) yield n.toString -> KopisuTest.inputTouch("bam_" + n + ".bam")).toMap + + val illegalArgumentException = pipeline.inputBams.isEmpty || (!freec && !conifer) + + if (illegalArgumentException) intercept[IllegalStateException] { + pipeline.init() + pipeline.script() + } + + if (!illegalArgumentException) { + pipeline.init() + pipeline.script() + + pipeline.freecMethod.isDefined shouldBe freec + pipeline.summarySettings.get("freec_method") shouldBe Some(freec) + + pipeline.functions.count(_.isInstanceOf[FreeC]) shouldBe (if (freec) bams else 0) + pipeline.functions.count(_.isInstanceOf[FreeCAssessSignificancePlot]) shouldBe (if (freec) bams else 0) + pipeline.functions.count(_.isInstanceOf[FreeCCNVPlot]) shouldBe (if (freec) bams else 0) + } + } +} + +object KopisuTest { + val outputDir = Files.createTempDir() + outputDir.deleteOnExit() + new File(outputDir, "input").mkdirs() + private def inputTouch(name: String): File = { + val file = new File(outputDir, "input" + File.separator + name).getAbsoluteFile + Files.touch(file) + file + } + + private def copyFile(name: String): Unit = { + val is = getClass.getResourceAsStream("/" + name) + val os = new FileOutputStream(new File(outputDir, name)) + org.apache.commons.io.IOUtils.copy(is, os) + os.close() + } + + copyFile("ref.fa") + copyFile("ref.dict") + copyFile("ref.fa.fai") + + val 
controlDir = Files.createTempDir() + controlDir.deleteOnExit() + Files.touch(new File(controlDir, "test.txt")) + + val config = Map( + "name_prefix" -> "test", + "output_dir" -> outputDir, + "reference_fasta" -> (outputDir + File.separator + "ref.fa"), + "gatk_jar" -> "test", + "samtools" -> Map("exe" -> "test"), + "md5sum" -> Map("exe" -> "test"), + "bgzip" -> Map("exe" -> "test"), + "tabix" -> Map("exe" -> "test"), + "freec" -> Map("exe" -> "test", "chrFiles" -> "test", "chrLenFile" -> "test"), + "controls_dir" -> controlDir.getAbsolutePath, + "conifer" -> Map("script" -> "/usr/bin/test"), + "probe_file" -> "test", + "rscript" -> Map("exe" -> "test") + ) +} \ No newline at end of file diff --git a/shiva/pom.xml b/shiva/pom.xml index dd49b39beb655a851b5893afec09a71c1427f081..2750ac39f27c34fbd29e74c70969b349e6cc0603 100644 --- a/shiva/pom.xml +++ b/shiva/pom.xml @@ -40,6 +40,11 @@ <artifactId>Mapping</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>nl.lumc.sasc</groupId> + <artifactId>Kopisu</artifactId> + <version>${project.version}</version> + </dependency> <dependency> <groupId>nl.lumc.sasc</groupId> <artifactId>Toucan</artifactId> diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala index ed0e1318d96c615346172b5e1add0df6dc4476d0..946b8cdfc39e41f25be884427aa38c0d4d114e5f 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala @@ -19,6 +19,7 @@ import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference } import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension import nl.lumc.sasc.biopet.extensions.gatk._ import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions +import nl.lumc.sasc.biopet.pipelines.kopisu.Kopisu import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait import nl.lumc.sasc.biopet.pipelines.toucan.Toucan import nl.lumc.sasc.biopet.utils.config.Configurable @@ -162,6 +163,10 @@ class Shiva(val root: Configurable) extends QScript with MultisampleMappingTrait Some(new ShivaSvCalling(this)) } else None + lazy val cnvCalling = if (config("cnv_calling", default = false).asBoolean) { + Some(new Kopisu(this)) + } else None + lazy val annotation = if (multisampleVariantCalling.isDefined && config("annotation", default = false).asBoolean) { Some(new Toucan(this)) @@ -178,16 +183,22 @@ class Shiva(val root: Configurable) extends QScript with MultisampleMappingTrait annotation.foreach { toucan => toucan.outputDir = new File(outputDir, "annotation") - toucan.inputVCF = vc.finalFile + toucan.inputVcf = vc.finalFile add(toucan) } }) - svCalling.foreach(sv => { + svCalling.foreach { sv => sv.outputDir = new File(outputDir, "sv_calling") sv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) } add(sv) - }) + } + + cnvCalling.foreach { cnv => + cnv.outputDir = new File(outputDir, "cnv_calling") + cnv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) } + add(cnv) + } } /** Location of summary file */ @@ -198,6 +209,7 @@ class Shiva(val root: Configurable) extends QScript with MultisampleMappingTrait "annotation" -> annotation.isDefined, "multisample_variantcalling" -> multisampleVariantCalling.isDefined, "sv_calling" -> svCalling.isDefined, + "cnv_calling" -> cnvCalling.isDefined, "regions_of_interest" -> 
roiBedFiles.map(_.getName.stripSuffix(".bed")), "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")) ) diff --git a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala index 7dfa340645bf8ae00e77c175400879db7ac52964..fb6a8132e5bd6822ac1087e99a544a3e180405be 100644 --- a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala +++ b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala @@ -60,6 +60,7 @@ trait ShivaTestTrait extends TestNGSuite with Matchers { def libraryCalling = false def dbsnp = true def svCalling = false + def cnvCalling = false def annotation = false @Test(dataProvider = "shivaOptions") @@ -77,6 +78,7 @@ trait ShivaTestTrait extends TestNGSuite with Matchers { "use_indel_realigner" -> realign, "use_base_recalibration" -> baseRecalibration, "sv_calling" -> svCalling, + "cnv_calling" -> cnvCalling, "annotation" -> annotation), m) } @@ -102,6 +104,7 @@ trait ShivaTestTrait extends TestNGSuite with Matchers { pipeline.summarySettings.get("annotation") shouldBe Some(annotation) pipeline.summarySettings.get("sv_calling") shouldBe Some(svCalling) + pipeline.summarySettings.get("cnv_calling") shouldBe Some(cnvCalling) pipeline.samples foreach { case (sampleId, sample) => @@ -151,6 +154,13 @@ class ShivaWithSvCallingTest extends ShivaTestTrait { override def baseRecalibrationProvider = Array(false) override def svCalling = true } +class ShivaWithCnvCallingTest extends ShivaTestTrait { + override def sample1 = Array(true) + override def sample2 = Array(false) + override def realignProvider = Array(false) + override def baseRecalibrationProvider = Array(false) + override def cnvCalling = true +} class ShivaWithAnnotationTest extends ShivaTestTrait { override def sample1 = Array(true) override def sample2 = Array(false) @@ -215,7 +225,12 @@ object ShivaTest { "pysvtools" -> Map( "exe" -> "test", "exclusion_regions" -> "test", - "translocations_only" -> false) + "translocations_only" -> false), + "freec" -> Map( + "exe" -> "test", + "chrFiles" -> "test", + "chrLenFile" -> "test" + ) ) val sample1 = Map( diff --git a/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala b/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala index 58dcaf82447daef751eae0d21ac7c43387026149..e05dde5274c83a3b40453f386ff028d070d3517b 100644 --- a/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala +++ b/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala @@ -15,16 +15,22 @@ */ package nl.lumc.sasc.biopet.pipelines.toucan +import java.io.File + +import htsjdk.samtools.reference.FastaSequenceFile import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsView import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsIntersect, BedtoolsMerge } +import nl.lumc.sasc.biopet.extensions.gatk.{ CatVariants, SelectVariants } import nl.lumc.sasc.biopet.extensions.manwe.{ ManweAnnotateVcf, ManweSamplesImport } import nl.lumc.sasc.biopet.extensions.tools.{ GvcfToBed, VcfWithVcf, VepNormalizer } import nl.lumc.sasc.biopet.extensions.{ Bgzip, Ln, VariantEffectPredictor } import nl.lumc.sasc.biopet.utils.VcfUtils import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.utils.intervals.BedRecordList import org.broadinstitute.gatk.queue.QScript +import scalaz._, Scalaz._ /** * Pipeline to annotate a 
vcf file with VEP @@ -35,22 +41,43 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum def this() = this(null) @Input(doc = "Input VCF file", shortName = "Input", required = true) - var inputVCF: File = _ + var inputVcf: File = _ @Input(doc = "Input GVCF file", shortName = "gvcf", required = false) var inputGvcf: Option[File] = None - var outputVcf: Option[File] = None + def outputName = inputVcf.getName.stripSuffix(".vcf.gz") + + def outputVcf: File = (gonlVcfFile, exacVcfFile) match { + case (Some(_), Some(_)) => new File(outputDir, s"$outputName.vep.normalized.gonl.exac.vcf.gz") + case (Some(_), _) => new File(outputDir, s"$outputName.vep.normalized.gonl.vcf.gz") + case (_, Some(_)) => new File(outputDir, s"$outputName.vep.normalized.exac.vcf.gz") + case _ => new File(outputDir, s"$outputName.vep.normalized.vcf.gz") + } + + lazy val minScatterGenomeSize: Long = config("min_scatter_genome_size", default = 75000000) + + lazy val enableScatter: Boolean = config("enable_scatter", default = { + val ref = new FastaSequenceFile(referenceFasta(), true) + val refLenght = ref.getSequenceDictionary.getReferenceLength + ref.close() + refLenght > minScatterGenomeSize + }) def sampleInfo: Map[String, Map[String, Any]] = root match { case m: MultiSampleQScript => m.samples.map { case (sampleId, sample) => sampleId -> sample.sampleTags } - case null => VcfUtils.getSampleIds(inputVCF).map(x => x -> Map[String, Any]()).toMap + case null => VcfUtils.getSampleIds(inputVcf).map(x => x -> Map[String, Any]()).toMap case s: SampleLibraryTag => s.sampleId.map(x => x -> Map[String, Any]()).toMap case _ => throw new IllegalArgumentException("") } + lazy val gonlVcfFile: Option[File] = config("gonl_vcf") + lazy val exacVcfFile: Option[File] = config("exac_vcf") + lazy val doVarda: Boolean = config("use_varda", default = false) + def init(): Unit = { - inputFiles :+= new InputFile(inputVCF) + require(inputVcf != null, "No Input vcf given") + inputFiles :+= new InputFile(inputVcf) } override def defaults = Map( @@ -58,60 +85,91 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum ) def biopetScript(): Unit = { - val doVarda: Boolean = config("use_varda", default = false) val useVcf: File = if (doVarda) { inputGvcf match { - case Some(s) => varda(inputVCF, s) + case Some(s) => varda(inputVcf, s) case _ => throw new IllegalArgumentException("You have not specified a GVCF file") } - } else inputVCF + } else inputVcf + + if (enableScatter) { + val outputVcfFiles = BedRecordList.fromReference(referenceFasta()) + .scatter(config("bin_size", default = 50000000)) + .allRecords.map { region => + + val chunkName = s"${region.chr}-${region.start}-${region.end}" + val chunkDir = new File(outputDir, "chunk" + File.separator + chunkName) + chunkDir.mkdirs() + val bedFile = new File(chunkDir, chunkName + ".bed") + BedRecordList.fromList(List(region)).writeToFile(bedFile) + bedFile.deleteOnExit() + val sv = new SelectVariants(this) + sv.variant = useVcf + sv.out = new File(chunkDir, chunkName + ".vcf.gz") + sv.intervals :+= bedFile + sv.isIntermediate = true + add(sv) + + runChunk(sv.out, chunkDir, chunkName) + } + + val cv = new CatVariants(this) + cv.variant = outputVcfFiles.toList + cv.outputFile = outputVcf + add(cv) + } else runChunk(useVcf, outputDir, outputName) + + addSummaryJobs() + } + + protected def runChunk(file: File, chunkDir: File, chunkName: String): File = { val vep = new VariantEffectPredictor(this) - vep.input = useVcf - vep.output = new 
File(outputDir, inputVCF.getName.stripSuffix(".gz").stripSuffix(".vcf") + ".vep.vcf") + vep.input = file + vep.output = new File(chunkDir, chunkName + ".vep.vcf") vep.isIntermediate = true add(vep) addSummarizable(vep, "variant_effect_predictor") val normalizer = new VepNormalizer(this) normalizer.inputVCF = vep.output - normalizer.outputVcf = swapExt(outputDir, vep.output, ".vcf", ".normalized.vcf.gz") + normalizer.outputVcf = new File(chunkDir, chunkName + ".vep.normalized.vcf.gz") + normalizer.isIntermediate = enableScatter || gonlVcfFile.isDefined || exacVcfFile.isDefined add(normalizer) - // Optional annotation steps, depend is some files existing in the config - val gonlVcfFile: Option[File] = config("gonl_vcf") - val exacVcfFile: Option[File] = config("exac_vcf") - - outputVcf = Some(normalizer.outputVcf) + var outputFile = normalizer.outputVcf gonlVcfFile match { case Some(gonlFile) => val vcfWithVcf = new VcfWithVcf(this) - vcfWithVcf.input = outputVcf.getOrElse(new File("")) + vcfWithVcf.input = outputFile vcfWithVcf.secondaryVcf = gonlFile - vcfWithVcf.output = swapExt(outputDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz") + vcfWithVcf.output = swapExt(chunkDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz") vcfWithVcf.fields ::= ("AF", "AF_gonl", None) + vcfWithVcf.isIntermediate = enableScatter || exacVcfFile.isDefined add(vcfWithVcf) - outputVcf = Some(vcfWithVcf.output) + outputFile = vcfWithVcf.output case _ => } exacVcfFile match { case Some(exacFile) => val vcfWithVcf = new VcfWithVcf(this) - vcfWithVcf.input = outputVcf.getOrElse(new File("")) + vcfWithVcf.input = outputFile vcfWithVcf.secondaryVcf = exacFile - vcfWithVcf.output = swapExt(outputDir, outputVcf.getOrElse(new File("")), ".vcf.gz", ".exac.vcf.gz") + vcfWithVcf.output = swapExt(chunkDir, outputFile, ".vcf.gz", ".exac.vcf.gz") vcfWithVcf.fields ::= ("AF", "AF_exac", None) + vcfWithVcf.isIntermediate = enableScatter add(vcfWithVcf) - outputVcf = Some(vcfWithVcf.output) + outputFile = vcfWithVcf.output case _ => } - addSummaryJobs() + outputFile } /** * Performs the varda import and activate for one sample + * * @param sampleID the sampleID to be used * @param inputVcf the input VCF * @param gVCF the gVCF for coverage @@ -144,8 +202,8 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum add(bgzippedBed) val singleVcf = new BcftoolsView(this) - singleVcf.input = inputVCF - singleVcf.output = swapExt(outputDir, inputVCF, ".vcf.gz", s""".$sampleID.vcf.gz""") + singleVcf.input = inputVcf + singleVcf.output = swapExt(outputDir, inputVcf, ".vcf.gz", s""".$sampleID.vcf.gz""") singleVcf.samples = List(sampleID) singleVcf.minAC = Some(1) singleVcf.isIntermediate = true @@ -182,6 +240,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum /** * Perform varda analysis + * * @param vcf input vcf * @param gVcf The gVCF to be used for coverage calculations * @return return vcf @@ -205,11 +264,16 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum add(annotatedVcf) val activates = sampleInfo map { x => - val sampleGroup = x._2.getOrElse("varda_group", Nil) match { - case x: List[String] => x - case Nil => Nil - case _ => throw new IllegalArgumentException("Sample tag 'varda_group' is not a list of strings") + val maybeSampleGroup = x._2.get("varda_group") match { + case None => Some(Nil) + case Some(vals) => vals match { + case xs: List[_] => xs + .traverse[Option, String] { x => Option(x.toString).filter(_ == x) } + case 
otherwise => None + } } + val sampleGroup = maybeSampleGroup + .getOrElse(throw new IllegalArgumentException("Sample tag 'varda_group' is not a list of strings")) importAndActivateSample(x._1, sampleGroup, vcf, gVcf, annotate) } @@ -225,7 +289,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum def summaryFile = new File(outputDir, "Toucan.summary.json") - def summaryFiles = Map() + def summaryFiles = Map("input_vcf" -> inputVcf, "outputVcf" -> outputVcf) def summarySettings = Map() } diff --git a/toucan/src/test/resources/chrQ2.vcf.gz b/toucan/src/test/resources/chrQ2.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..94a70075ea0258cf6132cf9a7a7dfedb18542fac Binary files /dev/null and b/toucan/src/test/resources/chrQ2.vcf.gz differ diff --git a/toucan/src/test/resources/fake_chrQ.dict b/toucan/src/test/resources/fake_chrQ.dict new file mode 100644 index 0000000000000000000000000000000000000000..e2b0e2af7579a994afedb68a5c495ba794a445df --- /dev/null +++ b/toucan/src/test/resources/fake_chrQ.dict @@ -0,0 +1,2 @@ +@HD VN:1.4 SO:unsorted +@SQ SN:chrQ LN:16571 M5:94445ec460a68206ae9781f71697d3db UR:file:/home/ahbbollen/fake_chrQ.fa diff --git a/toucan/src/test/resources/fake_chrQ.fa b/toucan/src/test/resources/fake_chrQ.fa new file mode 100644 index 0000000000000000000000000000000000000000..171b3737bd458bba03d7c457b6ba51e5ef4f774f --- /dev/null +++ b/toucan/src/test/resources/fake_chrQ.fa @@ -0,0 +1,2 @@ +>chrQ +TCGGCATCTAGAAACTCCAAGTCTGCAGATCTATAACACACGAGGGTATTCAGCCCTAAAGCATTGCGTCTAGGCATGGGTCTTTAATCTCGCCCCGGCAACCTCTACGCGAGCTCCTACCAGTCAACGTGATTGATCCCTCGTACCTAGCATATTCCACGGCGTTTCATAGCCTTGACGGGCACCTTATCCATTTTTACCTAGTTTGACATTAATTGCTTCAAGACTACTGCGGTACGGGTCCCTGCATCCAGAAAGGATGACAAAAAGTGTGACAGTGCGTTCAATCCATAGATTCCCGTCGGTGCGAGGGCTGAAGTTGAGCCCTACAACCCGGCGATAAAAATTGTAACCCTATGCATTGCGTCAAGTTAAGACCCTCCATCTTCGCCACGCGATAGGAATTTGAAACATCGGCGTGCTTAGAAAGGCTATTAGCGCGGGTGGCCTGATCGGTATCCGATAAAGGCGTTGCCACAAATTTTGCCGATTGACCTGCAATTACATCGGGTAAGTGCATTTCCGCCGTTAGAGTATTAAGTCATGACGTTTTCGACATGGGCATTTAATTGGCCCAAGGTGGAATATTCCTGATGTGCCTAAGTATGAGCTCCCCATGAGAATTTGAAGATCATGAAGACTTGAGTCAATTCTAAATGTAGTTACCATCACTTAAGTTCGTTCGCCTTCTGGGCAGGCTATGTCTAACGAGCGACGTCCGCAGTGTGACAATCTAGATATTGGTTGAGCAGGAATACCAAGGAGGTGCTTGAAGTTTCTCTTATGGAACCAACTTCAATCAATAGGTAGTCTCCGTTTCGTTTCCACTGAGACATTTGTGCAGTGGCACAGTATGTGGTCGTAAGCGCATGGTGTTGTTGGAACGAAGACTCTCACTTTTGTTTCCTTTGGTAGTTTAATTTAGCAGTATCCTGGTCGTTGACCAATTTGTGAATCTCCACGGTCTCTCTTTATCAGTCATACCTCTTAAACCGGCTTCCGCTCCTGCGCACATTTATATCCTACTTTCCGTCAATGTGAAAGAGAGATCAACATAATTTCGCGCACGATCTGTCCCATTTCAGACGCCATCGACGGGTCCCGCCTTCACAAAAAACCTGCCACCATGCAGTAGGTACCTCCATATGCTGAGCGTCCGTTACCGGAAGGGTTAAGTCACTGTTTAAAGGATAAGGCGAGGTTTCCCTTGGTGGTGTACTACTGCTCACGTTCCTGCTTTACTCTCCGAGTATTGTTTTAAAAGTGGGCACGCTGGAACGCAGCACCCTATAAGAAGACTCAGTTAGGTCCCCTTCACGAGCATGTTTGAGCTCTCGGACCTAAGACGTCCGAACTCGGTTACGACGGTTAAAGACAAAGCTCCCAGCATGGTATCAGAAGCCACCACGCGTGAGGATGGCGGGGCCCTGGGCACATTGGCCCCGAATTCTTCGCCCACTTAAGTCGGACTCACCACGGGAGACTACGCCCTAACCGGATAATATCTTTTAGATACCATACAGGGTGCAACCATCGCGGCGTCAGCTGAATGTGAGGACAGAGCCCCACACACAGCACTAACAGATCCATGATTTTACTTCGTTTGGGCGACGTGCGAGCCTATTCGGCCTGTCGGACTTTGTGTAGATTCGGGATTTGACCTAGCGTATAGGCCTTTGGTATACTCGAATTATCAGGGTCAAACTACCCTCAAGGAATCCTATTTCAGACGGACGTTTGCCACCCCGGAACGTTGTCGGATCGCTTCTTGCCGCAGGAGATTCATGGAGTGATAATCGCTGGACTCACAAGTGTCAGCGGACTTTCGGTGTCTTGTGGCCTACTTGCAGTGAACACCACCAAACGAGTAGTATTATGGATTGCCGGCGTGTGTTTGTGGCCATGATTGGTTGATGCGACGGCCTAGATTCTCCTACGGGAATCTACCAGGCCCAAAAGCGCTGACTTTTATGTATTTGAGGGGCCGAAATTACATAGTAACCCAGAACAAATACCCGTTAGTTATAAAGTGAGCGCATAAGTTTGGTCGATCCGGCAGTCGAA
CCATTGCGGTCGGACATATCCGCAGTAGTACACTAAGGCGGAATAGACTGCCGAGTCAACGCTCCCTCATTCTTGCTACCTTAGATCTCGCAGGTTCGACCATTGCTGAAGCCGCTGAATTACACGAGTTGTTTTGTTAACCCCCGGAATGTAGTTCGTACGCCTCAACTGATTCTTCAAAAGCTCACTGCACGTGACTTGTCATGTGTTCCTAAAACATACCTCATCTGTGGGTCTGGTCCCATAAGCATGGAATGTCCGTCGACGCAACATGGAAACCCACTCGCTCGCTATACGTTTATGGTGAGACAGAAACACACTGTATTAGACTGCCACTGATAGCCCCAGTAGCAAGGTGATGTGGCAGGCATGGTACCCAAACGTCTATCGTTTTGGGAACCAAGGGGAGTGCTAATAGGTCCGGCCACGTAGAATGACATAACCTCCAGAGGAGCGCAGGAGTTGATGCATTAAAAGATCCATACTAAACGTTAGCTTAATGCCTTCAAGGCACCAGCTACCTCCATGACAAGGAGATTTCGGAAGGGGTAAGATTTACTTCTGTCCCAAAAGGGTAATGACCCGTAGGGATGGAATCATTGATGAACTCTAAAGGGACTCAGCCGACTAGCCGAGAGGGCTGGACGATCATTTGATGGGAGAATACGCATACATCTAAGTGTCAAGTATTAAATCGGTAATCTCCGTAAAGGCGTGAAGTTCACAGGGCGCAGTTTCCAGTTACTCCAAGAAACTACCGGTTCAGTTATCGCTTCCGGTGCCTTCACAGCAAACATCATTCGCTCAAGAAAGTGCTGTCGCTGCGTGTGGATATTTCCCCCTCGTTCTGGAAAGTGCTCTCAGCGACGCACGTAAACATGCTGGCGACGAGAGAGGCGTCAACACGGTCCGTTACCAAACTGCGGCATTTACCACGAACCTGATTGCAAAGTGAGATTTCCGTAAGGAGGTTAGCCAAATATTACGTAGAGTGTTCCACACCAAATCCGTCGTCCACATTCGCGACGGCAGTCTAGACGTGTAATTCCCCGGATAATCCAGTTACTACATGCTGATGCAGTCATAGTGCACGCAAATGCGCAACTTAACAAGCACGACCTGAAACAGAGAACCCCTGTGTAGTCAATATAGGATGACGGACACACACACTTGCTGCTGCAATCTTACATTCTGCGAACGAGTGCAAAGTTGAAATCATGACGAACAGCCTTGCTTTTCAGAGTCTCTATCGAACTCCTTTACACCTCCATATCTACTTGCAAATCACACTAGAGGGGCGCAGCTTACTCACTGAGAGATGGTCTACCTAATCGATTTTCGGTGAACTTTGAGTACAGCATTGAGTCTGGAGGGTTCCACTACTTTATCGTACCGGTCCGACATGATTTCTTATCGAATAGATGTTGAGATGGACATTAATAAGCATAGTACGTCTCGATCGATGGCTACCTTTACGTCTATGAGTGCTTACATAAGGTCTCTCGTAAGTCATGGTCCCGCGGGGCTCGCGCAACATTGTGGATTAATGACTCCAGTGACGCATGTTCGATTCGCATGAAGTAGGTGGCGCGTATTCATACATGAATAGTAGGCAGAACGAGCACATTGGACCGATCTTGGAGGTTGGGCTTGAGGTCCCGCACTGATAGTTTACGGCCATGAAGACGACAATTGTCAATACTTCTCTATCCTGAGCGAATGCTGACGGGGGCCAGGCGGAAAAGTGCGACACAGTCTACTGATTACGTGTAGTACGTGGCTAAGCATATTCCCGGGTTCGCAAACATAGGTCTCTGATGGGGTATGGGTAAGAAATCTGAAGGTTGCGTCTCTCACCACGGTCAGGATACCGAATCAGCTCATAAGAGTTACAAACGCGCAAATGAAGGCCTAGTCCACAGGGGTGTCATTCGCACGAGCTGGGCTTAGAATCACTGTTTTCGGTCGCACCTGTAGGGGGACATGGGACGGATTTCACAACAAAAAAGCATCTCTCATGATTCGATTCAGATAGAGGAGAGGAGGTAAATGCCAACAAATCTATTCTTTAGTACCGCCTGACGGAACGTATTTAATCCTCGCCTCAGATCGACACCGCAGGGTAGCTGAAGACGTCCGTTCTTAGACATTTAGTCGATCATTTGTTATGAAACAGAACTAGGAGTCCGTGCCCTTCAGGCCGGCTCAGGGGCACCTACCTCCAGATCGCCCAGGTTGGGTTTATTAGGCGCCGAAAAGTTACTGCCCTATCAGCCCCTCCAATCCGACCTACGGACATCATCCCACTGGCTCGCAAAATATAAATTGCGGATGGGAAAGGATAAGGAAAATCATTACCTACACAGAAGGACAATGTCAGTTCCAAATAACACTGATACTTTCGGAGCAACTTGGTCCGGAAATGTAAGTACGACTATAGCCCTTTCGACCAACGCCGACAGTCCTATTTGGACGCCGAGAGAGGCGACGGGTAGCCGAATGTAAAGCTCTCGGGTCGCTCTTGGCGGAATGCGCTGCGGGTCCTACCCTAAACCCTTACCACCACCAACTTCGTTAGGAGCCGTATAGATTACAGCTCCCGCAAAATTAGAGAGGAATCTGAGTTATTAGCTGAGGACCCCGCATTTTCTGCGACGGCGTAGCTGCAGTGACGTACGATATGAGTTCCCGACTGTGAGGGAGTCCCAGTCGTGACTCCCTACAACGGCTCCAGATATTGTTACTTATGGTCAATATGCCCCGACCGCCCATTGTCTCGAGTACAGTCTTCCCCAAAGTTAAGCTGTGCATTACCTTACCGTTTTAGGTCCAGCTGGTAGCACCGAATGCTGCGCAATCCGAGCCCCCGAAATAGACTACGTGTCCACGGTCAATTGTCATGGGTAGCAGAGCTCAAAGAGGAGAAACGTGCCCCGTAAACCTATTAGATCTCGGTTGATAAATATCAGGCCACAGCAGGCTGCCCGATGCTTGTTTGAACAACAACTTCGGGAGCCGCGGTCCTTGGTTCTCCCGATATTCGGCCGCACCGAACGGTACGCGTCATCGCGAGGTGCGTTCTCGCAGCAAGAAATATTTGTTGTTGTTGTCTTCCTTCCGCATAGGAAACCTTAAGCGGTACCTTTCTACGAAGTTGAACCCTAGAAGCACGTGTAACAATTTTTTTTACGCTACACCCGGATCTGCTTCCATCTGTTGATCATATGAGCCTAATGTGACTAATCTGTGCCGTCGATTGAAAATTCGTTCTGAACCTAATCACATGAATTAAAATTAGGGCGAGAATTGGCTCCTTTTGGGCCGTAATCCTTCAAAGGGTTAACCGAATTTAGCCTCCACGGTGACACAAACTCCCATAGGTAAGGCAAACCCAATAACGAGGAAGCCTTGCCCACAGCATGTTTGATAAATACCCTTAGGGTAATATCGCGTGCAATACTGAAGCCGCTCTTCTAGCATCCGTGTTTGACATACTATGACCTTGAAGCCTGCCGCAGCTTCTAGGTCATCCAAGTAGATCAAAACGCCATGTTG
TGGATCCATGCATCTTCCCAGTGAACATGGATCTTAGTGTGACAGGCGAGGAGCGGCGAACACTATCGGTGTGGCAAGCTCGGGCCTTCGTACGTTGTGGAAGTATGCGAATAAGGAGACCGTAATGTATCAAGTTCTTAAGAGCCTTGGTACCGTTGCAATTCGGCATGTTCCTACAGAGACACTCCGTGTTTGTCATCCGTCATAGATCTATGGCGTAGTTAGCGCCTCTGAAGTAGTTGTCCATTCAGCAGGCATTGCTTAGGGAGTTTCTGGCGCTTGCCGCTCAAGATGCTCACGGGCCTAAGTAGCACGGCAACCTTTTGACAAAGCATTTTATAAACTGAGCATATTGGCCCGAAACTAATCCAGCAAAGGGTGAAGACCTGTCAGCGGGCCCAGAGTGTGAACGGTCTACTGCGCGGTACATAAGTGGCGTAATCCATCAACAAGACCTACACGACCTGAATGATTTCCAACAACTTTATATGCTTTTCCGCATCTCGAGAGTACCGGAATCTATGCAATCTCCCAAGGATCCGTAGATTTGAAATTCAATCCGACGGGGTAAGGTTGCCGCGCCGGTTAGCTAATGTGCGGATTTATAGTCTTTTTCCCAGAAGGCGTAGTTAGTTTCGCACCTAACTACGACACATACTTGGGTCGACTGTTGAAGGTGGTAAGTTGCGAGCAGTCCGCCGCTCTCACGCGCCGAACCACGTTCATATCGGCAAAGTTGCGCGATGACCTATAGGTGTGCAAAGCTCGTCCGACATTGGGATTGGATTCACGTACATACGTTAGTATCATGGGTAAGCTTCCATGTCAGCCTCGTGTATAGCACCGGTGCGCCGCGCGTTAAGGATTCTATGCCCAGCAAATGTGCCAACGTTGTGGGGAGAAAAGTGTAGTTGGATGCGATCGTGACATCGGCACACCGAAACTCTGCAGCCAGTCCCGCTAATCTCATTGGCACCGGGTAAGAGATTACCTTTGGTTAGGAATCGCGTGCGACGTACTGCACGAAAACAGTGCCTGAACCGAGGTGTTTACTTAGATGGTTCTAGACCCAGCATGTTCCTCACTGGAACCTGACGTCGGTACGTGATCCTCTATACCTCCTTTTCGGTATTGGCCTGGCAGCTACTCTAACTGTTTGGGCCGCGCCGATTTCTCGAGTCCACACGGCGAGGTCAGCAAAATTGCCAGTTAGTGGATGTTGGGATCTCAACGCATTACCATGAGAGTTCTTGGTTTACCCGTTAACATCGCTGCGCACGGTGTGAAAAGCCTGTTTCTTTGGCCCCCATCATCTTCGGCCCGCAGATCTCAGATCAATGATGTAAGGTTGCGGCGGCAAAGACTAGACTTGAGTCGTGAGATGGTGCTTTGCTGAGGCCGTCTCCTATAGCTTATTCTAGGACTTTCCGCAAACCACCCGACGTGCGGCTGTCCACGATCGGATTCCATTCTGTCTCGGAGCATACAGCACTAGATTTGCCGCTTGAAAAATGTTCCATAACCATGATTTCAACCCCATCTAGTCGGCAGGCACAGCTGAGAACAGCGAAGGGCGTCGTGAAGGGCATTGCCCGTAGTGTTTCAGACGTGCTAGAGACTAAATCAACTATCTGCACTCGTAGCCTGGCGTGTGAGATGTCACCACGATGTGCCTAGAGGAGTGATTATGAACATGTATTACCACGTCCGGGTGTCGACGGCTATATGGCTAACATTTCTTATGGCTAGACGTGCTTGGAAAGGTTCCCCAGCCTTCTGTTTCCCGGTGCTTTCCACGAGTCTGGAGTTCTGGTAATTAACTACATGGCGTTAACGCGGAGGTAACCCCCAGTCATTGCATTGCAGGTAGGGCTTAGGTGCAATATAATTCACCAAGGCGCGGATTCCTCACGATTGTTACGAAGACACCCGGAGGGTTTCAGTATGGCTTGAGAAGTGTACGTTTTTCCGGCCAGGGTGTAACTATAACCAACACATGTTTGGCCACGGGCTAAGTCGGTCCGCACGACTGATTTCCCCCGCCCATGTGTTTGGGAGCAATAAACTGCGTCTGCCAAGAGTAACAACTCGAGTAGAGAAGGGAAGTCTCAGACTATTTTGCAAATCAGACTGTAAGGCTCAACAGCCATACAGCTTGCCCTACTACTGAATACTAGCGTAGCGTGGCCACATAGGAAAGACTTCATGTCTTCTAATAACCTTTTACCTCCAACGTCCCCGCCGTCTTCACGCGGTCCAACGATGAGGAAACAACCACCCCTATCTTCCGCGGAGTGGTTCACACGACCCCCGGCGTTAACGCGCACGTTGTTGTCTTTCGGGACGGCACTACCCCCAAATGCCCAGACCCAGTGCTAGCGATATTCAAACGCCGTCCGGTAAGTCCTGACGTTTTTCAACTGGATGCACTGGCGACACGTAGTTCGCAAGGCGTCCATGAGAGGTTTTAACCGTCATGTTTCCGTATCACGTCTTATGTCTGTCTCTATTCTCAGCGAAATTCTCATCATAGGGCGGAGACTATCTGAAGGCCAGCGAATACAAGATTTAATATCAAATATAGCATGGGGGCCAACAGAGGCCCCCCTGGTGCTGACGAATTATCGTGATATTAGTACAGCTGTCTGCAATGCCATTTCGAAGGCTTTTTGTTCGTATCACTGCTCTATGCATAGCGGTCACTATGACCTCTCAGCTTGACTCACCCGAATGACCAATTGTGGTCCAGCACTCCCTCATCTTCCCCCATTAACGATACGTTGGGCACCATCGGTGTGAGCTACCCGTTACAGTCATAGAATCGTTCTTTGCGTTGTACGCGGCACGGAGGTGACCGGGAAAAGCGCCGCGAAGGCCCCGCACTGAATAAAGCTAGTATTAGCGTCTGTCAAAGTGTTTTGACACCTAATTCGCTTCCAAGTCCCAATATCTAATCTAGCCTGCTTTGGGCCAACATCTCATTGCGTTATGCTAATGAAGAGGGTGCGGGATCACATCCGCTCTTCTCTTCCTATACACAGCGGACATTCGGGTTGGACGTTTGGAGTGATAATTTATCGTTAGGGATAAGTATGTCGGCGCTTAGTAGTATAGCCCGCTGACCAGCGTTCGATTTCGAACCTTACTGGACATTCTCAATAACTACTGATCATGACGTTTTCCTCAGTTCCTAGCCTTGACAACTAGCCACAGTCAGCATGGTAGAGAGCGTTGAGCCGGGGATAGCCAGGCTATTAAGACAAAGACCCTCGGGCCCCTTAATGCGCGTCAAGTCTGACGGTTTGAGTGCGGAGCAGTAAGCGCTTTGGTATAACCGTGACGTAGCAGATCCATGCTTCGCCCGCTTCCACCTGAGAGATACTAGCCTCTTTCGCACTTTGTAGGATTACGGGCAGCGAAATATTTATCCTGTGCGGCGAGCCCGCTTCGGTTTCGAGCTCTATCAGTGCGCGGTTGGCACTCCAACGCACGATAACATATACCCGCCCACAAGGCCATGCAGGTTTAACCTCCTATTCTGATTGTACCTGGCTG
ACTTTACGGTACCCACCAGCGCAGGATTAATAGCCTAATTATGCTAACCGGTGCTAGTCTAACTGCTGTTACTAGTCCGCCCCAGCTACCCCACGGGTCAGTAACTGCACCAGCAAGCATGGTTCTCCTCCTGAAGTTGTACGTTCGAGAACCCCGTATCGAGTTGGTATATAAATTAAGGGTTGTCTAAAACAGAAGCCTATTCCGCTATCATCGGTGTAATAACTGATCGCGCCGTGGTTAAATGGAGGAGCACCCGCATGGATACATCGCTAGCGTCTTGTAACTCTCTGGGGGCCTAGTATGGAACGGAACAATGACATCATTGCTTACGGGGCCCGCACTTAGCTGTCGCGTATCGCAAATCATATGGCATGTCAGTCCCGACATCACGAAAATGACCCCATCTGAGGTGGTCGGGAGGCGAACAGTCGAATATGATGTATGCACCCGCAACTTAATGTTCAAAGGCGGGCGAAATGCCTTCTCCCGTCCGGACTATCCTGAGTGCTAGCCGCGAGTCTGTAAAGGTTGACGCAACCATATAGCACGCAGAAAAATCACTCTCACACCATGAGAACCATGGCGGCACGCTGTCTACTTTGTCTGACAGGCTACGGAAGGAATGGTACATACGTACAAACGGATGATATGATATCGGTCATTGCCTATTGTGACGCTACCCTACTGCATCACCCCCTTAGAATGCGTTGGACGCTCTATAGCAGATCCTCCATCCAGTGGAAGTCTCGTCGCCGTGGTTTGCCTTAACGACCGTTGGAGAGAGCAGGACAGAAATATCGCCCTTTTGAGCGCATTATTTGGAATCGAGGTAAGTCAGTGCGGCATAATCGCGCCTCGTGAGCGGAACAGTTTTTGATCCCACCCGCTAAATGCCAAGGTGCTGTAACCTGGGCGCGACACCAAAAGACCACGTGCTGTATGAAGCATGTGTTCTAGCGCACTCTCAACCGTTACCCCGAGAGTAAAATGTTAGTTGTAGGCCGATTCTGCAATGGTAATTGGCGGAGTGTCTAGGGAAATGTTTCGGTCATACTTAACCGGCTACCTCTTCCTCCCTCAGATTCGGTCTGAGATGAGATATACTGGGTGAGTTGAGTCGCCCTGTATCGTTGCGGCGCTCGTGGACCAGACAGACAGTTCCCGTTTATCTCTGCTTCTAGATGGAGGGTCGCCTCCGTGTTAACGCCGGCGAAGGTAGTCGCAGCTGAAGTTGTGATGCACAATCAGGTGAGCCTTTTAAGTATGGTCCTACGGACGTGAACAGCTGGGCCCAGTCATTTAGTACGGGGGGTTTACCTATAAGGATACGGTAAGAACGTCATCTATCCGTCCCACTGGAGTCCGAGGGGTTCGTGTCTACACGGATTACTTATCATGCACACACGTCTACGGTCATGCATAAAGTTGTGCAGCGCAGCAATCGGAGCGGAGTTACACCATCTCCCTATTAACAAGGCACTTATTAGTACTTACCCCGTTATAGAGCTCTCATCTTATCGATAGAGCGCAGTCCTAAGTATTGGCTCGAGTGATTCGCTCCTCAGCCCTTGATTGTAACTCCCCCGATTGCAGGTTGTATGGTGAGTAAAATCTCTGCGCCCTTCTGTTCGGATAAAGAACCCCGACCACTAATGCCCGCCTGCTTGTTGGGCGGTAAATGGGTAACGGAACATGGACTATGAGTGCGATGATGGTCAATAGAATTACCTTATTACGCAGTAAAAGGAATGACGCAGACAGGTATTTGTCGACGATTGCTTCGAACCTGGCAAAATGGGGAGGTATCCTGTCATGTTCATCTGTAAAACAACTCCTGCCTCTTCGTAGAGGACACACACTGTGGGCCTTTAGCCTTTAGCAGCCCATTGGGGCTTACCAGCTGTCGTCATGGGGTATCATTAAGATCCATGCGCCCCCGAAACTTACTGCAAAACAATATGGCTTAAAGGTAAAGGGACCATCAGGAGAATGCTTAAGAGCGACATATAGATACGTATTTAATTAATTTATGTTAACGCAACCATCTCGCAGGAGTCGCATAGCATATTGCCGGGTGATAGTTAATGCACTGTGCTTCCGTGTTTATATAAAATAAGCAGTAACCTCTGACAGGTTGAGACTCCAACAAGTGCTCCGGGTATTTACCTTCTACCATGGCGTTCTAATATCACGAAAGAGAAATTGTGTGTACCGATGCCAGGTGACCGCCCGCGTGCGCCAACGACGCAATCTAGAGCATCCACGCTGAATTGGGGAACTCTTGCCGTTCGTCGCATGGTGTACTTGGTACCACTCGATATGCCTGATTAGGTTTGGCCGTAGCACGTAAGGTAGTGACTTTCCATTCAAGCTAGCGAAGCGACACCACCACAGTGCCCGGTCAAAATAACCCACACCTGGCCAGCATAGAGGCTAAAATAGCTACAGTGCGCTAATCGAGTGTTTTTGCATCGGCTCGTGGCTGGTGGACTCGGGACAGCTTAGAACTAACTCTGGTGTACAAACGCGATCGTAGCTCTCGCGACTTACTCACCGGAGTAGGTTAGATGGACAAGACCTAACCCGAAGCCTAAATCGCCCTGAGTGTTAGCCGCCATTCAATTCTATGGTTTATCGGGGGCGTCTATGGCTGCGACAGTATGGAGGCCCGTTATGGGCACCCGAGTATCGTACCATAGTAATCCCATATTCCTCTTCGAGCGACTATTGGATCAACATACCTACAGGGTAGTATGAATGTTCTTGATTACAGAAACCATGGAATCGGCGCATTCTATGTTTCACTTCCGAATAACAGTGAGCAAGGCATGCCCTTGACAAGGATCATCCCGACAGCAAGCCGATCGGGCCCTAGAGCCCGACCCCCAAACAGAACACCGGCCACGTAGTTGCTGGGACTAAACAAAGGTGTGTTTCCATAAAAGGAAATCTTCAAGTGTATTGTTGAGTCGTAACGCTTATATTTATGGCCCAATGGGCGTTGCGAGCACAGTAGCAGGCCTAGATGAATGCCTAGGCCACGATCGGGGGGAGGCTCATTGAACGTACTGCCATACCAAGCCCCCGTATGCTATGGCAGGAGGGGTTCTCTTCGTATAGAGCGAGGGTCTCTACGCCAAGCAGCATTCCCGTGTTGGGTGGCCAATGGGGCTCACTAGAAACTCGGTTTTTTTAGCGAAGGAATGAGCAAACTCGTGAAAGGTGGTACACACCAGTTGCGGCCGATTTGTTGTAGCAACAAGGTTTGAAGAATTGAGTAGATGGGCCAATTTACCTCCTATTTAGCGAGTGAGATGGCGCATGTTTATTCAGACTCCATGTGGGGTAGAGGCTAATCGTTTAGTAGCAATAACCCCGCGGGGCAAGAGACCGTAATAACTTGAATCTGTGGTAGCTATGAATATGTGCTTCGCCCTAAGTGTTATGTAACAAGAGTGATCCAGGGGCTCAGATCACACTTAGTACGATCCGCTACTGAAATGCGGCCGCGGGCTTGCACGCTGGACATAAGTCGGATAATCAATTGCCTAC
GACAGGTTCAGCCATAAGGCTTGGCTCCTAACACACTCATGATGTCTGGCTTTTACTCGTGCCCGGACATAAACGTATGCTCAAACGCGAGACAGGGGAGGGTCAGCACCGTTTAGATCTATAAGGCCTACCGGTAATATGGATCGACAACAAACAGATGCTATAGGGATACCTACTCCTTTGGACCCACATGTAGATGAAGGCAAACACGCAGAGCAAAGGAGAGTAGTCCACCCGGTATAAGTTTGTGCTTTGAATTCTGGCTACGCAGACTTGCACTCTGTCCCGGCATTCACTATACTTCTCCGGAAGTCCTTTAAGAAATGTCCGCGCTCATGTGGTTCCCGTTGCTCAGGGGCCAACTCAAGTAGATCTTTAAGGCGCAGTCGACCACAGGCTACTAGATACGAGTTATACTTATCCGGACATCTGGCTAAATACTTGGATACGATACTTCCCCAGTCGTGAGAACGAAGCTAATACAGATCGAATTTCGATGGTTCAGGCAGGCAGTTCTCAGGAGGCAAGGTGTTAAATAGTTTCGGAGGCTCTTTCGTACGATCAGGGTCTACTACCCTAGGGCATTTTGACTTTGGATTAAATATGCAAAATGCAAGGCCGATTGTGATCAGTACTGATACTCCAACTGGACCACCTTCAGACCCTTCGAGGGGACCTAGACGACGGGAACCCTTCCAGCGGGTGATACCAGTTAGAGCAAGTCACAAACACGATTCAGCCCCCGGGGTTTATGACGTACCATGCGAGTAATAATGCACGTATACGGAGCTCTTCCACCGAGCGATGGCATTTCGGGGCGAGGTAGTTGTCTTTCATTGGCATCGCACAACCCCCATCCTCTTAATTGGCATCGTCTCCAGCTGGAAAGAATTTGAGTGAGCATGTCGCCCCTATTATTCCGTTGCCAATAAAGTGTCTCAACTTTTGGCGAAGGTTTTAACGCATACAAGGAGAAGCCGCGAGACGTCTGTACCGCTGATCTGGACGCAAAGTGCTCGGACTGCCGCTGAGTTATCCTGGACGCCATGATTAGAGCCGTCGTCACTACCTGCATACATGGGCCGATAGAGTACTGCAACCAACAACTCACTTAAGCTCCACAACGGCTGGACACTTCCGAGAGCGGTCTTACACAAACGTTAGGTCCTGGGCCGCCGACCTTACCGCTAGTTAGTGAGAGCCAGTTAAAATTATGAACGCTCGGAACCTTCCCAACAGTGGCCGCAGCCTTCCTTGACGCCTAGCACATCTGGTTTATACTCGGGTATGCCGTAGATCGGTAACCTAGGGAACGACCCTGTGGGTTTAACACCCGAGTGCGTAATCAAGCCTAGAGGCCATCTCAACTCGAGAGGTCTCCTGACAAAGAGGCGCCCGATGAATCATCCAGAGGCGTCTGGCGGTCCTACGAGAGTGGCTTTGGATGCCTGCCCCTTGGATGGATCTGTCTTTAATCGGCGCCAATACCTAGCACTGCTAGGCTCCAGACTGTGTTTACATGCCGTAACCCTGATACTCGCAGAAACGTTGCTGGAAATTCCTAGCAGCTGAAACCATTCCCCGTAACGTACTAGTACGCTAAGAGAGAGTCTCTCCTGGCCCTGATGAGTGTGTTCTCATCTGGGGCACGATACAAGAATCGGAACGAACGCAATGCCGAAGTCCCTTGTACCTTAATTTGGGCGACGCAGATAGACCCAAAGATCGCGGACTACGGAAACTAGCATAGGACCTGTGTCGAGAAGGTTCGAGCAGGTAGTGACACGCAGCGCGGTGGCCGGCGGGGTGGCACATTGCGGGTCAATACTGGTAGTAGCCACTCTTTGGACATAGCGGCGGACCAGCGCCTAGAATGTCTCATTCTCATTTTGTTCCGTGGCACGTTACGTAATGACGGCCCGCCAGCACCTGTGTATGGACTTGTAGCTCGGGCCTCTGGTCCTGGCACGACAAGGCACCAGCCAGTAATCTCTCCTAAGGCGCTAGCGTGCATAGCGCGTCTGCCTACCGCCAGAGAACGCGTCATCTGCAAGACGTCCCAGCGTAGTGAATTGTAACTGCAAGCGTTCTCTTACGGTCATAGTGCCGATTTTGAGCAGTAATGGAAGCAGCAAAATGCCGCCCAAGCGATTCGCAAACTTCTAACAGAGCTACAGCCGGACACGACGCGGTGGTGCTCGCGGTTGGTGATCTTATGATATTAACGCCCATAGCGGCCATCTTAATCGACACCATGTTCGTTTTGGCAGGCCTTGTGGTAAACACGTGCTAGTGGCACCACCCATGCCCGTGCCCATACATCCAAACCGAGAGAAAGCCTATTTAAGCGAAAACCACAACTTCGAGGTTTCACCCCCTGCCATTGATAAAGCGAGGAGTACCCCCGATGCCGGGAAGCGTCCGCACCCATTTCTTTCGTTCTGGAATCCTCGGGCGACTTCTCGAAGATACTGTGCTCACGACCTGGAGTATCATGAACAATCGGAGGAAAATGAGTAATTGTCGAGTCGTTGTTAGACGGCACTTCCGTCCGGCCCAACTGTTCTCGGATACGTGTCCCGTGGTCAATGCTCTAAACCGGCTGCCGGCGACTCAGTTCACTGAGACAAATTCTGATGCTTTCGAAGCAAGGATGCGCCCAGAGCAGAGCTGCCCAGATGAGGTTAAGAACGTAACTATAATCGATCAGCCATTCGGCTTAAGGGGCCCCGGCGAAACGCGAAACACTTGGCACATGGACGCTTCACGCGCAACAGTAGTTGTCTCTTTCGTGAGCCACCGTAGCAGCTAGAAAGGCCTATCCAGTGATGCTTTATGACTGAGTGTCGAATCTAGGTATAGCATAGACTGGCTGATCGGGCGGGTCGGCCCACCCGTCTCGGTCGAGCGGTTCTGACTTTGGGTGGCTGTGTGAACCCAACTGCAGATGGAGTTGAATGGGTACACCCTATGCGAGGCCTCGTCTTTACACCAAATCGGGGCCCTGTGAAGTGCCACTCTTTTCCAGCCGGCAGCCGCTCAGTCTGATTTTGCTTGTACATGTCGTGTGCGAACGTTCCGGGAGGCTTCCGTGTTCCAAATACCGTGTTCTCATATTCGGTCCATCTACCGACGGAGAGTTGGGATGCCCGGGCCCGGAAATATAATTTAAACTCGTGGCCAAGAATTTAGCATGTTGTAAACATGAGAGACAGGGCCGGGCTAAAACATTACCCCTGAGTAATGTAGAGCCACAACTGAACATAACATTGGGATCTAACGCACGCAATCAGTGTAGCTTCAGCCCACCCTCTAAATTTCCCCCGGACAACTGGATTATCACCTGCGTCACGCGATAATTGCTCGCATCTCACCAACACACTTCGACAAATCTGGAGTCTCCCTGGTCCGTACGTCCAAAACCGTTTAAATGGGCGGGTGTGTCGTGAACCAATCTCCTCTTCCATTTGTCACATACTGGCGATGACATCCTTTTACTTGAATTATTCATCCGGGCACCAGCCGCTTTCCCTACGATCCCCGACACTCGGGGCT
TCGGGAGTTGCCCGCCAAAAAACCGACAAACCAAACTATACAATCAATCCCATCTAGATGTAGGGGACTGAGGCTCTAAGCTATGCGCCTACTATACTTTGTAGGTATCAAACTACGCTTGAAGATAGTTGATAAGGAAGCGAATTGATCGAGTACCGTATCTTCAGTCCGACTCCCGTTCGAACGCAGCACGCTAACATGGTCCACTGGCATTCTTACTAAATACCTAGTTCACTTCTACATGAGGAGTGTCTGGGCCGGACTCACCTTTGATTAGATAACTGAAG diff --git a/toucan/src/test/resources/fake_chrQ.fa.fai b/toucan/src/test/resources/fake_chrQ.fa.fai new file mode 100644 index 0000000000000000000000000000000000000000..b7a558fdb3b3c0e85f6e3c634cc3ae80c601336d --- /dev/null +++ b/toucan/src/test/resources/fake_chrQ.fa.fai @@ -0,0 +1 @@ +chrQ 16571 6 16571 16572 diff --git a/toucan/src/test/resources/log4j.properties b/toucan/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/toucan/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to DEBUG and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/toucan/src/test/scala/nl/lumc/sasc/biopet/pipelines/toucan/ToucanTest.scala b/toucan/src/test/scala/nl/lumc/sasc/biopet/pipelines/toucan/ToucanTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..25c48f8f3b696d1709ba1bfd9ecb15b142a6a0fe --- /dev/null +++ b/toucan/src/test/scala/nl/lumc/sasc/biopet/pipelines/toucan/ToucanTest.scala @@ -0,0 +1,111 @@ +package nl.lumc.sasc.biopet.pipelines.toucan + +import java.io.File +import java.nio.file.Paths + +import com.google.common.io.Files +import nl.lumc.sasc.biopet.extensions.VariantEffectPredictor +import nl.lumc.sasc.biopet.extensions.tools.VcfWithVcf +import nl.lumc.sasc.biopet.utils.config.Config +import org.broadinstitute.gatk.queue.QSettings +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by pjvan_thof on 4/11/16. 
+ */ +class ToucanTest extends TestNGSuite with Matchers { + def initPipeline(map: Map[String, Any]): Toucan = { + new Toucan { + override def configNamespace = "toucan" + override def globalConfig = new Config(map) + qSettings = new QSettings + qSettings.runName = "test" + } + } + + @Test + def testDefault(): Unit = { + val pipeline = initPipeline(ToucanTest.config) + pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz")) + pipeline.script() + + pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 1 + pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 0 + } + + @Test + def testBinning(): Unit = { + val pipeline = initPipeline(ToucanTest.config ++ Map("bin_size" -> 4000, "min_scatter_genome_size" -> 1000)) + pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz")) + pipeline.script() + + pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 4 + pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 0 + } + + @Test + def testGonl(): Unit = { + val pipeline = initPipeline(ToucanTest.config ++ Map("gonl_vcf" -> ToucanTest.gonlVcfFile)) + pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz")) + pipeline.script() + + pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 1 + pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 1 + } + + @Test + def testExac(): Unit = { + val pipeline = initPipeline(ToucanTest.config ++ Map("exac_vcf" -> ToucanTest.exacVcfFile)) + pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz")) + pipeline.script() + + pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 1 + pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 1 + } + + @Test + def testVarda(): Unit = { + val pipeline = initPipeline(ToucanTest.config ++ Map("use_varda" -> true)) + val gvcfFile = File.createTempFile("bla.", ".g.vcf") + pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz")) + pipeline.inputGvcf = Some(gvcfFile) + pipeline.script() + + pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 1 + pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 0 + } + +} + +object ToucanTest { + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + val outputDir = Files.createTempDir() + outputDir.deleteOnExit() + + val gonlVcfFile: File = File.createTempFile("gonl.", ".vcf.gz") + gonlVcfFile.deleteOnExit() + val exacVcfFile: File = File.createTempFile("exac.", ".vcf.gz") + exacVcfFile.deleteOnExit() + + val config = Map( + "reference_fasta" -> resourcePath("/fake_chrQ.fa"), + "output_dir" -> outputDir, + "gatk_jar" -> "test", + "varianteffectpredictor" -> Map( + "vep_script" -> "test", + "cache" -> true, + "dir" -> "test" + ), + "varda_root" -> "test", + "varda_token" -> "test", + "bcftools" -> Map("exe" -> "test"), + "bedtools" -> Map("exe" -> "test"), + "manwe" -> Map("exe" -> "test"), + "bgzip" -> Map("exe" -> "test") + ) +}
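A minimal, hypothetical usage sketch (not part of the patch above): a Biopet-style config map, written in the same style as the test configs in KopisuTest and ToucanTest, that exercises the options this diff introduces. The object name and all paths/executables are placeholders; only the keys and their defaults are taken from the diff itself (Shiva's "cnv_calling", Kopisu's "use_freec_method"/"use_conifer_method", FreeC's "exe"/"chrFiles"/"chrLenFile", and Toucan's scatter settings).

```scala
// Hypothetical example config, assuming the option names added in this diff.
// All paths and executables below are placeholders, not real defaults.
object NewOptionsConfigExample {

  val config: Map[String, Any] = Map(
    // Shiva: enable the new Kopisu-based CNV calling step (diff default: false)
    "cnv_calling" -> true,

    // Kopisu: FreeC is enabled by default, Conifer must be switched on explicitly
    "use_freec_method" -> true,
    "use_conifer_method" -> false,
    "freec" -> Map(
      "exe" -> "/path/to/freec",          // placeholder
      "chrFiles" -> "/path/to/chr_files", // placeholder
      "chrLenFile" -> "/path/to/chr.len"  // placeholder
    ),

    // Toucan: scatter over the reference in bin_size chunks; scattering is on by
    // default only when the genome exceeds min_scatter_genome_size (diff defaults shown)
    "enable_scatter" -> true,
    "bin_size" -> 50000000,
    "min_scatter_genome_size" -> 75000000
  )

  def main(args: Array[String]): Unit =
    config.foreach { case (key, value) => println(s"$key -> $value") }
}
```

With a config along these lines, Shiva would add a Kopisu sub-pipeline writing under a "cnv_calling" directory, Kopisu would run the FreeC method per input BAM, and Toucan would run VEP and VepNormalizer per reference chunk before merging the results with CatVariants, matching the behaviour added in the hunks above.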