diff --git a/basty/src/test/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTest.scala b/basty/src/test/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTest.scala index 732ac96e6ec6e68f5da7c2484ff2e1c21572dd59..f33efefac8638c6aaf64bccccbf22f7fd31a929b 100644 --- a/basty/src/test/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTest.scala +++ b/basty/src/test/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTest.scala @@ -98,7 +98,7 @@ class BastyTest extends TestNGSuite with Matchers { val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0) val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) - pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + (if (sample2) 1 else 0)) + pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + numberSamples) // Gatk preprocess pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0)) diff --git a/biopet-core/src/main/resources/nl/lumc/sasc/biopet/tools/plotHeatmap.R b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/tools/plotHeatmap.R index 7f7237e90f6593e3d6cf110da005cd89c154d466..e9ab7ebd1bd142a1502b9be878a75b9598bfacb9 100644 --- a/biopet-core/src/main/resources/nl/lumc/sasc/biopet/tools/plotHeatmap.R +++ b/biopet-core/src/main/resources/nl/lumc/sasc/biopet/tools/plotHeatmap.R @@ -14,6 +14,7 @@ rownames(heat) <- heat[,1] heat<- heat[,-1] heat<- as.matrix(heat) +textMargin <- max(sapply(rownames(heat), nchar)) + 4 colNumber <- 50 col <- rev(colorRampPalette(brewer.pal(11, "Spectral"))(colNumber)) for (i in (colNumber+1):(colNumber+round((dist(range(heat)) - dist(range(heat[heat < 1]))) / dist(range(heat[heat < 1])) * colNumber))) { @@ -22,7 +23,7 @@ for (i in (colNumber+1):(colNumber+round((dist(range(heat)) - dist(range(heat[he col[length(col)] <- "#00FF00" png(file = outputArg, width = 1200, height = 1200) -heatmap.2(heat, trace = 'none', col = col, Colv=NA, Rowv=NA, dendrogram="none", margins = c(12, 12), na.color="#00FF00") +heatmap.2(heat, trace = 'none', col = col, Colv=NA, Rowv=NA, dendrogram="none", margins = c(textMargin, textMargin), na.color="#00FF00") dev.off() hc <- hclust(d = dist(heat)) @@ -31,5 +32,5 @@ plot(as.dendrogram(hc), horiz=TRUE, asp=0.02) dev.off() png(file = outputArgClustering, width = 1200, height = 1200) -heatmap.2(heat, trace = 'none', col = col, Colv="Rowv", dendrogram="row",margins = c(12, 12), na.color="#00FF00") +heatmap.2(heat, trace = 'none', col = col, Colv="Rowv", dendrogram="row",margins = c(textMargin, textMargin), na.color="#00FF00") dev.off() diff --git a/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala index 6d8f340b6e2aa2341b7bc8a1cebaf102056e41e6..7690468675e801e4b208721c58e1e16ce1c5d9d7 100644 --- a/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala +++ b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala @@ -54,7 +54,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => } writer.println("set -eubf") writer.println("set -o pipefail") - writer.println(this.commandLine) + writer.println(lines.mkString("\n")) jobDelayTime.foreach(x => writer.println(s"sleep $x")) writer.close() } @@ -99,7 +99,13 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => beforeGraph() internalBeforeGraph() - if (jobOutputFile != null) this.commandDirectory = this.jobOutputFile.getAbsoluteFile.getParentFile + if (jobOutputFile != null) { + this.commandDirectory = this.jobOutputFile.getAbsoluteFile.getParentFile + this match { + case s: ScatterGatherableFunction => s.scatterGatherDirectory = new File(this.commandDirectory, ".scatter") + case _ => + } + } super.freezeFieldValues() } diff --git a/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala index 4d02cf7ddb6f319d6d3dee9a7c4bdf8cff14aae7..254e7367337494b272f609f1c762ddeedcf11616 100644 --- a/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala +++ b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala @@ -126,17 +126,21 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript => } }) - logger.info("Adding report") - this match { - case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) => - logger.info("Write report is skipped because sample flag is used") - case _ => reportClass.foreach { report => - for (f <- functions) f match { - case w: WriteSummary => report.deps :+= w.jobOutputFile - case _ => + val writeHtmlReport: Boolean = config("write_html_report", default = true) + + if (writeHtmlReport) { + logger.info("Adding report") + this match { + case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) => + logger.info("Write report is skipped because sample flag is used") + case _ => reportClass.foreach { report => + for (f <- functions) f match { + case w: WriteSummary => report.deps :+= w.jobOutputFile + case _ => + } + report.jobOutputFile = new File(report.outputDir, ".report.out") + add(report) } - report.jobOutputFile = new File(report.outputDir, ".report.out") - add(report) } } diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala index 7317d1a72c099459a0f460b1569ecf3279522d06..f54cdcead164f5a0e9defd79a926a817ab9b8238 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala @@ -33,7 +33,7 @@ class BwaMem(val parent: Configurable) extends Bwa with Reference { var R2: File = _ @Input(doc = "The reference file for the bam files.", shortName = "R") - var reference: File = null + var reference: File = _ @Output(doc = "Output file SAM", shortName = "output") var output: File = _ @@ -42,7 +42,7 @@ class BwaMem(val parent: Configurable) extends Bwa with Reference { var k: Option[Int] = config("k") var r: Option[Float] = config("r") var S: Boolean = config("S", default = false) - var M: Boolean = config("M", default = true) + var M: Boolean = config("M", default = false) var w: Option[Int] = config("w") var d: Option[Int] = config("d") var c: Option[Int] = config("c") @@ -75,7 +75,7 @@ class BwaMem(val parent: Configurable) extends Bwa with Reference { if (reference == null) reference = referenceFasta() } - def cmdLine = { + def cmdLine: String = { required(executable) + required("mem") + optional("-k", k) + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/clever/CleverFixVCF.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/clever/CleverFixVCF.scala index cfe21c5ed8e5a57d72b7c26d759247b465115455..c6258b19924566ced6ce0f4f24393078f85b7f4c 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/clever/CleverFixVCF.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/clever/CleverFixVCF.scala @@ -38,6 +38,8 @@ class CleverFixVCF(val parent: Configurable) extends BiopetJavaCommandLineFuncti @Argument(doc = "Samplename") var sampleName: String = _ + override def defaultCoreMemory = 4.0 + override def cmdLine = super.cmdLine + required("-i", input) + required("-o", output) + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala index 4c8b7a7c6fb9dcbefd85dd10501dea8b460218fa..f6731772e6e9b68708a6c63a34b13f8234a0b99b 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala @@ -56,6 +56,8 @@ class HaplotypeCaller(val parent: Configurable) extends CommandLineGATK with Sca @Argument(fullName = "maxGGAARExtension", shortName = "maxGGAARExtension", doc = "the maximum extent into the full active region extension that we're willing to go in genotyping our events for GGA mode", required = false, exclusiveOf = "", validation = "") var maxGGAARExtension: Option[Int] = config("maxGGAARExtension") + var useNewAFCalculator: Boolean = config("useNewAFCalculator", default = false) + /** Include at least this many bases around an event for calling indels */ @Argument(fullName = "paddingAroundIndels", shortName = "paddingAroundIndels", doc = "Include at least this many bases around an event for calling indels", required = false, exclusiveOf = "", validation = "") var paddingAroundIndels: Option[Int] = config("paddingAroundIndels") @@ -441,6 +443,7 @@ class HaplotypeCaller(val parent: Configurable) extends CommandLineGATK with Sca optional("-paddingAroundSNPs", paddingAroundSNPs, spaceSeparated = true, escape = true, format = "%s") + repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + repeat("-A", annotation, spaceSeparated = true, escape = true, format = "%s") + + conditional(useNewAFCalculator, "--useNewAFCalculator") + repeat("-XA", excludeAnnotation, spaceSeparated = true, escape = true, format = "%s") + repeat("-G", group, spaceSeparated = true, escape = true, format = "%s") + conditional(debug, "-debug", escape = true, format = "%s") + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala index 1348bb9591391d850fd55e44762b5b38f498fd87..3244827d9e73de7f4b6814c434e8905451b7b80a 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala @@ -25,6 +25,6 @@ abstract class Sambamba extends BiopetCommandLineFunction with Version { executable = config("exe", default = "sambamba", namespace = "sambamba", freeVar = false) def versionCommand = executable - def versionRegex = """sambamba v(.*)""".r + def versionRegex = """sambamba v?(.*)""".r override def versionExitcode = List(0, 1) } \ No newline at end of file diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala index f664365868cf2cf86dfc1c77713c8294e2073687..eced9b3ebf6881f724bad38a59f413f71f5519df 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala @@ -21,7 +21,6 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output } /** Extension for sambemba markdup */ class SambambaMarkdup(val parent: Configurable) extends Sambamba { - override def defaultThreads = 4 @Input(doc = "Bam File") var input: File = _ @@ -32,17 +31,29 @@ class SambambaMarkdup(val parent: Configurable) extends Sambamba { var removeDuplicates: Boolean = config("remove_duplicates", default = false) // @doc: compression_level 6 is average, 0 = no compression, 9 = best - val compressionLevel: Option[Int] = config("compression_level", default = 6) - val hashTableSize: Option[Int] = config("hash-table-size", default = 262144) - val overflowListSize: Option[Int] = config("overflow-list-size", default = 200000) - val ioBufferSize: Option[Int] = config("io-buffer-size", default = 128) + val compressionLevel: Option[Int] = config("compression_level") + val hashTableSize: Option[Int] = config("hash-table-size") + val overflowListSize: Option[Int] = config("overflow-list-size") + val ioBufferSize: Option[Int] = config("io-buffer-size") + val showProgress: Boolean = config("show-progress", default = true) + + override def defaultThreads = 4 + override def defaultCoreMemory = 4.0 + + @Output + private var indexOutput: File = _ + + override def beforeGraph(): Unit = { + indexOutput = new File(output + ".bai") + } /** Returns command to execute */ - def cmdLine = required(executable) + + def cmdLine: String = required(executable) + required("markdup") + conditional(removeDuplicates, "--remove-duplicates") + optional("-t", nCoresRequest) + optional("-l", compressionLevel) + + conditional(showProgress, "--show-progress") + optional("--hash-table-size=", hashTableSize, spaceSeparated = false) + optional("--overflow-list-size=", overflowListSize, spaceSeparated = false) + optional("--io-buffer-size=", ioBufferSize, spaceSeparated = false) + @@ -51,10 +62,11 @@ class SambambaMarkdup(val parent: Configurable) extends Sambamba { } object SambambaMarkdup { - def apply(root: Configurable, input: File, output: File): SambambaMarkdup = { + def apply(root: Configurable, input: File, output: File, isIntermediate: Boolean = false): SambambaMarkdup = { val markdup = new SambambaMarkdup(root) markdup.input = input markdup.output = output + markdup.isIntermediate = isIntermediate markdup } diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala index a0418bb92876d9d1e1b07d826c5fb7738580e629..1b49c8656e7b5fb1a0a7228396cfe4227c70ceef 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala @@ -21,7 +21,6 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output } /** Extension for sambemba merge */ class SambambaMerge(val parent: Configurable) extends Sambamba { - override def defaultThreads = 4 @Input(doc = "Bam File[s]") var input: List[File] = Nil @@ -30,13 +29,29 @@ class SambambaMerge(val parent: Configurable) extends Sambamba { var output: File = _ // @doc: compression_level 6 is average, 0 = no compression, 9 = best - val compressionLevel: Option[Int] = config("compression_level", default = 6) + val compressionLevel: Option[Int] = config("compression_level") + val header: Boolean = config("header", default = false) + val showProgress: Boolean = config("show-progress", default = true) + val filter: Option[String] = config("filter") + + override def defaultThreads = 4 + override def defaultCoreMemory = 4.0 + + @Output + private var indexOutput: File = _ + + override def beforeGraph(): Unit = { + indexOutput = new File(output + ".bai") + } /** Returns command to execute */ - def cmdLine = required(executable) + + def cmdLine: String = required(executable) + required("merge") + optional("-t", nCoresRequest) + optional("-l", compressionLevel) + + optional("-F", filter) + + conditional(header, "--header") + + conditional(showProgress, "--show-progress") + required(output) + - repeat("", input) + repeat(input) } diff --git a/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStatsForSv.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStatsForSv.scala new file mode 100644 index 0000000000000000000000000000000000000000..e878c2085f22e0647cf71967cc0b260ea2ce97c0 --- /dev/null +++ b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStatsForSv.scala @@ -0,0 +1,50 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.extensions.tools + +import java.io.File + +import nl.lumc.sasc.biopet.core.summary.Summarizable +import nl.lumc.sasc.biopet.core.ToolCommandFunction +import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.utils.ConfigUtils +import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } + +class VcfStatsForSv(val parent: Configurable) extends ToolCommandFunction with Summarizable { + def toolObject = nl.lumc.sasc.biopet.tools.vcfstats.VcfStatsForSv + + mainFunction = false + + @Input(required = true) + var inputFile: File = _ + + @Argument(required = true) + var histogramBinBoundaries: Array[Int] = _ + + @Output(required = true) + var outputFile: File = _ + + override def defaultCoreMemory = 1.0 + + override def cmdLine = super.cmdLine + + required("-i", inputFile) + + required("-o", outputFile) + + repeat("--histBinBoundaries", histogramBinBoundaries) + + def summaryStats: Map[String, Any] = ConfigUtils.fileToConfigMap(outputFile) + + def summaryFiles: Map[String, File] = Map.empty + +} diff --git a/biopet-tools/src/main/resources/nl/lumc/sasc/biopet/tools/vcfstats/plotHeatmap.R b/biopet-tools/src/main/resources/nl/lumc/sasc/biopet/tools/vcfstats/plotHeatmap.R index 7f7237e90f6593e3d6cf110da005cd89c154d466..e9ab7ebd1bd142a1502b9be878a75b9598bfacb9 100644 --- a/biopet-tools/src/main/resources/nl/lumc/sasc/biopet/tools/vcfstats/plotHeatmap.R +++ b/biopet-tools/src/main/resources/nl/lumc/sasc/biopet/tools/vcfstats/plotHeatmap.R @@ -14,6 +14,7 @@ rownames(heat) <- heat[,1] heat<- heat[,-1] heat<- as.matrix(heat) +textMargin <- max(sapply(rownames(heat), nchar)) + 4 colNumber <- 50 col <- rev(colorRampPalette(brewer.pal(11, "Spectral"))(colNumber)) for (i in (colNumber+1):(colNumber+round((dist(range(heat)) - dist(range(heat[heat < 1]))) / dist(range(heat[heat < 1])) * colNumber))) { @@ -22,7 +23,7 @@ for (i in (colNumber+1):(colNumber+round((dist(range(heat)) - dist(range(heat[he col[length(col)] <- "#00FF00" png(file = outputArg, width = 1200, height = 1200) -heatmap.2(heat, trace = 'none', col = col, Colv=NA, Rowv=NA, dendrogram="none", margins = c(12, 12), na.color="#00FF00") +heatmap.2(heat, trace = 'none', col = col, Colv=NA, Rowv=NA, dendrogram="none", margins = c(textMargin, textMargin), na.color="#00FF00") dev.off() hc <- hclust(d = dist(heat)) @@ -31,5 +32,5 @@ plot(as.dendrogram(hc), horiz=TRUE, asp=0.02) dev.off() png(file = outputArgClustering, width = 1200, height = 1200) -heatmap.2(heat, trace = 'none', col = col, Colv="Rowv", dendrogram="row",margins = c(12, 12), na.color="#00FF00") +heatmap.2(heat, trace = 'none', col = col, Colv="Rowv", dendrogram="row",margins = c(textMargin, textMargin), na.color="#00FF00") dev.off() diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/vcfstats/VcfStatsForSv.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/vcfstats/VcfStatsForSv.scala new file mode 100644 index 0000000000000000000000000000000000000000..cc45189e16629ca496cfca8402460e8965c0b4c7 --- /dev/null +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/vcfstats/VcfStatsForSv.scala @@ -0,0 +1,73 @@ +package nl.lumc.sasc.biopet.tools.vcfstats + +import java.io.File + +import htsjdk.variant.vcf.VCFFileReader +import nl.lumc.sasc.biopet.utils.{ ConfigUtils, ToolCommand } + +import scala.collection.JavaConversions._ + +object VcfStatsForSv extends ToolCommand { + /** Commandline arguments */ + case class Args(inputFile: File = null, outputFile: File = null, histBinBoundaries: Array[Int] = Array()) extends AbstractArgs + + /** Parsing commandline arguments */ + class OptParser extends AbstractOptParser { + opt[File]('i', "inputFile") required () maxOccurs 1 valueName "" action { (x, c) => + c.copy(inputFile = x) + } validate { + x => if (x.exists) success else failure("Input VCF required") + } text "Input VCF file (required)" + + opt[File]('o', "outputFile") required () maxOccurs 1 valueName "" action { (x, c) => + c.copy(outputFile = x) + } text "Output file (required)" + + opt[Int]("histBinBoundaries") required () unbounded () action { (x, c) => + c.copy(histBinBoundaries = c.histBinBoundaries :+ x) + } text "When counting the records, sv-s are divided to different size classes, this parameter should give the boundaries between these classes." + } + + protected var cmdArgs: Args = _ + + def main(args: Array[String]): Unit = { + val argsParser = new OptParser + cmdArgs = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException) + + logger.info(s"Parsing file: ${cmdArgs.inputFile}") + + val stats: Map[String, Any] = getVariantCounts(cmdArgs.inputFile, cmdArgs.histBinBoundaries) + + ConfigUtils.mapToYamlFile(stats, cmdArgs.outputFile) + + } + + /** Parses a vcf-file and counts sv-s by type and size. Sv-s are divided to different size classes, the parameter histogramBinBoundaries gives the boundaries between these classes. */ + def getVariantCounts(vcfFile: File, histogramBinBoundaries: Array[Int]): Map[String, Any] = { + val delCounts, insCounts, dupCounts, invCounts = Array.fill(histogramBinBoundaries.size + 1) { 0 } + var traCount = 0 + + val reader = new VCFFileReader(vcfFile, false) + for (record <- reader) { + record.getAttributeAsString("SVTYPE", "") match { + case "TRA" | "CTX" | "ITX" => traCount += 1 + case svType => { + val size = record.getEnd - record.getStart + var i = 0 + while (i < histogramBinBoundaries.size && size > histogramBinBoundaries(i)) i += 1 + svType match { + case "DEL" => delCounts(i) += 1 + case "INS" => insCounts(i) += 1 + case "DUP" => dupCounts(i) += 1 + case "INV" => invCounts(i) += 1 + case _ => logger.warn(s"Vcf file contains a record of unknown type: file-$vcfFile, type-$svType") + } + } + } + } + reader.close() + + Map("DEL" -> delCounts, "INS" -> insCounts, "DUP" -> dupCounts, "INV" -> invCounts, "TRA" -> traCount) + } + +} diff --git a/biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotXY.R b/biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotXY.R index 67cfbf7d3d11900c4dc4cffd52597f0f8c01db3d..8fcbd65783e53040831b713adecaa0e62e88be8f 100644 --- a/biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotXY.R +++ b/biopet-utils/src/main/resources/nl/lumc/sasc/biopet/utils/rscript/plotXY.R @@ -13,6 +13,11 @@ parser$add_argument('--llabel', dest='llabel', type='character') parser$add_argument('--title', dest='title', type='character') parser$add_argument('--hideLegend', dest='hideLegend', type='character', default="false") parser$add_argument('--removeZero', dest='removeZero', type='character', default="false") +parser$add_argument('--xLog10', dest='xLog10', type='character', default="false") +parser$add_argument('--yLog10', dest='yLog10', type='character', default="false") + +parser$add_argument('--xLog10Breaks', dest='xLog10Breaks', nargs='+', type='integer') +parser$add_argument('--xLog10Labels', dest='xLog10Labels', nargs='+', type='character') arguments <- parser$parse_args() @@ -31,14 +36,29 @@ DF1 <- melt(DF, id.var="Rank") if (arguments$removeZero == "true") DF1 <- DF1[DF1$value > 0, ] if (arguments$removeZero == "true") print("Removed 0 values") -ggplot(DF1, aes(x = Rank, y = value, group = variable, color = variable)) + +plot = ggplot(DF1, aes(x = Rank, y = value, group = variable, color = variable)) + xlab(xlab) + ylab(arguments$ylabel) + - guides(fill=guide_legend(title=arguments$llabel)) + + guides(color=guide_legend(title=arguments$llabel)) + theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) + ggtitle(arguments$title) + theme_bw() + theme(legend.position = legendPosition) + geom_line() + +if (arguments$xLog10 == "true") { + if (!is.null(arguments$xLog10Labels)) { + scale_x <- scale_x_log10(breaks = arguments$xLog10Breaks, labels=arguments$xLog10Labels) + } else { + scale_x <- scale_x_log10() + } + plot <- plot + scale_x +} +if (arguments$yLog10 == "true") { + plot <- plot + scale_y_log10() +} + +plot + dev.off() diff --git a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala index 0499835afe1a1e5e92a5600b3df927221780003e..8cb51649474fed71255e94250f59ac84ddf9cd75 100644 --- a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala +++ b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala @@ -17,7 +17,6 @@ package nl.lumc.sasc.biopet.utils.intervals import java.io.{ File, PrintWriter } import htsjdk.samtools.SAMSequenceDictionary -import htsjdk.samtools.reference.FastaSequenceFile import scala.collection.JavaConversions._ import scala.collection.mutable @@ -112,6 +111,15 @@ case class BedRecordList(val chrRecords: Map[String, List[BedRecord]], val heade allRecords.foreach(writer.println) writer.close() } + + /** This return the fraction of the regions comparing to a length */ + def fractionOf(length: Long): Double = length.toDouble / length + + /** This return the fraction of the regions comparing to a reference */ + def fractionOfReference(dict: SAMSequenceDictionary): Double = fractionOf(dict.getReferenceLength) + + /** This return the fraction of the regions comparing to a reference */ + def fractionOfReference(file: File): Double = fractionOfReference(FastaUtils.getCachedDict(file)) } object BedRecordList { @@ -131,6 +139,32 @@ object BedRecordList { def fromList(records: TraversableOnce[BedRecord]): BedRecordList = fromListWithHeader(records, Nil) + /** + * This creates a [[BedRecordList]] based on multiple files. This method combines overlapping regions + * + * @param bedFiles Input bed files + * @return + */ + def fromFilesCombine(bedFiles: File*) = { + fromFiles(bedFiles, combine = true) + } + + /** + * This creates a [[BedRecordList]] based on multiple files + * + * @param bedFiles Input bed files + * @param combine When true overlaping regions are merged + * @return + */ + def fromFiles(bedFiles: Seq[File], combine: Boolean = false) = { + val list = bedFiles.foldLeft(empty)((a, b) => fromList(fromFile(b).allRecords ++ a.allRecords)) + if (combine) list.combineOverlap + else list + } + + /** This created a empty [[BedRecordList]] */ + def empty = fromList(Nil) + def fromFile(bedFile: File) = { val reader = Source.fromFile(bedFile) val all = reader.getLines().toList diff --git a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/LinePlot.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/LinePlot.scala index d6d7483ef58ec3f4f1964c383e9d827a25730e4f..c5f880f394b5610f6c8326d9e40f0a2cc707c8bb 100644 --- a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/LinePlot.scala +++ b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/rscript/LinePlot.scala @@ -39,6 +39,13 @@ class LinePlot(val parent: Configurable) extends Rscript { var hideLegend: Boolean = config("hide_legend", default = false) var removeZero: Boolean = config("removeZero", default = false) + // whether to use log scale for x and y axis + var xLog10: Boolean = false + var yLog10: Boolean = false + + var xLog10AxisTicks: Seq[String] = Seq.empty + var xLog10AxisLabels: Seq[String] = Seq.empty + override def cmd = super.cmd ++ Seq("--input", input.getAbsolutePath) ++ Seq("--output", output.getAbsolutePath) ++ @@ -49,7 +56,11 @@ class LinePlot(val parent: Configurable) extends Rscript { llabel.map(Seq("--llabel", _)).getOrElse(Seq()) ++ title.map(Seq("--title", _)).getOrElse(Seq()) ++ (if (hideLegend) Seq("--hideLegend", "true") else Seq()) ++ - (if (removeZero) Seq("--removeZero", "true") else Seq()) + (if (removeZero) Seq("--removeZero", "true") else Seq()) ++ + (if (xLog10) Seq("--xLog10", "true") else Seq()) ++ + (if (yLog10) Seq("--yLog10", "true") else Seq()) ++ + (if (xLog10AxisTicks.nonEmpty) xLog10AxisTicks.+:("--xLog10Breaks") else Seq()) ++ + (if (xLog10AxisLabels.nonEmpty) xLog10AxisLabels.+:("--xLog10Labels") else Seq()) } object LinePlot { diff --git a/docs/pipelines/shiva.md b/docs/pipelines/shiva.md index d16854dfdf0e7f1cdd29279f5f3b927d7f0e0acb..0b62ba7cbe705d5718723b3238192bb7e54c0d5c 100644 --- a/docs/pipelines/shiva.md +++ b/docs/pipelines/shiva.md @@ -118,6 +118,11 @@ At this moment the following variant callers can be used | shiva | correct_readgroups | Boolean | false | Attempt to correct read groups | Only used when input is a bam file | | shiva | amplicon_bed | Path | | Path to target bed file | all | | shiva | regions_of_interest | Array of paths | | Array of paths to region of interest (e.g. gene panels) bed files | all | +| shivavariantcalling | gender_aware_calling | Boolean | false | Enables gander aware variantcalling | haplotypecaller_gvcf | +| shivavariantcalling | hap̦loid_regions | Bed file | | Haploid regions for all genders | haplotypecaller_gvcf | +| shivavariantcalling | hap̦loid_regions_male | Bed file | | Haploid regions for males | haplotypecaller_gvcf | +| shivavariantcalling | hap̦loid_regions_female | Bed file | | Haploid regions for females | haplotypecaller_gvcf | +| shiva | amplicon_bed | Path | | Path to target bed file | all | | vcffilter | min_sample_depth | Integer | 8 | Filter variants with at least x coverage | raw | | vcffilter | min_alternate_depth | Integer | 2 | Filter variants with at least x depth on the alternate allele | raw | | vcffilter | min_samples_pass | Integer | 1 | Minimum amount of samples which pass custom filter (requires additional flags) | raw | @@ -130,6 +135,14 @@ For all the options, please see the corresponding documentation for the mapping ## Advanced usage +### Gender aware variantcalling + +In Shiva and ShivaVariantcalling while using haplotypecaller_gvcf it is possible to do gender aware variantcalling. In this mode it required to supply bed files to define haploid regions (see config values). +- For males, `hap̦loid_regions` and `hap̦loid_regions_male` is used. +- For females, `hap̦loid_regions` and `hap̦loid_regions_female` is used. + +The pipeline will use a union of those files. At least 1 file is required while using this mode. + ### Reporting modes Shiva furthermore supports three modes. The default and recommended option is `multisample_variantcalling`. diff --git a/gatk b/gatk index 86ee6287b66562544e701f9ffa7cbbf0af9f3eba..677d757419f515779766b10149b0e92eff71e5b2 160000 --- a/gatk +++ b/gatk @@ -1 +1 @@ -Subproject commit 86ee6287b66562544e701f9ffa7cbbf0af9f3eba +Subproject commit 677d757419f515779766b10149b0e92eff71e5b2 diff --git a/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingle.scala b/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingle.scala index 0ea3dce7be759c82228d8e1abe2b8fa5b2bf5448..12a184170feac703bf1edabaaa29e5dc802395b1 100644 --- a/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingle.scala +++ b/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingle.scala @@ -56,12 +56,7 @@ class GearsSingle(val parent: Configurable) extends QScript with SummaryQScript if (sampleId == null || sampleId == None) Logging.addError("Missing sample ID on GearsSingle module") if (outputName == null) { - if (fastqR1.nonEmpty) outputName = fastqR1.headOption.map(_.getName - .stripSuffix(".gz") - .stripSuffix(".fastq") - .stripSuffix(".fq")) - .getOrElse("noName") - else outputName = bamFile.map(_.getName.stripSuffix(".bam")).getOrElse("noName") + outputName = sampleId.getOrElse("noName") + libId.map("-" + _).getOrElse("") } if (fastqR1.nonEmpty) { diff --git a/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingleTest.scala b/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingleTest.scala index 0f8763dac9b0ada1e2076118a85d99420bf64a49..51fa33a3da219f192fbfd67b147c03f1785c5f88 100644 --- a/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingleTest.scala +++ b/gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingleTest.scala @@ -108,7 +108,7 @@ abstract class TestGearsSingle extends TestNGSuite with Matchers { gears.outputName shouldBe "test" } else { // in the following cases the filename should have been determined by the filename - gears.outputName shouldBe (if (inputMode == Some("bam")) "bamfile" else "R1") + gears.outputName shouldBe "sampleName-libName" } val pipesJobs = gears.functions.filter(_.isInstanceOf[BiopetCommandLineFunction]) diff --git a/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMapping.scala b/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMapping.scala index 39d12c3455d8766c27a0983aaff49c506316c8f2..39f95b2f6b0079c68fe18b08fde98635773461cb 100644 --- a/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMapping.scala +++ b/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMapping.scala @@ -19,7 +19,7 @@ import java.io.File import htsjdk.samtools.SamReaderFactory import htsjdk.samtools.reference.FastaSequenceFile import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension -import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, MultiSampleQScript } +import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand, Reference } import nl.lumc.sasc.biopet.extensions.Ln import nl.lumc.sasc.biopet.extensions.picard._ import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics @@ -28,8 +28,8 @@ import nl.lumc.sasc.biopet.pipelines.gears.GearsSingle import nl.lumc.sasc.biopet.utils.Logging import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript - import MultisampleMapping.MergeStrategy +import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaMarkdup, SambambaMerge } import scala.collection.JavaConversions._ @@ -125,7 +125,7 @@ trait MultisampleMappingTrait extends MultiSampleQScript } else None def bamFile: Option[File] = mapping match { - case Some(m) => Some(m.finalBamFile) + case Some(m) => Some(m.mergedBamFile) case _ if inputBam.isDefined => Some(new File(libDir, s"$sampleId-$libId.bam")) case _ => None } @@ -247,9 +247,9 @@ trait MultisampleMappingTrait extends MultiSampleQScript mergeStrategy match { case MergeStrategy.None => - case (MergeStrategy.MergeSam | MergeStrategy.MarkDuplicates) if libraries.flatMap(_._2.bamFile).size == 1 => + case (MergeStrategy.MergeSam) if libraries.flatMap(_._2.bamFile).size == 1 => add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.bamFile).head, bamFile.get): _*) - case (MergeStrategy.PreProcessMergeSam | MergeStrategy.PreProcessMarkDuplicates) if libraries.flatMap(_._2.preProcessBam).size == 1 => + case (MergeStrategy.PreProcessMergeSam) if libraries.flatMap(_._2.preProcessBam).size == 1 => add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.preProcessBam).head, bamFile.get): _*) case MergeStrategy.MergeSam => add(MergeSamFiles(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = !keepMergedFiles)) @@ -259,6 +259,20 @@ trait MultisampleMappingTrait extends MultiSampleQScript add(MarkDuplicates(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = !keepMergedFiles)) case MergeStrategy.PreProcessMarkDuplicates => add(MarkDuplicates(qscript, libraries.flatMap(_._2.preProcessBam).toList, bamFile.get, isIntermediate = !keepMergedFiles)) + case MergeStrategy.PreProcessSambambaMarkdup => + val mergedBam = if (libraries.flatMap(_._2.bamFile).size == 1) { + add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.preProcessBam).head, new File(sampleDir, "merged.bam")): _*) + libraries.flatMap(_._2.preProcessBam).head + } else { + val merge = new SambambaMerge(qscript) + merge.input = libraries.flatMap(_._2.preProcessBam).toList + merge.output = new File(sampleDir, "merged.bam") + merge.isIntermediate = true + add(merge) + merge.output + } + add(SambambaMarkdup(qscript, mergedBam, bamFile.get, isIntermediate = !keepMergedFiles)) + add(Ln(qscript, bamFile.get + ".bai", bamFile.get.getAbsolutePath.stripSuffix(".bam") + ".bai")) case _ => throw new IllegalStateException("This should not be possible, unimplemented MergeStrategy?") } @@ -301,7 +315,7 @@ class MultisampleMapping(val parent: Configurable) extends QScript with Multisam object MultisampleMapping extends PipelineCommand { object MergeStrategy extends Enumeration { - val None, MergeSam, MarkDuplicates, PreProcessMergeSam, PreProcessMarkDuplicates = Value + val None, MergeSam, MarkDuplicates, PreProcessMergeSam, PreProcessMarkDuplicates, PreProcessSambambaMarkdup = Value } /** When file is not absolute an error is raise att the end of the script of a pipeline */ diff --git a/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTest.scala b/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTest.scala index bdee59df8e67951909dac7361d3540795770f217..577957eac0e167bbd98e6a5b6aa4d92dc218a148 100644 --- a/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTest.scala +++ b/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTest.scala @@ -20,6 +20,7 @@ import com.google.common.io.Files import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.extensions.centrifuge.Centrifuge import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, MergeSamFiles } +import nl.lumc.sasc.biopet.extensions.sambamba.SambambaMarkdup import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging } import nl.lumc.sasc.biopet.utils.config.Config import org.apache.commons.io.FileUtils @@ -91,16 +92,22 @@ trait MultisampleMappingTestTrait extends TestNGSuite with Matchers { pipeline.script() val numberFastqLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0) + (if (sample3 && bamToFastq) 1 else 0) + (if (sample4 && bamToFastq) 1 else 0) - val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + (if (sample3) 1 else 0) + (if (sample4) 1 else 0) val pipesJobs = pipeline.functions.filter(_.isInstanceOf[BiopetCommandLineFunction]) .flatMap(_.asInstanceOf[BiopetCommandLineFunction].pipesJobs) + if (merge == MultisampleMapping.MergeStrategy.PreProcessMarkDuplicates) { + "" + } + import MultisampleMapping.MergeStrategy pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberFastqLibs + - (if (sample2 && (merge == MergeStrategy.MarkDuplicates || merge == MergeStrategy.PreProcessMarkDuplicates)) 1 else 0)) + (if (merge == MergeStrategy.MarkDuplicates || merge == MergeStrategy.PreProcessMarkDuplicates) numberSamples else 0)) pipeline.functions.count(_.isInstanceOf[MergeSamFiles]) shouldBe ( (if (sample2 && (merge == MergeStrategy.MergeSam || merge == MergeStrategy.PreProcessMergeSam)) 1 else 0)) + pipeline.functions.count(_.isInstanceOf[SambambaMarkdup]) shouldBe + (if (merge == MergeStrategy.PreProcessSambambaMarkdup) numberSamples else 0) pipeline.samples.foreach { case (sampleName, sample) => if (merge == MergeStrategy.None) sample.bamFile shouldBe None @@ -211,6 +218,7 @@ object MultisampleMappingTestTrait { "sickle" -> Map("exe" -> "test"), "cutadapt" -> Map("exe" -> "test"), "bwa" -> Map("exe" -> "test"), + "sambamba" -> Map("exe" -> "test"), "samtools" -> Map("exe" -> "test"), "igvtools" -> Map("exe" -> "test", "igvtools_jar" -> "test"), "wigtobigwig" -> Map("exe" -> "test"), @@ -232,7 +240,7 @@ object MultisampleMappingTestTrait { ))) val sample2 = Map( - "samples" -> Map("sample3" -> Map("libraries" -> Map( + "samples" -> Map("sample2" -> Map("libraries" -> Map( "lib1" -> Map( "R1" -> inputTouch("2_1_R1.fq"), "R2" -> inputTouch("2_1_R2.fq") diff --git a/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariantsSv.ssp b/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariantsSv.ssp new file mode 100644 index 0000000000000000000000000000000000000000..9e09699adeb33c735b1727da292820c8ca7704e0 --- /dev/null +++ b/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariantsSv.ssp @@ -0,0 +1,111 @@ +#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb) +#import(nl.lumc.sasc.biopet.utils.summary.db.Schema.Sample) +#import(java.io.File) +<%@ var summary: SummaryDb %> +<%@ var allSamples: Seq[Sample]%> +<%@ var rootPath: String %> +<%@ var outputDir: File %> +<%@ var runId: Int %> +<%@ var sampleId: Option[Int] = None %> +<%@ var showPlot: Boolean = false %> +<%@ var showTable: Boolean = true %> +<%@ var showIntro: Boolean = true %> + +#{ + val sampleNames: Seq[String] = sampleId match { + case Some(sampleId) => Seq(allSamples.filter(s => s.id == sampleId).head.name) + case _ => allSamples.collect({case s: Sample => s.name}).sorted + } + + val counts: Map[String, Map[String, Array[Long]]] = ShivaSvCallingReport.parseSummaryForSvCounts(summary, runId, sampleNames) + val traCounts: Map[String, Long] = ShivaSvCallingReport.parseSummaryForTranslocations(summary, runId, sampleNames) + + var svTypes = List( + SvTypeForReport("DEL", "Deletions", "svSummaryDeletions.tsv", "svSummaryDeletions.png"), + SvTypeForReport("DUP", "Duplications", "svSummaryDuplications.tsv", "svSummaryDuplications.png"), + SvTypeForReport("INS", "Insertions", "svSummaryInsertions.tsv", "svSummaryInsertions.png"), + SvTypeForReport("INV", "Inversions", "svSummaryInversions.tsv", "svSummaryInversions.png")) + svTypes = svTypes.filter(sv => counts.contains(sv.svType)) + val tsvAllTypes = "svSummary.tsv" + + ShivaSvCallingReport.writeTsvFiles(sampleNames, counts, svTypes, tsvAllTypes, outputDir) + ShivaSvCallingReport.createPlots(svTypes, outputDir) +}# + + +#if (showPlot) +
+ #for (sv <- svTypes) + + #end +
+ #if (traCounts.nonEmpty) +
+
Number of translocation events detected:
+ + + #for (sampleName <- sampleNames) + + #end + + + #for (sampleName <- sampleNames) + #{ + val sampleCount: String = traCounts.get(sampleName) match { + case Some(c) => c.toString() + case None => "-" + } + }# + + #end + +
${sampleName}
${sampleCount}
+
+ #end + +#end + +
+ +#for (sv <- svTypes) +#{ + val countsForSvType: Map[String, Array[Long]] = counts(sv.svType) + val missingCounts: Array[String] = Array.fill(ShivaSvCallingReport.histogramText.size) { "-" } +}# +

${sv.displayText}

+ + + #for (text <- ShivaSvCallingReport.histogramText) + + #end + + + #for (sampleName <- sampleNames) + + + #{ + val sampleCounts: Array[String] = countsForSvType.get(sampleName) match { + case Some(c: Array[_]) => c.collect({case x => x.toString()}) + case None => missingCounts + } + }# + + #for (countForSize <- sampleCounts) + + #end + + #end + +
Sample${text}
${sampleName}${countForSize}
+ +#end +
diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala index edb9e76a4e2e4a66d6e10cd0ea764b6f84d13c63..85e919c49f688c3e6c6f57b72af9f2afbf64aa28 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala @@ -60,11 +60,13 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra override def namePrefix = "multisample" override def configNamespace: String = "shivavariantcalling" override def configPath: List[String] = super.configPath ::: "multisample" :: Nil + genders = samples.map { case (sampleName, sample) => sampleName -> sample.gender } } else new ShivaVariantcalling(qscript) { override def configNamespace = "shivavariantcalling" sampleId = sample libId = library + genders = sample.map(x => x -> samples(x).gender).toMap } } diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala index c96730a520fe0d90562da7b6b590904f978d57e4..be9f063dca043b8021f2c7a066d1b8f24bd0a812 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala @@ -48,13 +48,22 @@ trait ShivaReportTrait extends MultisampleMappingReportTrait { case _ => false } + def svCallingExecuted = summary.getSettingKeys(runId, "shiva", NoModule, keyValues = Map("sv_calling" -> List("sv_calling"))).get("sv_calling") + .flatten match { + case Some(true) => true + case _ => false + } + override def frontSection = ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp") override def pipelineName = "shiva" - override def additionalSections = super.additionalSections ++ (if (variantcallingExecuted) List("Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp", - Map("showPlot" -> true, "showTable" -> false))) - else Nil) + override def additionalSections = { + val params = Map("showPlot" -> true, "showTable" -> false) + super.additionalSections ++ + (if (variantcallingExecuted) List("SNV Calling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp", params)) else Nil) ++ + (if (svCallingExecuted) List("SV Calling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariantsSv.ssp", params)) else Nil) + } /** Root page for the shiva report */ override def indexPage: Future[ReportPage] = Future { @@ -107,9 +116,10 @@ trait ShivaReportTrait extends MultisampleMappingReportTrait { /** Single sample page */ override def samplePage(sampleId: Int, args: Map[String, Any]): Future[ReportPage] = Future { - val variantcallingSection = if (variantcallingExecuted) List("Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp")) else Nil + val variantcallingSection = if (variantcallingExecuted) List("SNV Calling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp")) else Nil + val svSection = if (svCallingExecuted) List("SV Calling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariantsSv.ssp")) else Nil val oldPage: ReportPage = super.samplePage(sampleId, args) - oldPage.copy(sections = variantcallingSection ++ oldPage.sections) + oldPage.copy(sections = variantcallingSection ++ svSection ++ oldPage.sections) } /** Name of the report */ @@ -163,4 +173,5 @@ trait ShivaReportTrait extends MultisampleMappingReportTrait { plot.width = Some(200 + (samples.count(s => sampleId.getOrElse(s) == s) * 10)) plot.runLocal() } + } diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala index 6d21b101d119b5c70df10a1abbe0f563b23996b1..4bb8354fa342ad7f7178fb055d7c36be950d3e43 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala @@ -14,9 +14,10 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva -import nl.lumc.sasc.biopet.core.summary.SummaryQScript -import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, SampleLibraryTag } +import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript } +import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference } import nl.lumc.sasc.biopet.extensions.Pysvtools +import nl.lumc.sasc.biopet.extensions.tools.VcfStatsForSv import nl.lumc.sasc.biopet.pipelines.shiva.svcallers._ import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging } @@ -27,7 +28,7 @@ import org.broadinstitute.gatk.queue.QScript * * Created by pjvan_thof on 2/26/15. */ -class ShivaSvCalling(val parent: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference { +class ShivaSvCalling(val parent: Configurable) extends QScript with SummaryQScript with Reference { qscript => def this() = this(null) @@ -96,6 +97,21 @@ class ShivaSvCalling(val parent: Configurable) extends QScript with SummaryQScri // group by "tags" // sample tagging is however not available within this pipeline + for ((sample, mergedResultFile) <- outputMergedVCFbySample) { + val vcfStats = new VcfStatsForSv(qscript) + vcfStats.inputFile = mergedResultFile + vcfStats.outputFile = new File(outputDir, s".$sample.merged.stats") + vcfStats.histogramBinBoundaries = ShivaSvCallingReport.histogramBinBoundaries + + add(vcfStats) + addSummarizable(vcfStats, "vcfstats-sv", Some(sample)) + + addSummarizable(new Summarizable { + def summaryFiles = Map("output_vcf" -> mergedResultFile) + def summaryStats = Map.empty + }, "merge_variants", Some(sample)) + } + addSummaryJobs() } @@ -103,10 +119,11 @@ class ShivaSvCalling(val parent: Configurable) extends QScript with SummaryQScri protected def callersList: List[SvCaller] = List(new Breakdancer(this), new Clever(this), new Delly(this), new Pindel(this)) /** Settings for the summary */ - def summarySettings = Map("sv_callers" -> configCallers.toList) + def summarySettings = Map("sv_callers" -> configCallers.toList, "hist_bin_boundaries" -> ShivaSvCallingReport.histogramBinBoundaries) /** Files for the summary */ - def summaryFiles: Map[String, File] = Map("final_mergedvcf" -> (if (inputBams.size > 1) outputMergedVCF else outputMergedVCFbySample.values.head)) + def summaryFiles: Map[String, File] = if (inputBams.size > 1) Map("final_mergedvcf" -> outputMergedVCF) else Map.empty + } object ShivaSvCalling extends PipelineCommand \ No newline at end of file diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCallingReport.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCallingReport.scala new file mode 100644 index 0000000000000000000000000000000000000000..b2f0317bc557a5798e91baa17604d03038b9fb36 --- /dev/null +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCallingReport.scala @@ -0,0 +1,137 @@ +package nl.lumc.sasc.biopet.pipelines.shiva + +import java.io.{ File, PrintWriter } + +import nl.lumc.sasc.biopet.core.report.ReportBuilder +import nl.lumc.sasc.biopet.utils.Logging +import nl.lumc.sasc.biopet.utils.rscript.LinePlot +import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb +import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.{ ModuleName, PipelineName, SampleName } + +import scala.concurrent.Await +import scala.concurrent.duration.Duration + +object ShivaSvCallingReport extends Logging { + + implicit lazy val ec = ReportBuilder.ec + + val histogramBinBoundaries: Array[Int] = Array(100, 1000, 10000, 100000, 1000000, 10000000) + val histogramPlotTicks: Array[Int] = Array(100, 1000, 10000, 100000, 1000000, 10000000, 100000000) + val histogramText: List[String] = List("<=100bp", "0.1-1kb", "1-10kb", "10-100kb", "0.1-1Mb", "1-10Mb", ">10Mb") + + def parseSummaryForSvCounts(summary: SummaryDb, runId: Int, sampleNames: Seq[String]): Map[String, Map[String, Array[Long]]] = { + var delCounts, insCounts, dupCounts, invCounts: Map[String, Array[Long]] = Map() + + for (sampleName <- sampleNames) { + val sampleCounts: Map[String, Any] = Await.result(summary.getStat(runId, PipelineName("shivasvcalling"), ModuleName("vcfstats-sv"), SampleName(sampleName)), Duration.Inf).get + for ((svType, counts) <- sampleCounts.collect({ case (k, v: List[_]) => (k, v.toArray[Any]) })) { + val elem: Tuple2[String, Array[Long]] = (sampleName, counts.collect({ case x: Long => x })) + svType match { + case "DEL" => delCounts += elem + case "INS" => insCounts += elem + case "DUP" => dupCounts += elem + case "INV" => invCounts += elem + } + } + } + + var result: Map[String, Map[String, Array[Long]]] = Map() + if (delCounts.exists(elem => (elem._2.sum > 0))) result = Map("DEL" -> delCounts) + if (insCounts.exists(elem => (elem._2.sum > 0))) result += ("INS" -> insCounts) + if (dupCounts.exists(elem => (elem._2.sum > 0))) result += ("DUP" -> dupCounts) + if (invCounts.exists(elem => (elem._2.sum > 0))) result += ("INV" -> invCounts) + result + } + + def parseSummaryForTranslocations(summary: SummaryDb, runId: Int, sampleNames: Seq[String]): Map[String, Long] = { + var traCounts: Map[String, Long] = Map() + for (sampleName <- sampleNames) { + val counts: Map[String, Any] = Await.result(summary.getStat(runId, PipelineName("shivasvcalling"), ModuleName("vcfstats-sv"), SampleName(sampleName)), Duration.Inf).get + counts.get("TRA") match { + case Some(c: Long) => traCounts += (sampleName -> c) + case Some(c) => logger.error(s"Unable to parse translocation counts from summary db for sample $sampleName (type mismatch, type in the db: ${c.getClass})") + case _ => logger.error(s"Summary db doesn't have translocation counts for sample $sampleName") + } + + } + if (traCounts.exists(elem => elem._2 > 0)) traCounts else Map.empty + } + + def writeTsvFiles(sampleNames: Seq[String], counts: Map[String, Map[String, Array[Long]]], svTypes: List[SvTypeForReport], outFileAllTypes: String, outDir: File): Unit = { + + val tsvWriter = new PrintWriter(new File(outDir, outFileAllTypes)) + tsvWriter.print("sv_type\tsample") + histogramText.foreach(bin => tsvWriter.print("\t" + bin)) + tsvWriter.println() + + val missingCounts: Array[String] = Array.fill(ShivaSvCallingReport.histogramText.size) { "-" } + + for (sv <- svTypes) { + val countsForSvType: Map[String, Array[Long]] = counts.getOrElse(sv.svType, Map.empty) + + if (countsForSvType.nonEmpty) { + + writeTsvFileForSvType(sv, countsForSvType, sampleNames, outDir) + + for (sampleName <- sampleNames) { + val sampleCounts: Array[String] = countsForSvType.get(sampleName) match { + case Some(c) => c.collect({ case x => x.toString() }) + case None => { + logger.error(s"Internal error, missing sv counts, sample-$sampleName, sv type-${sv.svType}") + missingCounts + } + } + + tsvWriter.print(sv.svType + "\t" + sampleName + "\t") + tsvWriter.println(sampleCounts.mkString("\t")) + } + + } else { + logger.error(s"Internal error, skipping writing the tsv-file for sv type ${sv.svType}") + } + + } + tsvWriter.close() + } + + def writeTsvFileForSvType(svType: SvTypeForReport, counts: Map[String, Array[Long]], sampleNames: Seq[String], outDir: File): Unit = { + val tsvWriter = new PrintWriter(new File(outDir, svType.tsvFileName)) + + tsvWriter.print("histogramBin") + val samplesWithCounts: Seq[String] = sampleNames.filter(x => counts.contains(x)) + samplesWithCounts.foreach(sampleName => tsvWriter.print("\t" + sampleName)) + tsvWriter.println() + + for (i <- histogramPlotTicks.indices) { + tsvWriter.print(histogramPlotTicks(i)) + samplesWithCounts.foreach(sampleName => tsvWriter.print("\t" + counts.get(sampleName).get(i))) + tsvWriter.println() + } + + tsvWriter.close() + } + + def createPlots(svTypes: List[SvTypeForReport], outDir: File): Unit = { + for (sv <- svTypes) { + val tsvFile = new File(outDir, sv.tsvFileName) + val pngFile: File = new File(outDir, sv.pngFileName) + val plot = LinePlot(tsvFile, pngFile, + xlabel = Some(s"${sv.displayText.substring(0, sv.displayText.length - 1)} size"), + ylabel = Some("Number of loci"), + title = Some(sv.displayText), + width = 400, + removeZero = false) + plot.height = Some(300) + plot.llabel = Some("Sample") + plot.xLog10 = true + plot.yLog10 = true + plot.xLog10AxisTicks = histogramPlotTicks.collect({ case x => x.toString() }) + plot.xLog10AxisLabels = histogramText + plot.runLocal() + } + + } + +} + +case class SvTypeForReport(svType: String, displayText: String, tsvFileName: String, pngFileName: String) \ No newline at end of file diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala index 7a089960805d01f4160137fce5304b3ed2a849dc..458e70b6dfa97ead555369e394226e992e7c693e 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala @@ -14,7 +14,8 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva -import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, SampleLibraryTag } +import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand, Reference, SampleLibraryTag } +import MultiSampleQScript.Gender import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.extensions.Tabix import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, GenotypeConcordance } @@ -48,9 +49,22 @@ class ShivaVariantcalling(val parent: Configurable) extends QScript var inputBqsrFiles: Map[String, File] = Map() + var genders: Map[String, Gender.Value] = _ + /** Executed before script */ def init(): Unit = { if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg) + if (Option(genders).isEmpty) genders = { + val samples: Map[String, Any] = config("genders", default = Map()) + samples.map { + case (sampleName, gender) => + sampleName -> (gender.toString.toLowerCase match { + case "male" => Gender.Male + case "female" => Gender.Female + case _ => Gender.Unknown + }) + } + } } var referenceVcf: Option[File] = config("reference_vcf") @@ -100,6 +114,7 @@ class ShivaVariantcalling(val parent: Configurable) extends QScript caller.inputBqsrFiles = inputBqsrFiles caller.namePrefix = namePrefix caller.outputDir = new File(outputDir, caller.name) + caller.genders = genders add(caller) addStats(caller.outputFile, caller.name) val normalize: Boolean = config("execute_vt_normalize", default = false, namespace = caller.configNamespace) diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala index 39cb48124d44a780b85d7a7f196277fd993a6b74..51b8eeb4f2ac7199b309687fbb4fc879f1e35d70 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala @@ -19,8 +19,12 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers +import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender import nl.lumc.sasc.biopet.extensions.gatk +import nl.lumc.sasc.biopet.extensions.gatk.CombineGVCFs +import nl.lumc.sasc.biopet.utils.Logging import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.utils.intervals.BedRecordList /** Gvcf mode for haplotypecaller */ class HaplotypeCallerGvcf(val parent: Configurable) extends Variantcaller { @@ -34,6 +38,15 @@ class HaplotypeCallerGvcf(val parent: Configurable) extends Variantcaller { def getGvcfs = gVcfFiles + val genderAwareCalling: Boolean = config("gender_aware_calling", default = false) + val haploidRegions: Option[File] = config("hap̦loid_regions") + val haploidRegionsMale: Option[File] = config("haploid_regions_male") + val haploidRegionsFemale: Option[File] = config("haploid_regions_female") + + lazy val fractionMale: Double = BedRecordList.fromFiles(Seq(haploidRegions, haploidRegionsMale).flatten, true).fractionOfReference(referenceDict) + lazy val fractionFemale: Double = BedRecordList.fromFiles(Seq(haploidRegions, haploidRegionsFemale).flatten, true).fractionOfReference(referenceDict) + lazy val fractionUnknown: Double = BedRecordList.fromFiles(Seq(haploidRegions).flatten, true).fractionOfReference(referenceDict) + override def fixedValues = Map("haplotypecaller" -> Map("emitRefConfidence" -> "GVCF")) override def defaults = Map("haplotypecaller" -> Map( @@ -41,12 +54,60 @@ class HaplotypeCallerGvcf(val parent: Configurable) extends Variantcaller { "variant_index_parameter" -> 128000) ) + override def init(): Unit = { + super.init() + if (genderAwareCalling && haploidRegions.isEmpty && haploidRegionsMale.isEmpty && haploidRegionsFemale.isEmpty) + Logging.addError("Gender aware variantcalling is enabled but no haploid bed files are given") + } + def biopetScript() { gVcfFiles = for ((sample, inputBam) <- inputBams) yield { - val hc = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".gvcf.vcf.gz")) - hc.BQSR = inputBqsrFiles.get(sample) - add(hc) - sample -> hc.out + if (genderAwareCalling) { + val finalFile = new File(outputDir, sample + ".gvcf.vcf.gz") + val gender = genders.getOrElse(sample, Gender.Unknown) + val haploidBedFiles: List[File] = gender match { + case Gender.Female => haploidRegions.toList ::: haploidRegionsFemale.toList ::: Nil + case Gender.Male => haploidRegions.toList ::: haploidRegionsMale.toList ::: Nil + case _ => haploidRegions.toList + } + + val fraction: Double = gender match { + case Gender.Female => fractionFemale + case Gender.Male => fractionMale + case _ => fractionUnknown + } + + val haploidGvcf = if (haploidBedFiles.nonEmpty) { + val hc = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".haploid.gvcf.vcf.gz")) + hc.BQSR = inputBqsrFiles.get(sample) + hc.intervals = haploidBedFiles + hc.scatterCount = (hc.scatterCount * fraction).toInt + hc.sample_ploidy = Some(1) + add(hc) + Some(hc.out) + } else None + + val hcDiploid = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".diploid.gvcf.vcf.gz")) + hcDiploid.BQSR = inputBqsrFiles.get(sample) + hcDiploid.excludeIntervals = haploidBedFiles + hcDiploid.scatterCount = (hcDiploid.scatterCount * (1 - fraction)).toInt + add(hcDiploid) + + haploidGvcf match { + case Some(file) => + val combine = new CombineGVCFs(this) + combine.variant = Seq(hcDiploid.out, file) + combine.out = new File(outputDir, sample + ".gvcf.vcf.gz") + add(combine) + sample -> combine.out + case _ => sample -> hcDiploid.out + } + } else { + val hc = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".gvcf.vcf.gz")) + hc.BQSR = inputBqsrFiles.get(sample) + add(hc) + sample -> hc.out + } } val genotypeGVCFs = gatk.GenotypeGVCFs(this, gVcfFiles.values.toList, outputFile) diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala index 8ebc9b618614fd03a9d277781a2daa7a26952935..6d5d062d505d6c109c47ab17fe0741b46a2b4515 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala @@ -14,6 +14,7 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers +import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender import nl.lumc.sasc.biopet.core.{ BiopetQScript, Reference } import org.broadinstitute.gatk.queue.QScript @@ -27,6 +28,8 @@ trait Variantcaller extends QScript with BiopetQScript with Reference { var namePrefix: String = _ + var genders: Map[String, Gender.Value] = _ + val mergeVcfResults: Boolean = config("merge_vcf_results", default = true) /** diff --git a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCallingTest.scala b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCallingTest.scala index 83caa7c64bdbd14a78df903f867c48618edcd3f3..a56081e4dd7f1961e97c306238fdbd3d18d368ad 100644 --- a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCallingTest.scala +++ b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCallingTest.scala @@ -90,7 +90,7 @@ class ShivaSvCallingTest extends TestNGSuite with Matchers { pipeline.init() pipeline.script() - val summaryCallers = pipeline.summarySettings("sv_callers") + val summaryCallers = pipeline.summarySettings.get("sv_callers").get.asInstanceOf[List[String]] if (delly) assert(summaryCallers.contains("delly")) else assert(!summaryCallers.contains("delly")) if (clever) assert(summaryCallers.contains("clever")) @@ -182,7 +182,7 @@ class ShivaSvCallingTest extends TestNGSuite with Matchers { pipeline.init() pipeline.script() - val summaryCallers = pipeline.summarySettings("sv_callers") + val summaryCallers: List[String] = pipeline.summarySettings.get("sv_callers").get.asInstanceOf[List[String]] assert(summaryCallers.contains("delly")) assert(summaryCallers.contains("clever")) assert(summaryCallers.contains("breakdancer")) diff --git a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala index f5fce6c4ac3bf5363b8c94d1b07b762ae425537c..19f4f7749606e9008ea78eeedb9d4b620790cacd 100644 --- a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala +++ b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala @@ -62,6 +62,7 @@ trait ShivaTestTrait extends TestNGSuite with Matchers { def svCalling = false def cnvCalling = false def annotation = false + def usePrintReads = true private var dirs: List[File] = Nil @@ -83,7 +84,8 @@ trait ShivaTestTrait extends TestNGSuite with Matchers { "use_base_recalibration" -> baseRecalibration, "sv_calling" -> svCalling, "cnv_calling" -> cnvCalling, - "annotation" -> annotation), m) + "annotation" -> annotation, + "use_printreads" -> usePrintReads), m) } @@ -99,13 +101,13 @@ trait ShivaTestTrait extends TestNGSuite with Matchers { val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0) val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) - pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + (if (sample2) 1 else 0)) + pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + numberSamples) // Gatk preprocess pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0)) pipeline.functions.count(_.isInstanceOf[RealignerTargetCreator]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0)) pipeline.functions.count(_.isInstanceOf[BaseRecalibrator]) shouldBe (if (dbsnp && baseRecalibration) (numberLibs * 2) else 0) - pipeline.functions.count(_.isInstanceOf[PrintReads]) shouldBe (if (dbsnp && baseRecalibration) numberLibs else 0) + pipeline.functions.count(_.isInstanceOf[PrintReads]) shouldBe (if (dbsnp && baseRecalibration && usePrintReads) numberLibs else 0) pipeline.summarySettings.get("annotation") shouldBe Some(annotation) pipeline.summarySettings.get("sv_calling") shouldBe Some(svCalling) @@ -143,6 +145,10 @@ class ShivaNoDbsnpTest extends ShivaTestTrait { override def realignProvider = Array(true) override def dbsnp = false } +class ShivaNoPrintReadsTest extends ShivaTestTrait { + override def realignProvider = Array(false) + override def usePrintReads = false +} class ShivaLibraryCallingTest extends ShivaTestTrait { override def sample1 = Array(true, false) override def sample2 = Array(false, true)