diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala index cf5aa84c5bf75623b78ab3ad696a3d75300bd7fb..eae23a65406c85035ae38a939c11f4ac3d25af66 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala @@ -43,21 +43,25 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait { /** Class will generate library jobs */ class Library(libId: String) extends super.Library(libId) { - val useIndelRealigner: Boolean = config("use_indel_realigner", default = true) - val useBaseRecalibration: Boolean = config("use_base_recalibration", default = true) - - /** Return true when baserecalibration is executed */ - protected def doneBaseRecalibrator: Boolean = { + lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true) + lazy val useBaseRecalibration: Boolean = { + val c: Boolean = config("use_base_recalibration", default = true) val br = new BaseRecalibrator(qscript) - useBaseRecalibration && br.knownSites.nonEmpty + if (c && br.knownSites.isEmpty) + logger.warn("No Known site found, skipping base recalibration, file: " + inputBam) + c && br.knownSites.nonEmpty } + override def summarySettings = super.summarySettings + + ("use_indel_realigner" -> useIndelRealigner) + + ("use_base_recalibration" -> useBaseRecalibration) + /** This will adds preprocess steps, gatk indel realignment and base recalibration is included here */ override def preProcess(input: File): Option[File] = { - if (!useIndelRealigner && !doneBaseRecalibrator) None + if (!useIndelRealigner && !useBaseRecalibration) None else { val indelRealignFile = useIndelRealigner match { - case true => addIndelRealign(input, libDir, doneBaseRecalibrator || libraries.size > 1) + case true => addIndelRealign(input, libDir, useBaseRecalibration || libraries.size > 1) case false => input } @@ -69,12 +73,16 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait { } } + override def summarySettings = super.summarySettings + ("use_indel_realigner" -> useIndelRealigner) + + lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true) + /** This methods will add double preprocess steps, with GATK indel realignment */ override protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = { if (input.size <= 1) super.addDoublePreProcess(input) - else super.addDoublePreProcess(input, isIntermediate = true).collect { + else super.addDoublePreProcess(input, isIntermediate = useIndelRealigner).collect { case file => - config("use_indel_realigner", default = true).asBoolean match { + useIndelRealigner match { case true => addIndelRealign(file, sampleDir, isIntermediate = false) case false => file } @@ -99,10 +107,7 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait { def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean): File = { val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal")) - if (baseRecalibrator.knownSites.isEmpty) { - logger.warn("No Known site found, skipping base recalibration, file: " + inputBam) - return inputBam - } + if (baseRecalibrator.knownSites.isEmpty) return inputBam add(baseRecalibrator) if (config("use_analyze_covariates", default = false).asBoolean) { diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala index 981417a012d7d3e17ec87ad879e622d77273b01f..f0c9478603de90f9401241bfcebf797d24196f52 100644 --- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala +++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala @@ -19,7 +19,7 @@ import java.io.File import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.summary.SummaryQScript -import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, SampleLibraryTag } +import nl.lumc.sasc.biopet.core.{ Reference, BiopetFifoPipe, PipelineCommand, SampleLibraryTag } import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect } import nl.lumc.sasc.biopet.extensions.picard._ import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat @@ -27,7 +27,11 @@ import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat import org.broadinstitute.gatk.queue.QScript -class BamMetrics(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag { +class BamMetrics(val root: Configurable) extends QScript + with SummaryQScript + with SampleLibraryTag + with Reference { + def this() = this(null) @Input(doc = "Bam File", shortName = "BAM", required = true) @@ -51,7 +55,8 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit } /** returns files to store in summary */ - def summaryFiles = Map("input_bam" -> inputBam) ++ + def summaryFiles = Map("reference" -> referenceFasta(), + "input_bam" -> inputBam) ++ ampliconBedFile.map("amplicon" -> _).toMap ++ ampliconBedFile.map(x => "roi_" + x.getName.stripSuffix(".bed") -> x).toMap diff --git a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala index d9085369f0316b6a0474fe02cd15ef07c683713a..7da7f9c3ce26248867468bb59398533a87cd007e 100644 --- a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala +++ b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala @@ -180,7 +180,7 @@ trait BastyTrait extends MultiSampleQScript { snpsOnly: Boolean = false): FastaOutput = { val bastyGenerateFasta = new BastyGenerateFasta(this) bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName - bastyGenerateFasta.inputVcf = shiva.variantCalling.get.finalFile + bastyGenerateFasta.inputVcf = shiva.multisampleVariantCalling.get.finalFile if (shiva.samples.contains(sampleName)) { bastyGenerateFasta.bamFile = shiva.samples(sampleName).preProcessBam.get } diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala index 1d9d6396235164d611f10291657cff7ffe72be60..6334b3cbfc1f02bb28d2085ef791939dafd388b2 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala @@ -6,22 +6,21 @@ import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.function.CommandLineFunction /** - * Created by pjvanthof on 01/10/15. + * This trait will control resources given to a CommandlineFunction */ trait CommandLineResources extends CommandLineFunction with Configurable { def defaultThreads = 1 final def threads = nCoresRequest match { case Some(i) => i - case _ => { + case _ => val t = getThreads nCoresRequest = Some(t) t - } } var vmem: Option[String] = config("vmem") - def defaultCoreMemory: Double = 1.0 + def defaultCoreMemory: Double = 2.0 def defaultVmemFactor: Double = 1.4 var vmemFactor: Double = config("vmem_factor", default = defaultVmemFactor) @@ -64,23 +63,26 @@ trait CommandLineResources extends CommandLineFunction with Configurable { nCoresRequest = Option(threads) + /** The 1e retry does not yet upgrade the memory */ + val retryMultipler = if (retry > 1) retry - 1 else 0 + _coreMemory = config("core_memory", default = defaultCoreMemory).asDouble + - (0.5 * retry) + (0.5 * retryMultipler) if (config.contains("memory_limit")) memoryLimit = config("memory_limit") else memoryLimit = Some(_coreMemory * threads) if (config.contains("resident_limit")) residentLimit = config("resident_limit") - else residentLimit = Some((_coreMemory + (0.5 * retry)) * residentFactor) + else residentLimit = Some((_coreMemory + (0.5 * retryMultipler)) * residentFactor) - if (!config.contains("vmem")) vmem = Some((_coreMemory * (vmemFactor + (0.5 * retry))) + "G") + if (!config.contains("vmem")) vmem = Some((_coreMemory * (vmemFactor + (0.5 * retryMultipler))) + "G") jobName = configName + ":" + (if (firstOutput != null) firstOutput.getName else jobOutputFile) } override def setupRetry(): Unit = { super.setupRetry() if (vmem.isDefined) jobResourceRequests = jobResourceRequests.filterNot(_.contains("h_vmem=")) - logger.info("Auto raise memory on retry") + if (retry > 0) logger.info("Auto raise memory on retry") retry += 1 this.freeze() } diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala index f60aeeca83c20ad810ad2183a8938137200a0ee3..81c992b4d5911af0b9a808e7750d15f1078c44e9 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala @@ -35,6 +35,9 @@ trait MultiSampleQScript extends SummaryQScript { /** Overrules config of qscript with default sample */ val config = new ConfigFunctions(defaultSample = sampleId) + /** Sample specific settings */ + def summarySettings: Map[String, Any] = Map() + /** Library class with basic functions build in */ abstract class AbstractLibrary(val libId: String) extends Summarizable { /** Overrules config of qscript with default sample and default library */ @@ -45,6 +48,9 @@ trait MultiSampleQScript extends SummaryQScript { qscript.addSummarizable(summarizable, name, Some(sampleId), Some(libId)) } + /** Library specific settings */ + def summarySettings: Map[String, Any] = Map() + /** Adds the library jobs */ final def addAndTrackJobs(): Unit = { if (nameRegex.findFirstIn(libId) == None) diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala index be479a8fa972fe896fb8d45cd74708944ba4ba64..bcdabfaeb1f95ed3bade6df058b3b9ae2b291f96 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala @@ -118,22 +118,21 @@ object Reference { /** * Raise an exception when given fasta file has no fai file * @param fastaFile Fasta file - * @throws IllegalArgumentException */ def requireFai(fastaFile: File): Unit = { val fai = new File(fastaFile.getAbsolutePath + ".fai") - require(fai.exists(), "Reference is missing a fai file") - require(IndexedFastaSequenceFile.canCreateIndexedFastaReader(fastaFile), - "Index of reference cannot be loaded, reference: " + fastaFile) + if (fai.exists()) { + if (!IndexedFastaSequenceFile.canCreateIndexedFastaReader(fastaFile)) + Logging.addError(s"Index of reference cannot be loaded, reference: $fastaFile") + } else Logging.addError("Reference is missing a fai file") } /** * Raise an exception when given fasta file has no dict file * @param fastaFile Fasta file - * @throws IllegalArgumentException */ def requireDict(fastaFile: File): Unit = { val dict = new File(fastaFile.getAbsolutePath.stripSuffix(".fa").stripSuffix(".fasta") + ".dict") - require(dict.exists(), "Reference is missing a dict file") + if (!dict.exists()) Logging.addError("Reference is missing a dict file") } } diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala index ab2f64546a7f79e8432ec1ac7ff0ab19427cccfc..cc45e751af475a705d0ed55beb82f97636e34ffb 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala @@ -96,40 +96,29 @@ trait SummaryQScript extends BiopetQScript { qscript => val writeSummary = new WriteSummary(this) def addChecksum(file: File): Unit = { - if (writeSummary.md5sum && !SummaryQScript.md5sumCache.contains(file)) { - val md5sum = new Md5sum(this) { - override def configName = "md5sum" - override def cmdLine: String = super.cmdLine + " || " + - required("echo") + required("error_on_capture " + input.toString) + " > " + required(output) - } - md5sum.input = file - md5sum.output = new File(file.getParentFile, file.getName + ".md5") - - // Need to not write a md5 file outside the outputDir - if (!file.getAbsolutePath.startsWith(outputDir.getAbsolutePath)) - md5sum.output = new File(outputDir, ".md5" + file.getAbsolutePath + ".md5") - - writeSummary.deps :+= md5sum.output - SummaryQScript.md5sumCache += file -> md5sum.output - add(md5sum) + if (writeSummary.md5sum) { + if (!SummaryQScript.md5sumCache.contains(file)) { + val md5sum = new Md5sum(this) { + override def configName = "md5sum" + + override def cmdLine: String = super.cmdLine + " || " + + required("echo") + required("error_on_capture " + input.toString) + " > " + required(output) + } + md5sum.input = file + md5sum.output = new File(file.getParentFile, file.getName + ".md5") + + // Need to not write a md5 file outside the outputDir + if (!file.getAbsolutePath.startsWith(outputDir.getAbsolutePath)) + md5sum.output = new File(outputDir, ".md5" + file.getAbsolutePath + ".md5") + + writeSummary.deps :+= md5sum.output + SummaryQScript.md5sumCache += file -> md5sum.output + add(md5sum) + } else writeSummary.deps :+= SummaryQScript.md5sumCache(file) } //TODO: add more checksums types } - for (inputFile <- inputFiles) { - inputFile.md5 match { - case Some(checksum) => { - val checkMd5 = new CheckChecksum - checkMd5.inputFile = inputFile.file - require(SummaryQScript.md5sumCache.contains(inputFile.file), "Md5 job is not executed, checksum file can't be found") - checkMd5.checksumFile = SummaryQScript.md5sumCache(inputFile.file) - checkMd5.checksum = checksum - add(checkMd5) - } - case _ => - } - } - for ((_, summarizableList) <- summarizables; summarizable <- summarizableList) { summarizable match { case f: BiopetCommandLineFunction => f.beforeGraph() @@ -146,6 +135,21 @@ trait SummaryQScript extends BiopetQScript { qscript => } } + for (inputFile <- inputFiles) { + inputFile.md5 match { + case Some(checksum) => { + val checkMd5 = new CheckChecksum + checkMd5.inputFile = inputFile.file + require(SummaryQScript.md5sumCache.contains(inputFile.file), + s"Md5 job is not executed, checksum file can't be found for: ${inputFile.file}") + checkMd5.checksumFile = SummaryQScript.md5sumCache(inputFile.file) + checkMd5.checksum = checksum + add(checkMd5) + } + case _ => + } + } + for ((_, file) <- this.summaryFiles) addChecksum(file) diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala index 7b8f34ecbb8108d53c342df2f4112df973f702c3..02c860fdb1719c8c4635d467bea752b74745f9b2 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala @@ -18,9 +18,9 @@ package nl.lumc.sasc.biopet.core.summary import java.io.{ File, PrintWriter } import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, BiopetJavaCommandLineFunction, SampleLibraryTag } +import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.utils.ConfigUtils -import nl.lumc.sasc.biopet.{ LastCommitHash, Version } +import nl.lumc.sasc.biopet.LastCommitHash import org.broadinstitute.gatk.queue.function.{ InProcessFunction, QFunction } import org.broadinstitute.gatk.utils.commandline.{ Input, Output } @@ -90,7 +90,7 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config } ( - qscript.functions.flatMap(fetchVersion(_)) ++ + qscript.functions.flatMap(fetchVersion) ++ qscript.functions .flatMap { case f: BiopetCommandLineFunction => f.pipesJobs @@ -99,13 +99,29 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config ).toMap } - val map = Map(qscript.summaryName -> ((if (settings.isEmpty) Map[String, Any]() else Map("settings" -> settings)) ++ - (if (files.isEmpty) Map[String, Any]() else Map("files" -> Map("pipeline" -> files))) ++ - (if (executables.isEmpty) Map[String, Any]() else Map("executables" -> executables.toMap)))) + val map = Map(qscript.summaryName -> Map( + "settings" -> settings, + "files" -> Map("pipeline" -> files), + "executables" -> executables.toMap) + ) qscript match { case tag: SampleLibraryTag => prefixSampleLibrary(map, tag.sampleId, tag.libId) - case _ => map + case q: MultiSampleQScript => + ConfigUtils.mergeMaps( + Map("samples" -> q.samples.map { + case (sampleName, sample) => + sampleName -> Map( + qscript.summaryName -> Map("settings" -> sample.summarySettings), + "libraries" -> sample.libraries.map { + case (libName, lib) => + libName -> Map( + qscript.summaryName -> Map("settings" -> lib.summarySettings) + ) + } + ) + }), map) + case _ => map } } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala index 4bffa97fe173c113697803257e7cd1f206e97027..33522732f42599390238d294c8e6dfb1297f3829 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala @@ -68,7 +68,7 @@ abstract class Gatk extends BiopetJavaCommandLineFunction with Reference with Ve required("-R", reference) + optional("-K", gatkKey) + optional("-et", et) + - repeat("-I", intervals) + + repeat("-L", intervals) + repeat("-XL", excludeIntervals) + repeat("-ped", pedigree) } \ No newline at end of file diff --git a/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp b/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp index 5721d22515ced92c9102565df158c20be80a2807..8c850934608710e0a343def57e0392a5a799b164 100644 --- a/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp +++ b/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp @@ -11,10 +11,13 @@ <tr><th>Output directory</th><td>${summary.getValue("meta", "output_dir")}</td></tr> <tr> <th>Variantcallers</th> - <td>${summary.getValue("shivavariantcalling", "settings", "variantcallers").get.asInstanceOf[List[String]].mkString(", ")}</td> + <td>${summary.getValue("shivavariantcalling", "settings", "variantcallers").getOrElse(List("None")).asInstanceOf[List[String]].mkString(", ")}</td> </tr> <tr><th>Reference</th><td>${summary.getValue("shiva", "settings", "reference", "species")} - ${summary.getValue("shiva", "settings", "reference", "name")}</td></tr> <tr><th>Number of samples</th><td>${summary.samples.size}</td></tr> + <tr><th>Annotation</th><td>${summary.getValue("shiva", "settings", "annotation")}</td></tr> + <tr><th>Multisample variantcalling</th><td>${summary.getValue("shiva", "settings", "multisample_variantcalling")}</td></tr> + <tr><th>Sv calling</th><td>${summary.getValue("shiva", "settings", "sv_calling")}</td></tr> </tbody> </table> <br/> diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala index 8749071dfd151a7da42e1f3d01c4e2474eba1b7e..ca3bf3016d3e00b970574e0cec4426e6b10a6bc5 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala @@ -36,6 +36,11 @@ class ShivaReport(val root: Configurable) extends ReportBuilderExtension { /** Object for report generation for Shiva pipeline */ object ShivaReport extends MultisampleReportBuilder { + def variantcallingExecuted = summary.getValue("shiva", "settings", "multisample_variantcalling") match { + case Some(true) => true + case _ => false + } + override def extFiles = super.extFiles ++ List("js/gears.js") .map(x => ExtFile("/nl/lumc/sasc/biopet/pipelines/gears/report/ext/" + x, x)) @@ -54,21 +59,22 @@ object ShivaReport extends MultisampleReportBuilder { ), Map()) ), List( - "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp"), - "Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp", - Map("showPlot" -> true, "showTable" -> false)), - "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", + "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp")) ++ + (if (variantcallingExecuted) List("Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp", + Map("showPlot" -> true, "showTable" -> false))) + else Nil) ++ + List("Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false) ), - "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", - Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)), - "Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", - Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)), - "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp", - Map("showPlot" -> true, "showTable" -> false)), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp", - Map("showPlot" -> true, "showTable" -> false)) - ), + "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", + Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)), + "Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", + Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)), + "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp", + Map("showPlot" -> true, "showTable" -> false)), + "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp", + Map("showPlot" -> true, "showTable" -> false)) + ), pageArgs ) } @@ -117,9 +123,10 @@ object ShivaReport extends MultisampleReportBuilder { "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"), "Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)), "Preprocessed bam files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", - Map("pipelineName" -> "shiva", "fileTag" -> "preProcessBam")), - "VCF files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/outputVcfFiles.ssp", Map("sampleId" -> None)) - ), Map()) + Map("pipelineName" -> "shiva", "fileTag" -> "preProcessBam"))) ++ + (if (variantcallingExecuted) List("VCF files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/outputVcfFiles.ssp", + Map("sampleId" -> None))) + else Nil), Map()) /** Single sample page */ def samplePage(sampleId: String, args: Map[String, Any]): ReportPage = { @@ -130,28 +137,30 @@ object ShivaReport extends MultisampleReportBuilder { ), List( "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", if (summary.libraries(sampleId).size > 1) Map("showPlot" -> true) else Map()), - "Preprocessing" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", Map("sampleLevel" -> true)), - "Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp"), - "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") - ), args) + "Preprocessing" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", Map("sampleLevel" -> true))) ++ + (if (variantcallingExecuted) List("Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp")) else Nil) ++ + List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), + "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") + ), args) } /** Library page */ def libraryPage(sampleId: String, libId: String, args: Map[String, Any]): ReportPage = { - def krakenExecuted = summary.getValue(Some(sampleId), Some(libId), "gears", "stats", "krakenreport").isDefined + val flexiprepExecuted = summary.getLibraryValue(sampleId, libId, "flexiprep").isDefined + val krakenExecuted = summary.getValue(Some(sampleId), Some(libId), "gears", "stats", "krakenreport").isDefined - ReportPage(List( - "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId)), - "QC" -> FlexiprepReport.flexiprepPage - ) ::: (if (krakenExecuted) List("Gears - Metagenomics" -> ReportPage(List(), List( - "Sunburst analysis" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp" - )), Map())) - else Nil), List( - "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"), - "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") - ), args) + ReportPage( + "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId)) :: + (if (flexiprepExecuted) List("QC" -> FlexiprepReport.flexiprepPage) else Nil + ) ::: (if (krakenExecuted) List("Gears - Metagenomics" -> ReportPage(List(), List( + "Sunburst analysis" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp" + )), Map())) + else Nil), "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp") :: + (if (flexiprepExecuted) List( + "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), + "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") + ) + else Nil), args) } /** Name of the report */ diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala index b628744d325184b46b176c1b9b9871208b72a170..19c11ca883cd6b8597ec41e3be1ed4bdb0e33c06 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala @@ -18,7 +18,6 @@ package nl.lumc.sasc.biopet.pipelines.shiva import java.io.File import htsjdk.samtools.SamReaderFactory -import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, Reference } import nl.lumc.sasc.biopet.extensions.Ln import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq } @@ -33,7 +32,7 @@ import scala.collection.JavaConversions._ * * Created by pjvan_thof on 2/26/15. */ -trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { +trait ShivaTrait extends MultiSampleQScript with Reference { qscript => /** Executed before running the script */ @@ -85,15 +84,20 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { /** Method to make a library */ def makeLibrary(id: String) = new Library(id) + /** Sample specific settings */ + override def summarySettings = Map("single_sample_variantcalling" -> variantcalling.isDefined) + /** Class to generate jobs for a library */ class Library(libId: String) extends AbstractLibrary(libId) { /** Library specific files to add to the summary */ def summaryFiles: Map[String, File] = { - (bamFile, preProcessBam) match { + ((bamFile, preProcessBam) match { case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb) - case (Some(b), _) => Map("bamFile" -> b) + case (Some(b), _) => Map("bamFile" -> b, "preProcessBam" -> b) case _ => Map() - } + }) ++ (inputR1.map("input_R1" -> _) :: + inputR2.map("input_R2" -> _) :: + inputBam.map("input_bam" -> _) :: Nil).flatten.toMap } /** Library specific stats to add to summary */ @@ -102,6 +106,9 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { /** Method to execute library preprocess */ def preProcess(input: File): Option[File] = None + /** Library specific settings */ + override def summarySettings = Map("library_variantcalling" -> variantcalling.isDefined) + /** Method to make the mapping submodule */ def makeMapping = { val mapping = new Mapping(qscript) @@ -112,8 +119,12 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile)) } + lazy val inputR1: Option[File] = config("R1") + lazy val inputR2: Option[File] = config("R2") + lazy val inputBam: Option[File] = if (inputR1.isEmpty) config("bam") else None + lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) = - (config.contains("R1"), config.contains("bam")) match { + (inputR1.isDefined, inputBam.isDefined) match { case (true, _) => makeMapping // Default starting from fastq files case (false, true) => // Starting from bam file config("bam_to_fastq", default = false).asBoolean match { @@ -132,18 +143,18 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { /** This will add jobs for this library */ def addJobs(): Unit = { - (config.contains("R1"), config.contains("bam")) match { + (inputR1.isDefined, inputBam.isDefined) match { case (true, _) => mapping.foreach(mapping => { - mapping.input_R1 = config("R1") - mapping.input_R2 = config("R2") + mapping.input_R1 = inputR1.get + mapping.input_R2 = inputR2 inputFiles :+= new InputFile(mapping.input_R1, config("R1_md5")) mapping.input_R2.foreach(inputFiles :+= new InputFile(_, config("R2_md5"))) }) case (false, true) => { - inputFiles :+= new InputFile(config("bam"), config("bam_md5")) + inputFiles :+= new InputFile(inputBam.get, config("bam_md5")) config("bam_to_fastq", default = false).asBoolean match { case true => - val samToFastq = SamToFastq(qscript, config("bam"), + val samToFastq = SamToFastq(qscript, inputBam.get, new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"), new File(libDir, sampleId + "-" + libId + ".R2.fq.gz")) samToFastq.isIntermediate = true @@ -153,7 +164,7 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { mapping.input_R2 = Some(samToFastq.fastqR2) }) case false => - val inputSam = SamReaderFactory.makeDefault.open(config("bam")) + val inputSam = SamReaderFactory.makeDefault.open(inputBam.get) val readGroups = inputSam.getFileHeader.getReadGroups val readGroupOke = readGroups.forall(readGroup => { @@ -165,25 +176,37 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { if (!readGroupOke) { if (config("correct_readgroups", default = false).asBoolean) { - logger.info("Correcting readgroups, file:" + config("bam")) - val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get) + logger.info("Correcting readgroups, file:" + inputBam.get) + val aorrg = AddOrReplaceReadGroups(qscript, inputBam.get, bamFile.get) aorrg.RGID = sampleId + "-" + libId aorrg.RGLB = libId aorrg.RGSM = sampleId + aorrg.RGPL = "unknown" + aorrg.RGPU = "na" aorrg.isIntermediate = true qscript.add(aorrg) } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile + "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this") } else { - val oldBamFile: File = config("bam") + val oldBamFile: File = inputBam.get val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai") + val newIndex: File = new File(libDir, bamFile.get.getName.stripSuffix(".bam") + ".bai") val baiLn = Ln(qscript, oldIndex, newIndex) add(baiLn) val bamLn = Ln(qscript, oldBamFile, bamFile.get) bamLn.deps :+= baiLn.output add(bamLn) + + val bamMetrics = new BamMetrics(qscript) + bamMetrics.sampleId = Some(sampleId) + bamMetrics.libId = Some(libId) + bamMetrics.inputBam = bamFile.get + bamMetrics.outputDir = new File(libDir, "metrics") + bamMetrics.init() + bamMetrics.biopetScript() + addAll(bamMetrics.functions) + addSummaryQScript(bamMetrics) } } } @@ -215,9 +238,9 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = { if (input == Nil) None else if (input.tail == Nil) { - val bamFile = new File(sampleDir, input.head.getName) + val bamFile = new File(sampleDir, s"$sampleId.bam") val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = new File(sampleDir, input.head.getName.stripSuffix(".bam") + ".bai") + val newIndex: File = new File(sampleDir, s"$sampleId.bai") val baiLn = Ln(qscript, oldIndex, newIndex) add(baiLn) @@ -263,11 +286,6 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { addAll(bamMetrics.functions) addSummaryQScript(bamMetrics) - val oldIndex: File = new File(bam.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = new File(bam + ".bai") - val baiLn = Ln(qscript, oldIndex, newIndex) - add(baiLn) - variantcalling.foreach(vc => { vc.sampleId = Some(sampleId) vc.outputDir = new File(sampleDir, "variantcalling") @@ -281,7 +299,7 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { } } - lazy val variantCalling = if (config("multisample_variantcalling", default = true).asBoolean) { + lazy val multisampleVariantCalling = if (config("multisample_variantcalling", default = true).asBoolean) { Some(makeVariantcalling(multisample = true)) } else None @@ -289,9 +307,14 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { Some(new ShivaSvCalling(this)) } else None + lazy val annotation = if (multisampleVariantCalling.isDefined && + config("annotation", default = false).asBoolean) { + Some(new Toucan(this)) + } else None + /** This will add the mutisample variantcalling */ def addMultiSampleJobs(): Unit = { - variantCalling.foreach(vc => { + multisampleVariantCalling.foreach(vc => { vc.outputDir = new File(outputDir, "variantcalling") vc.inputBams = samples.flatMap(_._2.preProcessBam).toList vc.init() @@ -299,8 +322,7 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { addAll(vc.functions) addSummaryQScript(vc) - if (config("annotation", default = false).asBoolean) { - val toucan = new Toucan(this) + annotation.foreach { toucan => toucan.outputDir = new File(outputDir, "annotation") toucan.inputVCF = vc.finalFile toucan.init() @@ -331,7 +353,10 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { Map( "reference" -> referenceSummary, "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")), - "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")) + "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")), + "annotation" -> annotation.isDefined, + "multisample_variantcalling" -> multisampleVariantCalling.isDefined, + "sv_calling" -> svCalling.isDefined ) }