From a0959af060a36a5027e9073644d1d8a63e0372d3 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof Date: Wed, 27 Apr 2016 19:26:45 +0200 Subject: [PATCH] Move variantcallers to public --- .../sasc/biopet/pipelines/gatk/Basty.scala | 27 --- .../sasc/biopet/pipelines/gatk/Shiva.scala | 2 +- .../pipelines/gatk/ShivaVariantcalling.scala | 35 ---- .../gatk/ShivaVariantcallingTest.scala | 1 + .../biopet/BiopetExecutableProtected.scala | 4 +- .../sasc/biopet/pipelines/basty/Basty.scala | 186 ++++++++++++++++- .../biopet/pipelines/basty/BastyTrait.scala | 196 ------------------ .../sasc/biopet/BiopetExecutablePublic.scala | 9 +- .../gatk/variantcallers/HaplotypeCaller.scala | 0 .../HaplotypeCallerAllele.scala | 0 .../variantcallers/HaplotypeCallerGvcf.scala | 0 .../variantcallers/UnifiedGenotyper.scala | 0 .../UnifiedGenotyperAllele.scala | 0 .../biopet/pipelines/shiva/ShivaTrait.scala | 2 +- .../pipelines/shiva/ShivaVariantcalling.scala | 167 ++++++++++++++- .../shiva/ShivaVariantcallingTrait.scala | 180 ---------------- 16 files changed, 350 insertions(+), 459 deletions(-) delete mode 100644 protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Basty.scala delete mode 100644 protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala delete mode 100644 public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala rename {protected/biopet-gatk-pipelines => public/shiva}/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala (100%) rename {protected/biopet-gatk-pipelines => public/shiva}/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala (100%) rename {protected/biopet-gatk-pipelines => public/shiva}/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala (100%) rename {protected/biopet-gatk-pipelines => public/shiva}/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala (100%) rename {protected/biopet-gatk-pipelines => public/shiva}/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala (100%) delete mode 100644 public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Basty.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Basty.scala deleted file mode 100644 index ecf4ccf90..000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Basty.scala +++ /dev/null @@ -1,27 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.core.PipelineCommand -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.pipelines.basty.BastyTrait -import org.broadinstitute.gatk.queue.QScript - -/** - * Basty pipeline including GATK steps - * - * Created by pjvan_thof on 3/4/15. - */ -class Basty(val root: Configurable) extends QScript with BastyTrait { - qscript => - def this() = this(null) - - override def variantcallers = List("unifiedgenotyper") - - override lazy val shiva = new Shiva(qscript) -} - -object Basty extends PipelineCommand \ No newline at end of file diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala index 6e1769d8a..1274a1dd2 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala @@ -7,7 +7,7 @@ package nl.lumc.sasc.biopet.pipelines.gatk import nl.lumc.sasc.biopet.core.PipelineCommand import nl.lumc.sasc.biopet.extensions.gatk.broad._ -import nl.lumc.sasc.biopet.pipelines.shiva.ShivaTrait +import nl.lumc.sasc.biopet.pipelines.shiva.{ ShivaTrait, ShivaVariantcalling } import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala deleted file mode 100644 index edbc633f3..000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.core.PipelineCommand -import nl.lumc.sasc.biopet.pipelines.gatk.variantcallers._ -import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcallingTrait -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.QScript - -/** - * ShivaVariantcalling with GATK variantcallers - * - * Created by pjvan_thof on 2/26/15. - */ -class ShivaVariantcalling(val root: Configurable) extends QScript with ShivaVariantcallingTrait { - qscript => - def this() = this(null) - - /** Will generate all available variantcallers */ - override def callersList = { - new HaplotypeCallerGvcf(this) :: - new HaplotypeCallerAllele(this) :: - new UnifiedGenotyperAllele(this) :: - new UnifiedGenotyper(this) :: - new HaplotypeCaller(this) :: - super.callersList - } -} - -/** object to add default main method to pipeline */ -object ShivaVariantcalling extends PipelineCommand \ No newline at end of file diff --git a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala index b26b2fb6e..e8444075d 100644 --- a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala +++ b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala @@ -12,6 +12,7 @@ import nl.lumc.sasc.biopet.utils.config.Config import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants import nl.lumc.sasc.biopet.extensions.gatk.broad.{ HaplotypeCaller, UnifiedGenotyper } import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter, VcfStats } +import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling import nl.lumc.sasc.biopet.utils.ConfigUtils import org.apache.commons.io.FileUtils import org.broadinstitute.gatk.queue.QSettings diff --git a/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableProtected.scala b/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableProtected.scala index 9155e7dba..35295eefc 100644 --- a/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableProtected.scala +++ b/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableProtected.scala @@ -9,9 +9,7 @@ import nl.lumc.sasc.biopet.utils.{ BiopetExecutable, MainCommand } object BiopetExecutableProtected extends BiopetExecutable { def pipelines: List[MainCommand] = BiopetExecutablePublic.publicPipelines ::: List( - nl.lumc.sasc.biopet.pipelines.gatk.Shiva, - nl.lumc.sasc.biopet.pipelines.gatk.ShivaVariantcalling, - nl.lumc.sasc.biopet.pipelines.gatk.Basty) + nl.lumc.sasc.biopet.pipelines.gatk.Shiva) def tools = BiopetExecutablePublic.tools } \ No newline at end of file diff --git a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala index 8476d1bbc..9d199cf0e 100644 --- a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala +++ b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala @@ -13,19 +13,189 @@ * license; For commercial users or users who do not want to follow the AGPL * license, please contact us to obtain a separate license. */ +/** + * Due to the license issue with GATK, this part of Biopet can only be used inside the + * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions + * on how to use this protected part of biopet or contact us at sasc@lumc.nl + */ package nl.lumc.sasc.biopet.pipelines.basty -import nl.lumc.sasc.biopet.core.PipelineCommand +import java.io.File + +import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand } +import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins } +import nl.lumc.sasc.biopet.pipelines.shiva.{ Shiva, ShivaTrait } +import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta +import nl.lumc.sasc.biopet.utils.ConfigUtils import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript -/** - * Basty implementation without GATK parts - * - * Created by pjvan_thof on 3/4/15. - */ -class Basty(val root: Configurable) extends QScript with BastyTrait { +class Basty(val root: Configurable) extends QScript with MultiSampleQScript { + qscript => + def this() = this(null) + + case class FastaOutput(variants: File, consensus: File, consensusVariants: File) + + def variantcallers = List("unifiedgenotyper") + + override def defaults = Map( + "ploidy" -> 1, + "variantcallers" -> variantcallers + ) + + lazy val shiva: ShivaTrait = new Shiva(qscript) + + def summaryFile: File = new File(outputDir, "Basty.summary.json") + + //TODO: Add summary + def summaryFiles: Map[String, File] = Map() + + //TODO: Add summary + def summarySettings: Map[String, Any] = Map() + + def makeSample(id: String) = new Sample(id) + class Sample(sampleId: String) extends AbstractSample(sampleId) { + //TODO: Add summary + def summaryFiles: Map[String, File] = Map() + + //TODO: Add summary + def summaryStats: Map[String, Any] = Map() + + def makeLibrary(id: String) = new Library(id) + class Library(libId: String) extends AbstractLibrary(libId) { + //TODO: Add summary + def summaryFiles: Map[String, File] = Map() + + //TODO: Add summary + def summaryStats: Map[String, Any] = Map() + + protected def addJobs(): Unit = {} + } + + var output: FastaOutput = _ + var outputSnps: FastaOutput = _ + + protected def addJobs(): Unit = { + addPerLibJobs() + output = addGenerateFasta(sampleId, sampleDir) + outputSnps = addGenerateFasta(sampleId, sampleDir, snpsOnly = true) + } + } + + def init() { + shiva.outputDir = outputDir + shiva.init() + } + + def biopetScript() { + shiva.biopetScript() + addAll(shiva.functions) + addSummaryQScript(shiva) + + inputFiles :::= shiva.inputFiles + + addSamplesJobs() + } + + def addMultiSampleJobs(): Unit = { + val refVariants = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference") + val refVariantSnps = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference", snpsOnly = true) + + val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList, + new File(outputDir, "fastas" + File.separator + "variant.fasta")) + add(catVariants) + val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList, + new File(outputDir, "fastas" + File.separator + "variant.snps_only.fasta")) + add(catVariantsSnps) + + val catConsensus = Cat(this, refVariants.consensus :: samples.map(_._2.output.consensus).toList, + new File(outputDir, "fastas" + File.separator + "consensus.fasta")) + add(catConsensus) + val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList, + new File(outputDir, "fastas" + File.separator + "consensus.snps_only.fasta")) + add(catConsensusSnps) + + val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList, + new File(outputDir, "fastas" + File.separator + "consensus.variant.fasta")) + add(catConsensusVariants) + val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList, + new File(outputDir, "fastas" + File.separator + "consensus.variant.snps_only.fasta")) + add(catConsensusVariantsSnps) + + val seed: Int = config("seed", default = 12345) + def addTreeJobs(variants: File, concensusVariants: File, outputDir: File, outputName: String) { + val dirSufixRaxml = new File(outputDir, "raxml") + val dirSufixGubbins = new File(outputDir, "gubbins") + + val raxmlMl = new Raxml(this) + raxmlMl.input = variants + raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX") + raxmlMl.p = Some(seed) + raxmlMl.n = outputName + "_ml" + raxmlMl.w = dirSufixRaxml + raxmlMl.N = config("ml_runs", default = 20, namespace = "raxml") + add(raxmlMl) + + val r = new scala.util.Random(seed) + val numBoot = config("boot_runs", default = 100, namespace = "raxml").asInt + val bootList = for (t <- 0 until numBoot) yield { + val raxmlBoot = new Raxml(this) + raxmlBoot.input = variants + raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX") + raxmlBoot.p = Some(seed) + raxmlBoot.b = Some(math.abs(r.nextInt())) + raxmlBoot.w = dirSufixRaxml + raxmlBoot.N = Some(1) + raxmlBoot.n = outputName + "_boot_" + t + add(raxmlBoot) + raxmlBoot.getBootstrapFile.get + } + + val cat = Cat(this, bootList.toList, new File(outputDir, "/boot_list")) + add(cat) + + val raxmlBi = new Raxml(this) + raxmlBi.input = concensusVariants + raxmlBi.t = raxmlMl.getBestTreeFile + raxmlBi.z = Some(cat.output) + raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX") + raxmlBi.p = Some(seed) + raxmlBi.f = "b" + raxmlBi.n = outputName + "_bi" + raxmlBi.w = dirSufixRaxml + add(raxmlBi) + + val gubbins = new RunGubbins(this) + gubbins.fastafile = concensusVariants + gubbins.startingTree = raxmlBi.getBipartitionsFile + gubbins.outputDirectory = dirSufixGubbins + add(gubbins) + } + + addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output, + new File(outputDir, "trees" + File.separator + "snps_only"), "snps_only") + addTreeJobs(catVariants.output, catConsensusVariants.output, + new File(outputDir, "trees" + File.separator + "snps_indels"), "snps_indels") + + } + + def addGenerateFasta(sampleName: String, outputDir: File, outputName: String = null, + snpsOnly: Boolean = false): FastaOutput = { + val bastyGenerateFasta = new BastyGenerateFasta(this) + bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName + bastyGenerateFasta.inputVcf = shiva.multisampleVariantCalling.get.finalFile + if (shiva.samples.contains(sampleName)) { + bastyGenerateFasta.bamFile = shiva.samples(sampleName).preProcessBam.get + } + bastyGenerateFasta.outputVariants = new File(outputDir, bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta") + bastyGenerateFasta.outputConsensus = new File(outputDir, bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta") + bastyGenerateFasta.outputConsensusVariants = new File(outputDir, bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta") + bastyGenerateFasta.sampleName = sampleName + bastyGenerateFasta.snpsOnly = snpsOnly + qscript.add(bastyGenerateFasta) + FastaOutput(bastyGenerateFasta.outputVariants, bastyGenerateFasta.outputConsensus, bastyGenerateFasta.outputConsensusVariants) + } } -object Basty extends PipelineCommand \ No newline at end of file +object Basty extends PipelineCommand diff --git a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala deleted file mode 100644 index 3202d4f37..000000000 --- a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala +++ /dev/null @@ -1,196 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.basty - -import java.io.File - -import nl.lumc.sasc.biopet.core.MultiSampleQScript -import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins } -import nl.lumc.sasc.biopet.pipelines.shiva.{ Shiva, ShivaTrait } -import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta -import nl.lumc.sasc.biopet.utils.ConfigUtils -import org.broadinstitute.gatk.queue.QScript - -trait BastyTrait extends MultiSampleQScript { - qscript: QScript => - - case class FastaOutput(variants: File, consensus: File, consensusVariants: File) - - def variantcallers = List("freebayes") - - override def defaults = Map( - "ploidy" -> 1, - "variantcallers" -> variantcallers - ) - - lazy val shiva: ShivaTrait = new Shiva(qscript) - - def summaryFile: File = new File(outputDir, "Basty.summary.json") - - //TODO: Add summary - def summaryFiles: Map[String, File] = Map() - - //TODO: Add summary - def summarySettings: Map[String, Any] = Map() - - def makeSample(id: String) = new Sample(id) - class Sample(sampleId: String) extends AbstractSample(sampleId) { - //TODO: Add summary - def summaryFiles: Map[String, File] = Map() - - //TODO: Add summary - def summaryStats: Map[String, Any] = Map() - - def makeLibrary(id: String) = new Library(id) - class Library(libId: String) extends AbstractLibrary(libId) { - //TODO: Add summary - def summaryFiles: Map[String, File] = Map() - - //TODO: Add summary - def summaryStats: Map[String, Any] = Map() - - protected def addJobs(): Unit = {} - } - - var output: FastaOutput = _ - var outputSnps: FastaOutput = _ - - protected def addJobs(): Unit = { - addPerLibJobs() - output = addGenerateFasta(sampleId, sampleDir) - outputSnps = addGenerateFasta(sampleId, sampleDir, snpsOnly = true) - } - } - - def init() { - shiva.outputDir = outputDir - shiva.init() - } - - def biopetScript() { - shiva.biopetScript() - addAll(shiva.functions) - addSummaryQScript(shiva) - - inputFiles :::= shiva.inputFiles - - addSamplesJobs() - } - - def addMultiSampleJobs(): Unit = { - val refVariants = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference") - val refVariantSnps = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference", snpsOnly = true) - - val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList, - new File(outputDir, "fastas" + File.separator + "variant.fasta")) - add(catVariants) - val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList, - new File(outputDir, "fastas" + File.separator + "variant.snps_only.fasta")) - add(catVariantsSnps) - - val catConsensus = Cat(this, refVariants.consensus :: samples.map(_._2.output.consensus).toList, - new File(outputDir, "fastas" + File.separator + "consensus.fasta")) - add(catConsensus) - val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList, - new File(outputDir, "fastas" + File.separator + "consensus.snps_only.fasta")) - add(catConsensusSnps) - - val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList, - new File(outputDir, "fastas" + File.separator + "consensus.variant.fasta")) - add(catConsensusVariants) - val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList, - new File(outputDir, "fastas" + File.separator + "consensus.variant.snps_only.fasta")) - add(catConsensusVariantsSnps) - - val seed: Int = config("seed", default = 12345) - def addTreeJobs(variants: File, concensusVariants: File, outputDir: File, outputName: String) { - val dirSufixRaxml = new File(outputDir, "raxml") - val dirSufixGubbins = new File(outputDir, "gubbins") - - val raxmlMl = new Raxml(this) - raxmlMl.input = variants - raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX") - raxmlMl.p = Some(seed) - raxmlMl.n = outputName + "_ml" - raxmlMl.w = dirSufixRaxml - raxmlMl.N = config("ml_runs", default = 20, namespace = "raxml") - add(raxmlMl) - - val r = new scala.util.Random(seed) - val numBoot = config("boot_runs", default = 100, namespace = "raxml").asInt - val bootList = for (t <- 0 until numBoot) yield { - val raxmlBoot = new Raxml(this) - raxmlBoot.input = variants - raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX") - raxmlBoot.p = Some(seed) - raxmlBoot.b = Some(math.abs(r.nextInt())) - raxmlBoot.w = dirSufixRaxml - raxmlBoot.N = Some(1) - raxmlBoot.n = outputName + "_boot_" + t - add(raxmlBoot) - raxmlBoot.getBootstrapFile.get - } - - val cat = Cat(this, bootList.toList, new File(outputDir, "/boot_list")) - add(cat) - - val raxmlBi = new Raxml(this) - raxmlBi.input = concensusVariants - raxmlBi.t = raxmlMl.getBestTreeFile - raxmlBi.z = Some(cat.output) - raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX") - raxmlBi.p = Some(seed) - raxmlBi.f = "b" - raxmlBi.n = outputName + "_bi" - raxmlBi.w = dirSufixRaxml - add(raxmlBi) - - val gubbins = new RunGubbins(this) - gubbins.fastafile = concensusVariants - gubbins.startingTree = raxmlBi.getBipartitionsFile - gubbins.outputDirectory = dirSufixGubbins - add(gubbins) - } - - addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output, - new File(outputDir, "trees" + File.separator + "snps_only"), "snps_only") - addTreeJobs(catVariants.output, catConsensusVariants.output, - new File(outputDir, "trees" + File.separator + "snps_indels"), "snps_indels") - - } - - def addGenerateFasta(sampleName: String, outputDir: File, outputName: String = null, - snpsOnly: Boolean = false): FastaOutput = { - val bastyGenerateFasta = new BastyGenerateFasta(this) - bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName - bastyGenerateFasta.inputVcf = shiva.multisampleVariantCalling.get.finalFile - if (shiva.samples.contains(sampleName)) { - bastyGenerateFasta.bamFile = shiva.samples(sampleName).preProcessBam.get - } - bastyGenerateFasta.outputVariants = new File(outputDir, bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta") - bastyGenerateFasta.outputConsensus = new File(outputDir, bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta") - bastyGenerateFasta.outputConsensusVariants = new File(outputDir, bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta") - bastyGenerateFasta.sampleName = sampleName - bastyGenerateFasta.snpsOnly = snpsOnly - qscript.add(bastyGenerateFasta) - FastaOutput(bastyGenerateFasta.outputVariants, bastyGenerateFasta.outputConsensus, bastyGenerateFasta.outputConsensusVariants) - } -} diff --git a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala index 8b21fd806..e0c4c29cb 100644 --- a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala +++ b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala @@ -15,7 +15,6 @@ */ package nl.lumc.sasc.biopet -import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling import nl.lumc.sasc.biopet.utils.{ BiopetExecutable, MainCommand } object BiopetExecutablePublic extends BiopetExecutable { @@ -33,13 +32,13 @@ object BiopetExecutablePublic extends BiopetExecutable { nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling, nl.lumc.sasc.biopet.pipelines.gears.GearsSingle, nl.lumc.sasc.biopet.pipelines.gears.Gears, - nl.lumc.sasc.biopet.pipelines.gwastest.GwasTest + nl.lumc.sasc.biopet.pipelines.gwastest.GwasTest, + nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling, + nl.lumc.sasc.biopet.pipelines.basty.Basty ) def pipelines: List[MainCommand] = List( - nl.lumc.sasc.biopet.pipelines.shiva.Shiva, - ShivaVariantcalling, - nl.lumc.sasc.biopet.pipelines.basty.Basty + nl.lumc.sasc.biopet.pipelines.shiva.Shiva ) ::: publicPipelines def tools: List[MainCommand] = BiopetToolsExecutable.tools diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala similarity index 100% rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala rename to public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala similarity index 100% rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala rename to public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala similarity index 100% rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala rename to public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala similarity index 100% rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala rename to public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala similarity index 100% rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala rename to public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala index 5014d33a6..630bdb4d7 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala @@ -43,7 +43,7 @@ trait ShivaTrait extends MultisampleMappingTrait with Reference with TargetRegio ) /** Method to make the variantcalling namespace of shiva */ - def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait with QScript = { + def makeVariantcalling(multisample: Boolean = false): ShivaVariantcalling with QScript = { if (multisample) new ShivaVariantcalling(qscript) { override def namePrefix = "multisample" override def configNamespace: String = "shivavariantcalling" diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala index d075619c1..e2a325ed8 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala @@ -15,17 +15,178 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva -import nl.lumc.sasc.biopet.core.PipelineCommand +import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, SampleLibraryTag } +import nl.lumc.sasc.biopet.core.summary.SummaryQScript +import nl.lumc.sasc.biopet.extensions.Tabix +import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, GenotypeConcordance } +import nl.lumc.sasc.biopet.extensions.tools.VcfStats +import nl.lumc.sasc.biopet.extensions.vt.{ VtDecompose, VtNormalize } +import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions +import nl.lumc.sasc.biopet.pipelines.gatk.variantcallers._ +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.{ VarscanCnsSingleSample, _ } +import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging } import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript /** - * Implementation of ShivaVariantcalling without GATK variantcallers + * Implementation of ShivaVariantcalling * * Created by pjvan_thof on 2/26/15. */ -class ShivaVariantcalling(val root: Configurable) extends QScript with ShivaVariantcallingTrait { +class ShivaVariantcalling(val root: Configurable) extends QScript + with SummaryQScript + with SampleLibraryTag + with Reference + with TargetRegions { + qscript => + def this() = this(null) + + @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true) + protected var inputBamsArg: List[File] = Nil + + var inputBams: Map[String, File] = Map() + + /** Executed before script */ + def init(): Unit = { + if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg) + } + + var referenceVcf: Option[File] = config("reference_vcf") + + var referenceVcfRegions: Option[File] = config("reference_vcf_regions") + + /** Name prefix, can override this methods if neeeded */ + def namePrefix: String = { + (sampleId, libId) match { + case (Some(s), Some(l)) => s + "-" + l + case (Some(s), _) => s + case _ => config("name_prefix") + } + } + + override def defaults = Map("bcftoolscall" -> Map("f" -> List("GQ"))) + + /** Final merged output files of all variantcaller modes */ + def finalFile = new File(outputDir, namePrefix + ".final.vcf.gz") + + /** Variantcallers requested by the config */ + protected val configCallers: Set[String] = config("variantcallers") + + protected val callers: List[Variantcaller] = { + (for (name <- configCallers) yield { + if (!callersList.exists(_.name == name)) + Logging.addError(s"variantcaller '$name' does not exist, possible to use: " + callersList.map(_.name).mkString(", ")) + callersList.find(_.name == name) + }).flatten.toList.sortBy(_.prio) + } + + /** This will add jobs for this pipeline */ + def biopetScript(): Unit = { + require(inputBams.nonEmpty, "No input bams found") + require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + callersList.map(_.name).mkString(", ")) + + val cv = new CombineVariants(qscript) + cv.outputFile = finalFile + cv.setKey = "VariantCaller" + cv.genotypeMergeOptions = Some("PRIORITIZE") + cv.rodPriorityList = callers.map(_.name).mkString(",") + for (caller <- callers) { + caller.inputBams = inputBams + caller.namePrefix = namePrefix + caller.outputDir = new File(outputDir, caller.name) + add(caller) + addStats(caller.outputFile, caller.name) + val normalize: Boolean = config("execute_vt_normalize", default = false, namespace = caller.configNamespace) + val decompose: Boolean = config("execute_vt_decompose", default = false, namespace = caller.configNamespace) + + val vtNormalize = new VtNormalize(this) + vtNormalize.inputVcf = caller.outputFile + val vtDecompose = new VtDecompose(this) + + if (normalize && decompose) { + vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz") + vtNormalize.isIntermediate = true + add(vtNormalize, Tabix(this, vtNormalize.outputVcf)) + vtDecompose.inputVcf = vtNormalize.outputVcf + vtDecompose.outputVcf = swapExt(caller.outputDir, vtNormalize.outputVcf, ".vcf.gz", ".decompose.vcf.gz") + add(vtDecompose, Tabix(this, vtDecompose.outputVcf)) + cv.addInput(vtDecompose.outputVcf, caller.name) + } else if (normalize && !decompose) { + vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz") + add(vtNormalize, Tabix(this, vtNormalize.outputVcf)) + cv.addInput(vtNormalize.outputVcf, caller.name) + } else if (!normalize && decompose) { + vtDecompose.inputVcf = caller.outputFile + vtDecompose.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".decompose.vcf.gz") + add(vtDecompose, Tabix(this, vtDecompose.outputVcf)) + cv.addInput(vtDecompose.outputVcf, caller.name) + } else cv.addInput(caller.outputFile, caller.name) + } + add(cv) + + addStats(finalFile, "final") + + addSummaryJobs() + } + + protected def addStats(vcfFile: File, name: String): Unit = { + val vcfStats = new VcfStats(qscript) + vcfStats.input = vcfFile + vcfStats.setOutputDir(new File(vcfFile.getParentFile, "vcfstats")) + if (name == "final") vcfStats.infoTags :+= "VariantCaller" + add(vcfStats) + addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name") + + referenceVcf.foreach(referenceVcfFile => { + val gc = new GenotypeConcordance(this) + gc.evalFile = vcfFile + gc.compFile = referenceVcfFile + gc.outputFile = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt") + referenceVcfRegions.foreach(gc.intervals ::= _) + add(gc) + addSummarizable(gc, s"$namePrefix-genotype_concordance-$name") + }) + + for (bedFile <- ampliconBedFile.toList ::: roiBedFiles) { + val regionName = bedFile.getName.stripSuffix(".bed") + val vcfStats = new VcfStats(qscript) + vcfStats.input = vcfFile + vcfStats.intervals = Some(bedFile) + vcfStats.setOutputDir(new File(vcfFile.getParentFile, s"vcfstats-$regionName")) + if (name == "final") vcfStats.infoTags :+= "VariantCaller" + add(vcfStats) + addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name-$regionName") + } + } + + /** Will generate all available variantcallers */ + protected def callersList: List[Variantcaller] = + new HaplotypeCallerGvcf(this) :: + new HaplotypeCallerAllele(this) :: + new UnifiedGenotyperAllele(this) :: + new UnifiedGenotyper(this) :: + new HaplotypeCaller(this) :: + new Freebayes(this) :: + new RawVcf(this) :: + new Bcftools(this) :: + new BcftoolsSingleSample(this) :: + new VarscanCnsSingleSample(this) :: Nil + + /** Location of summary file */ + def summaryFile = new File(outputDir, "ShivaVariantcalling.summary.json") + + /** Settings for the summary */ + def summarySettings = Map( + "variantcallers" -> configCallers.toList, + "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")), + "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")) + ) + + /** Files for the summary */ + def summaryFiles: Map[String, File] = { + callers.map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile) + } } object ShivaVariantcalling extends PipelineCommand \ No newline at end of file diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala deleted file mode 100644 index 06f876979..000000000 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala +++ /dev/null @@ -1,180 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.pipelines.shiva - -import nl.lumc.sasc.biopet.core.summary.SummaryQScript -import nl.lumc.sasc.biopet.core.{ Reference, SampleLibraryTag } -import nl.lumc.sasc.biopet.extensions.Tabix -import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, GenotypeConcordance } -import nl.lumc.sasc.biopet.extensions.tools.VcfStats -import nl.lumc.sasc.biopet.extensions.vt.{ VtDecompose, VtNormalize } -import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions -import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers._ -import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging } -import org.broadinstitute.gatk.queue.QScript - -/** - * Common trait for ShivaVariantcalling - * - * Created by pjvan_thof on 2/26/15. - */ -trait ShivaVariantcallingTrait extends SummaryQScript - with SampleLibraryTag - with Reference - with TargetRegions { - qscript: QScript => - - @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true) - protected var inputBamsArg: List[File] = Nil - - var inputBams: Map[String, File] = Map() - - /** Executed before script */ - def init(): Unit = { - if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg) - } - - var referenceVcf: Option[File] = config("reference_vcf") - - var referenceVcfRegions: Option[File] = config("reference_vcf_regions") - - /** Name prefix, can override this methods if neeeded */ - def namePrefix: String = { - (sampleId, libId) match { - case (Some(s), Some(l)) => s + "-" + l - case (Some(s), _) => s - case _ => config("name_prefix") - } - } - - override def defaults = Map("bcftoolscall" -> Map("f" -> List("GQ"))) - - /** Final merged output files of all variantcaller modes */ - def finalFile = new File(outputDir, namePrefix + ".final.vcf.gz") - - /** Variantcallers requested by the config */ - protected val configCallers: Set[String] = config("variantcallers") - - protected val callers: List[Variantcaller] = { - (for (name <- configCallers) yield { - if (!callersList.exists(_.name == name)) - Logging.addError(s"variantcaller '$name' does not exist, possible to use: " + callersList.map(_.name).mkString(", ")) - callersList.find(_.name == name) - }).flatten.toList.sortBy(_.prio) - } - - /** This will add jobs for this pipeline */ - def biopetScript(): Unit = { - require(inputBams.nonEmpty, "No input bams found") - require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + callersList.map(_.name).mkString(", ")) - - val cv = new CombineVariants(qscript) - cv.outputFile = finalFile - cv.setKey = "VariantCaller" - cv.genotypeMergeOptions = Some("PRIORITIZE") - cv.rodPriorityList = callers.map(_.name).mkString(",") - for (caller <- callers) { - caller.inputBams = inputBams - caller.namePrefix = namePrefix - caller.outputDir = new File(outputDir, caller.name) - add(caller) - addStats(caller.outputFile, caller.name) - val normalize: Boolean = config("execute_vt_normalize", default = false, namespace = caller.configNamespace) - val decompose: Boolean = config("execute_vt_decompose", default = false, namespace = caller.configNamespace) - - val vtNormalize = new VtNormalize(this) - vtNormalize.inputVcf = caller.outputFile - val vtDecompose = new VtDecompose(this) - - if (normalize && decompose) { - vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz") - vtNormalize.isIntermediate = true - add(vtNormalize, Tabix(this, vtNormalize.outputVcf)) - vtDecompose.inputVcf = vtNormalize.outputVcf - vtDecompose.outputVcf = swapExt(caller.outputDir, vtNormalize.outputVcf, ".vcf.gz", ".decompose.vcf.gz") - add(vtDecompose, Tabix(this, vtDecompose.outputVcf)) - cv.addInput(vtDecompose.outputVcf, caller.name) - } else if (normalize && !decompose) { - vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz") - add(vtNormalize, Tabix(this, vtNormalize.outputVcf)) - cv.addInput(vtNormalize.outputVcf, caller.name) - } else if (!normalize && decompose) { - vtDecompose.inputVcf = caller.outputFile - vtDecompose.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".decompose.vcf.gz") - add(vtDecompose, Tabix(this, vtDecompose.outputVcf)) - cv.addInput(vtDecompose.outputVcf, caller.name) - } else cv.addInput(caller.outputFile, caller.name) - } - add(cv) - - addStats(finalFile, "final") - - addSummaryJobs() - } - - protected def addStats(vcfFile: File, name: String): Unit = { - val vcfStats = new VcfStats(qscript) - vcfStats.input = vcfFile - vcfStats.setOutputDir(new File(vcfFile.getParentFile, "vcfstats")) - if (name == "final") vcfStats.infoTags :+= "VariantCaller" - add(vcfStats) - addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name") - - referenceVcf.foreach(referenceVcfFile => { - val gc = new GenotypeConcordance(this) - gc.evalFile = vcfFile - gc.compFile = referenceVcfFile - gc.outputFile = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt") - referenceVcfRegions.foreach(gc.intervals ::= _) - add(gc) - addSummarizable(gc, s"$namePrefix-genotype_concordance-$name") - }) - - for (bedFile <- ampliconBedFile.toList ::: roiBedFiles) { - val regionName = bedFile.getName.stripSuffix(".bed") - val vcfStats = new VcfStats(qscript) - vcfStats.input = vcfFile - vcfStats.intervals = Some(bedFile) - vcfStats.setOutputDir(new File(vcfFile.getParentFile, s"vcfstats-$regionName")) - if (name == "final") vcfStats.infoTags :+= "VariantCaller" - add(vcfStats) - addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name-$regionName") - } - } - - /** Will generate all available variantcallers */ - protected def callersList: List[Variantcaller] = List( - new Freebayes(this), - new RawVcf(this), - new Bcftools(this), - new BcftoolsSingleSample(this), - new VarscanCnsSingleSample(this)) - - /** Location of summary file */ - def summaryFile = new File(outputDir, "ShivaVariantcalling.summary.json") - - /** Settings for the summary */ - def summarySettings = Map( - "variantcallers" -> configCallers.toList, - "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")), - "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")) - ) - - /** Files for the summary */ - def summaryFiles: Map[String, File] = { - callers.map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile) - } -} \ No newline at end of file -- GitLab