From a0959af060a36a5027e9073644d1d8a63e0372d3 Mon Sep 17 00:00:00 2001
From: Peter van 't Hof
Date: Wed, 27 Apr 2016 19:26:45 +0200
Subject: [PATCH] Move variantcallers to public
---
.../sasc/biopet/pipelines/gatk/Basty.scala | 27 ---
.../sasc/biopet/pipelines/gatk/Shiva.scala | 2 +-
.../pipelines/gatk/ShivaVariantcalling.scala | 35 ----
.../gatk/ShivaVariantcallingTest.scala | 1 +
.../biopet/BiopetExecutableProtected.scala | 4 +-
.../sasc/biopet/pipelines/basty/Basty.scala | 186 ++++++++++++++++-
.../biopet/pipelines/basty/BastyTrait.scala | 196 ------------------
.../sasc/biopet/BiopetExecutablePublic.scala | 9 +-
.../gatk/variantcallers/HaplotypeCaller.scala | 0
.../HaplotypeCallerAllele.scala | 0
.../variantcallers/HaplotypeCallerGvcf.scala | 0
.../variantcallers/UnifiedGenotyper.scala | 0
.../UnifiedGenotyperAllele.scala | 0
.../biopet/pipelines/shiva/ShivaTrait.scala | 2 +-
.../pipelines/shiva/ShivaVariantcalling.scala | 167 ++++++++++++++-
.../shiva/ShivaVariantcallingTrait.scala | 180 ----------------
16 files changed, 350 insertions(+), 459 deletions(-)
delete mode 100644 protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Basty.scala
delete mode 100644 protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala
delete mode 100644 public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala
rename {protected/biopet-gatk-pipelines => public/shiva}/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala (100%)
rename {protected/biopet-gatk-pipelines => public/shiva}/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala (100%)
rename {protected/biopet-gatk-pipelines => public/shiva}/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala (100%)
rename {protected/biopet-gatk-pipelines => public/shiva}/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala (100%)
rename {protected/biopet-gatk-pipelines => public/shiva}/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala (100%)
delete mode 100644 public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala
diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Basty.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Basty.scala
deleted file mode 100644
index ecf4ccf90..000000000
--- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Basty.scala
+++ /dev/null
@@ -1,27 +0,0 @@
-/**
- * Due to the license issue with GATK, this part of Biopet can only be used inside the
- * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
- * on how to use this protected part of biopet or contact us at sasc@lumc.nl
- */
-package nl.lumc.sasc.biopet.pipelines.gatk
-
-import nl.lumc.sasc.biopet.core.PipelineCommand
-import nl.lumc.sasc.biopet.utils.config.Configurable
-import nl.lumc.sasc.biopet.pipelines.basty.BastyTrait
-import org.broadinstitute.gatk.queue.QScript
-
-/**
- * Basty pipeline including GATK steps
- *
- * Created by pjvan_thof on 3/4/15.
- */
-class Basty(val root: Configurable) extends QScript with BastyTrait {
- qscript =>
- def this() = this(null)
-
- override def variantcallers = List("unifiedgenotyper")
-
- override lazy val shiva = new Shiva(qscript)
-}
-
-object Basty extends PipelineCommand
\ No newline at end of file
diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala
index 6e1769d8a..1274a1dd2 100644
--- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala
+++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala
@@ -7,7 +7,7 @@ package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.extensions.gatk.broad._
-import nl.lumc.sasc.biopet.pipelines.shiva.ShivaTrait
+import nl.lumc.sasc.biopet.pipelines.shiva.{ ShivaTrait, ShivaVariantcalling }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala
deleted file mode 100644
index edbc633f3..000000000
--- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala
+++ /dev/null
@@ -1,35 +0,0 @@
-/**
- * Due to the license issue with GATK, this part of Biopet can only be used inside the
- * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
- * on how to use this protected part of biopet or contact us at sasc@lumc.nl
- */
-package nl.lumc.sasc.biopet.pipelines.gatk
-
-import nl.lumc.sasc.biopet.core.PipelineCommand
-import nl.lumc.sasc.biopet.pipelines.gatk.variantcallers._
-import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcallingTrait
-import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.queue.QScript
-
-/**
- * ShivaVariantcalling with GATK variantcallers
- *
- * Created by pjvan_thof on 2/26/15.
- */
-class ShivaVariantcalling(val root: Configurable) extends QScript with ShivaVariantcallingTrait {
- qscript =>
- def this() = this(null)
-
- /** Will generate all available variantcallers */
- override def callersList = {
- new HaplotypeCallerGvcf(this) ::
- new HaplotypeCallerAllele(this) ::
- new UnifiedGenotyperAllele(this) ::
- new UnifiedGenotyper(this) ::
- new HaplotypeCaller(this) ::
- super.callersList
- }
-}
-
-/** object to add default main method to pipeline */
-object ShivaVariantcalling extends PipelineCommand
\ No newline at end of file
diff --git a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala
index b26b2fb6e..e8444075d 100644
--- a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala
+++ b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala
@@ -12,6 +12,7 @@ import nl.lumc.sasc.biopet.utils.config.Config
import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
import nl.lumc.sasc.biopet.extensions.gatk.broad.{ HaplotypeCaller, UnifiedGenotyper }
import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter, VcfStats }
+import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.apache.commons.io.FileUtils
import org.broadinstitute.gatk.queue.QSettings
diff --git a/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableProtected.scala b/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableProtected.scala
index 9155e7dba..35295eefc 100644
--- a/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableProtected.scala
+++ b/protected/biopet-protected-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableProtected.scala
@@ -9,9 +9,7 @@ import nl.lumc.sasc.biopet.utils.{ BiopetExecutable, MainCommand }
object BiopetExecutableProtected extends BiopetExecutable {
def pipelines: List[MainCommand] = BiopetExecutablePublic.publicPipelines ::: List(
- nl.lumc.sasc.biopet.pipelines.gatk.Shiva,
- nl.lumc.sasc.biopet.pipelines.gatk.ShivaVariantcalling,
- nl.lumc.sasc.biopet.pipelines.gatk.Basty)
+ nl.lumc.sasc.biopet.pipelines.gatk.Shiva)
def tools = BiopetExecutablePublic.tools
}
\ No newline at end of file
diff --git a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala
index 8476d1bbc..9d199cf0e 100644
--- a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala
+++ b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala
@@ -13,19 +13,189 @@
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
+/**
+ * Due to the license issue with GATK, this part of Biopet can only be used inside the
+ * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
+ * on how to use this protected part of biopet or contact us at sasc@lumc.nl
+ */
package nl.lumc.sasc.biopet.pipelines.basty
-import nl.lumc.sasc.biopet.core.PipelineCommand
+import java.io.File
+
+import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand }
+import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins }
+import nl.lumc.sasc.biopet.pipelines.shiva.{ Shiva, ShivaTrait }
+import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta
+import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
-/**
- * Basty implementation without GATK parts
- *
- * Created by pjvan_thof on 3/4/15.
- */
-class Basty(val root: Configurable) extends QScript with BastyTrait {
+class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
+ qscript =>
+
def this() = this(null)
+
+ case class FastaOutput(variants: File, consensus: File, consensusVariants: File)
+
+ def variantcallers = List("unifiedgenotyper")
+
+ override def defaults = Map(
+ "ploidy" -> 1,
+ "variantcallers" -> variantcallers
+ )
+
+ lazy val shiva: ShivaTrait = new Shiva(qscript)
+
+ def summaryFile: File = new File(outputDir, "Basty.summary.json")
+
+ //TODO: Add summary
+ def summaryFiles: Map[String, File] = Map()
+
+ //TODO: Add summary
+ def summarySettings: Map[String, Any] = Map()
+
+ def makeSample(id: String) = new Sample(id)
+ class Sample(sampleId: String) extends AbstractSample(sampleId) {
+ //TODO: Add summary
+ def summaryFiles: Map[String, File] = Map()
+
+ //TODO: Add summary
+ def summaryStats: Map[String, Any] = Map()
+
+ def makeLibrary(id: String) = new Library(id)
+ class Library(libId: String) extends AbstractLibrary(libId) {
+ //TODO: Add summary
+ def summaryFiles: Map[String, File] = Map()
+
+ //TODO: Add summary
+ def summaryStats: Map[String, Any] = Map()
+
+ protected def addJobs(): Unit = {}
+ }
+
+ var output: FastaOutput = _
+ var outputSnps: FastaOutput = _
+
+ protected def addJobs(): Unit = {
+ addPerLibJobs()
+ output = addGenerateFasta(sampleId, sampleDir)
+ outputSnps = addGenerateFasta(sampleId, sampleDir, snpsOnly = true)
+ }
+ }
+
+ def init() {
+ shiva.outputDir = outputDir
+ shiva.init()
+ }
+
+ def biopetScript() {
+ shiva.biopetScript()
+ addAll(shiva.functions)
+ addSummaryQScript(shiva)
+
+ inputFiles :::= shiva.inputFiles
+
+ addSamplesJobs()
+ }
+
+ def addMultiSampleJobs(): Unit = {
+ val refVariants = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference")
+ val refVariantSnps = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference", snpsOnly = true)
+
+ val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList,
+ new File(outputDir, "fastas" + File.separator + "variant.fasta"))
+ add(catVariants)
+ val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList,
+ new File(outputDir, "fastas" + File.separator + "variant.snps_only.fasta"))
+ add(catVariantsSnps)
+
+ val catConsensus = Cat(this, refVariants.consensus :: samples.map(_._2.output.consensus).toList,
+ new File(outputDir, "fastas" + File.separator + "consensus.fasta"))
+ add(catConsensus)
+ val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList,
+ new File(outputDir, "fastas" + File.separator + "consensus.snps_only.fasta"))
+ add(catConsensusSnps)
+
+ val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList,
+ new File(outputDir, "fastas" + File.separator + "consensus.variant.fasta"))
+ add(catConsensusVariants)
+ val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList,
+ new File(outputDir, "fastas" + File.separator + "consensus.variant.snps_only.fasta"))
+ add(catConsensusVariantsSnps)
+
+ val seed: Int = config("seed", default = 12345)
+ def addTreeJobs(variants: File, concensusVariants: File, outputDir: File, outputName: String) {
+ val dirSufixRaxml = new File(outputDir, "raxml")
+ val dirSufixGubbins = new File(outputDir, "gubbins")
+
+ val raxmlMl = new Raxml(this)
+ raxmlMl.input = variants
+ raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX")
+ raxmlMl.p = Some(seed)
+ raxmlMl.n = outputName + "_ml"
+ raxmlMl.w = dirSufixRaxml
+ raxmlMl.N = config("ml_runs", default = 20, namespace = "raxml")
+ add(raxmlMl)
+
+ val r = new scala.util.Random(seed)
+ val numBoot = config("boot_runs", default = 100, namespace = "raxml").asInt
+ val bootList = for (t <- 0 until numBoot) yield {
+ val raxmlBoot = new Raxml(this)
+ raxmlBoot.input = variants
+ raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
+ raxmlBoot.p = Some(seed)
+ raxmlBoot.b = Some(math.abs(r.nextInt()))
+ raxmlBoot.w = dirSufixRaxml
+ raxmlBoot.N = Some(1)
+ raxmlBoot.n = outputName + "_boot_" + t
+ add(raxmlBoot)
+ raxmlBoot.getBootstrapFile.get
+ }
+
+ val cat = Cat(this, bootList.toList, new File(outputDir, "/boot_list"))
+ add(cat)
+
+ val raxmlBi = new Raxml(this)
+ raxmlBi.input = concensusVariants
+ raxmlBi.t = raxmlMl.getBestTreeFile
+ raxmlBi.z = Some(cat.output)
+ raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
+ raxmlBi.p = Some(seed)
+ raxmlBi.f = "b"
+ raxmlBi.n = outputName + "_bi"
+ raxmlBi.w = dirSufixRaxml
+ add(raxmlBi)
+
+ val gubbins = new RunGubbins(this)
+ gubbins.fastafile = concensusVariants
+ gubbins.startingTree = raxmlBi.getBipartitionsFile
+ gubbins.outputDirectory = dirSufixGubbins
+ add(gubbins)
+ }
+
+ addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output,
+ new File(outputDir, "trees" + File.separator + "snps_only"), "snps_only")
+ addTreeJobs(catVariants.output, catConsensusVariants.output,
+ new File(outputDir, "trees" + File.separator + "snps_indels"), "snps_indels")
+
+ }
+
+ def addGenerateFasta(sampleName: String, outputDir: File, outputName: String = null,
+ snpsOnly: Boolean = false): FastaOutput = {
+ val bastyGenerateFasta = new BastyGenerateFasta(this)
+ bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName
+ bastyGenerateFasta.inputVcf = shiva.multisampleVariantCalling.get.finalFile
+ if (shiva.samples.contains(sampleName)) {
+ bastyGenerateFasta.bamFile = shiva.samples(sampleName).preProcessBam.get
+ }
+ bastyGenerateFasta.outputVariants = new File(outputDir, bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
+ bastyGenerateFasta.outputConsensus = new File(outputDir, bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
+ bastyGenerateFasta.outputConsensusVariants = new File(outputDir, bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
+ bastyGenerateFasta.sampleName = sampleName
+ bastyGenerateFasta.snpsOnly = snpsOnly
+ qscript.add(bastyGenerateFasta)
+ FastaOutput(bastyGenerateFasta.outputVariants, bastyGenerateFasta.outputConsensus, bastyGenerateFasta.outputConsensusVariants)
+ }
}
-object Basty extends PipelineCommand
\ No newline at end of file
+object Basty extends PipelineCommand
diff --git a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala
deleted file mode 100644
index 3202d4f37..000000000
--- a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala
+++ /dev/null
@@ -1,196 +0,0 @@
-/**
- * Biopet is built on top of GATK Queue for building bioinformatic
- * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
- * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
- * should also be able to execute Biopet tools and pipelines.
- *
- * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
- *
- * Contact us at: sasc@lumc.nl
- *
- * A dual licensing mode is applied. The source code within this project that are
- * not part of GATK Queue is freely available for non-commercial use under an AGPL
- * license; For commercial users or users who do not want to follow the AGPL
- * license, please contact us to obtain a separate license.
- */
-/**
- * Due to the license issue with GATK, this part of Biopet can only be used inside the
- * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
- * on how to use this protected part of biopet or contact us at sasc@lumc.nl
- */
-package nl.lumc.sasc.biopet.pipelines.basty
-
-import java.io.File
-
-import nl.lumc.sasc.biopet.core.MultiSampleQScript
-import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins }
-import nl.lumc.sasc.biopet.pipelines.shiva.{ Shiva, ShivaTrait }
-import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta
-import nl.lumc.sasc.biopet.utils.ConfigUtils
-import org.broadinstitute.gatk.queue.QScript
-
-trait BastyTrait extends MultiSampleQScript {
- qscript: QScript =>
-
- case class FastaOutput(variants: File, consensus: File, consensusVariants: File)
-
- def variantcallers = List("freebayes")
-
- override def defaults = Map(
- "ploidy" -> 1,
- "variantcallers" -> variantcallers
- )
-
- lazy val shiva: ShivaTrait = new Shiva(qscript)
-
- def summaryFile: File = new File(outputDir, "Basty.summary.json")
-
- //TODO: Add summary
- def summaryFiles: Map[String, File] = Map()
-
- //TODO: Add summary
- def summarySettings: Map[String, Any] = Map()
-
- def makeSample(id: String) = new Sample(id)
- class Sample(sampleId: String) extends AbstractSample(sampleId) {
- //TODO: Add summary
- def summaryFiles: Map[String, File] = Map()
-
- //TODO: Add summary
- def summaryStats: Map[String, Any] = Map()
-
- def makeLibrary(id: String) = new Library(id)
- class Library(libId: String) extends AbstractLibrary(libId) {
- //TODO: Add summary
- def summaryFiles: Map[String, File] = Map()
-
- //TODO: Add summary
- def summaryStats: Map[String, Any] = Map()
-
- protected def addJobs(): Unit = {}
- }
-
- var output: FastaOutput = _
- var outputSnps: FastaOutput = _
-
- protected def addJobs(): Unit = {
- addPerLibJobs()
- output = addGenerateFasta(sampleId, sampleDir)
- outputSnps = addGenerateFasta(sampleId, sampleDir, snpsOnly = true)
- }
- }
-
- def init() {
- shiva.outputDir = outputDir
- shiva.init()
- }
-
- def biopetScript() {
- shiva.biopetScript()
- addAll(shiva.functions)
- addSummaryQScript(shiva)
-
- inputFiles :::= shiva.inputFiles
-
- addSamplesJobs()
- }
-
- def addMultiSampleJobs(): Unit = {
- val refVariants = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference")
- val refVariantSnps = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference", snpsOnly = true)
-
- val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList,
- new File(outputDir, "fastas" + File.separator + "variant.fasta"))
- add(catVariants)
- val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList,
- new File(outputDir, "fastas" + File.separator + "variant.snps_only.fasta"))
- add(catVariantsSnps)
-
- val catConsensus = Cat(this, refVariants.consensus :: samples.map(_._2.output.consensus).toList,
- new File(outputDir, "fastas" + File.separator + "consensus.fasta"))
- add(catConsensus)
- val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList,
- new File(outputDir, "fastas" + File.separator + "consensus.snps_only.fasta"))
- add(catConsensusSnps)
-
- val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList,
- new File(outputDir, "fastas" + File.separator + "consensus.variant.fasta"))
- add(catConsensusVariants)
- val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList,
- new File(outputDir, "fastas" + File.separator + "consensus.variant.snps_only.fasta"))
- add(catConsensusVariantsSnps)
-
- val seed: Int = config("seed", default = 12345)
- def addTreeJobs(variants: File, concensusVariants: File, outputDir: File, outputName: String) {
- val dirSufixRaxml = new File(outputDir, "raxml")
- val dirSufixGubbins = new File(outputDir, "gubbins")
-
- val raxmlMl = new Raxml(this)
- raxmlMl.input = variants
- raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX")
- raxmlMl.p = Some(seed)
- raxmlMl.n = outputName + "_ml"
- raxmlMl.w = dirSufixRaxml
- raxmlMl.N = config("ml_runs", default = 20, namespace = "raxml")
- add(raxmlMl)
-
- val r = new scala.util.Random(seed)
- val numBoot = config("boot_runs", default = 100, namespace = "raxml").asInt
- val bootList = for (t <- 0 until numBoot) yield {
- val raxmlBoot = new Raxml(this)
- raxmlBoot.input = variants
- raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
- raxmlBoot.p = Some(seed)
- raxmlBoot.b = Some(math.abs(r.nextInt()))
- raxmlBoot.w = dirSufixRaxml
- raxmlBoot.N = Some(1)
- raxmlBoot.n = outputName + "_boot_" + t
- add(raxmlBoot)
- raxmlBoot.getBootstrapFile.get
- }
-
- val cat = Cat(this, bootList.toList, new File(outputDir, "/boot_list"))
- add(cat)
-
- val raxmlBi = new Raxml(this)
- raxmlBi.input = concensusVariants
- raxmlBi.t = raxmlMl.getBestTreeFile
- raxmlBi.z = Some(cat.output)
- raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
- raxmlBi.p = Some(seed)
- raxmlBi.f = "b"
- raxmlBi.n = outputName + "_bi"
- raxmlBi.w = dirSufixRaxml
- add(raxmlBi)
-
- val gubbins = new RunGubbins(this)
- gubbins.fastafile = concensusVariants
- gubbins.startingTree = raxmlBi.getBipartitionsFile
- gubbins.outputDirectory = dirSufixGubbins
- add(gubbins)
- }
-
- addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output,
- new File(outputDir, "trees" + File.separator + "snps_only"), "snps_only")
- addTreeJobs(catVariants.output, catConsensusVariants.output,
- new File(outputDir, "trees" + File.separator + "snps_indels"), "snps_indels")
-
- }
-
- def addGenerateFasta(sampleName: String, outputDir: File, outputName: String = null,
- snpsOnly: Boolean = false): FastaOutput = {
- val bastyGenerateFasta = new BastyGenerateFasta(this)
- bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName
- bastyGenerateFasta.inputVcf = shiva.multisampleVariantCalling.get.finalFile
- if (shiva.samples.contains(sampleName)) {
- bastyGenerateFasta.bamFile = shiva.samples(sampleName).preProcessBam.get
- }
- bastyGenerateFasta.outputVariants = new File(outputDir, bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
- bastyGenerateFasta.outputConsensus = new File(outputDir, bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
- bastyGenerateFasta.outputConsensusVariants = new File(outputDir, bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
- bastyGenerateFasta.sampleName = sampleName
- bastyGenerateFasta.snpsOnly = snpsOnly
- qscript.add(bastyGenerateFasta)
- FastaOutput(bastyGenerateFasta.outputVariants, bastyGenerateFasta.outputConsensus, bastyGenerateFasta.outputConsensusVariants)
- }
-}
diff --git a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala
index 8b21fd806..e0c4c29cb 100644
--- a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala
+++ b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala
@@ -15,7 +15,6 @@
*/
package nl.lumc.sasc.biopet
-import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling
import nl.lumc.sasc.biopet.utils.{ BiopetExecutable, MainCommand }
object BiopetExecutablePublic extends BiopetExecutable {
@@ -33,13 +32,13 @@ object BiopetExecutablePublic extends BiopetExecutable {
nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling,
nl.lumc.sasc.biopet.pipelines.gears.GearsSingle,
nl.lumc.sasc.biopet.pipelines.gears.Gears,
- nl.lumc.sasc.biopet.pipelines.gwastest.GwasTest
+ nl.lumc.sasc.biopet.pipelines.gwastest.GwasTest,
+ nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling,
+ nl.lumc.sasc.biopet.pipelines.basty.Basty
)
def pipelines: List[MainCommand] = List(
- nl.lumc.sasc.biopet.pipelines.shiva.Shiva,
- ShivaVariantcalling,
- nl.lumc.sasc.biopet.pipelines.basty.Basty
+ nl.lumc.sasc.biopet.pipelines.shiva.Shiva
) ::: publicPipelines
def tools: List[MainCommand] = BiopetToolsExecutable.tools
diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala
similarity index 100%
rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala
rename to public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala
diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala
similarity index 100%
rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala
rename to public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala
diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala
similarity index 100%
rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala
rename to public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala
diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala
similarity index 100%
rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala
rename to public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala
diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala
similarity index 100%
rename from protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala
rename to public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala
diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala
index 5014d33a6..630bdb4d7 100644
--- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala
+++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala
@@ -43,7 +43,7 @@ trait ShivaTrait extends MultisampleMappingTrait with Reference with TargetRegio
)
/** Method to make the variantcalling namespace of shiva */
- def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait with QScript = {
+ def makeVariantcalling(multisample: Boolean = false): ShivaVariantcalling with QScript = {
if (multisample) new ShivaVariantcalling(qscript) {
override def namePrefix = "multisample"
override def configNamespace: String = "shivavariantcalling"
diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala
index d075619c1..e2a325ed8 100644
--- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala
+++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala
@@ -15,17 +15,178 @@
*/
package nl.lumc.sasc.biopet.pipelines.shiva
-import nl.lumc.sasc.biopet.core.PipelineCommand
+import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, SampleLibraryTag }
+import nl.lumc.sasc.biopet.core.summary.SummaryQScript
+import nl.lumc.sasc.biopet.extensions.Tabix
+import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, GenotypeConcordance }
+import nl.lumc.sasc.biopet.extensions.tools.VcfStats
+import nl.lumc.sasc.biopet.extensions.vt.{ VtDecompose, VtNormalize }
+import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
+import nl.lumc.sasc.biopet.pipelines.gatk.variantcallers._
+import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.{ VarscanCnsSingleSample, _ }
+import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
- * Implementation of ShivaVariantcalling without GATK variantcallers
+ * Implementation of ShivaVariantcalling
*
* Created by pjvan_thof on 2/26/15.
*/
-class ShivaVariantcalling(val root: Configurable) extends QScript with ShivaVariantcallingTrait {
+class ShivaVariantcalling(val root: Configurable) extends QScript
+ with SummaryQScript
+ with SampleLibraryTag
+ with Reference
+ with TargetRegions {
+ qscript =>
+
def this() = this(null)
+
+ @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
+ protected var inputBamsArg: List[File] = Nil
+
+ var inputBams: Map[String, File] = Map()
+
+ /** Executed before script */
+ def init(): Unit = {
+ if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
+ }
+
+ var referenceVcf: Option[File] = config("reference_vcf")
+
+ var referenceVcfRegions: Option[File] = config("reference_vcf_regions")
+
+ /** Name prefix, can override this methods if neeeded */
+ def namePrefix: String = {
+ (sampleId, libId) match {
+ case (Some(s), Some(l)) => s + "-" + l
+ case (Some(s), _) => s
+ case _ => config("name_prefix")
+ }
+ }
+
+ override def defaults = Map("bcftoolscall" -> Map("f" -> List("GQ")))
+
+ /** Final merged output files of all variantcaller modes */
+ def finalFile = new File(outputDir, namePrefix + ".final.vcf.gz")
+
+ /** Variantcallers requested by the config */
+ protected val configCallers: Set[String] = config("variantcallers")
+
+ protected val callers: List[Variantcaller] = {
+ (for (name <- configCallers) yield {
+ if (!callersList.exists(_.name == name))
+ Logging.addError(s"variantcaller '$name' does not exist, possible to use: " + callersList.map(_.name).mkString(", "))
+ callersList.find(_.name == name)
+ }).flatten.toList.sortBy(_.prio)
+ }
+
+ /** This will add jobs for this pipeline */
+ def biopetScript(): Unit = {
+ require(inputBams.nonEmpty, "No input bams found")
+ require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + callersList.map(_.name).mkString(", "))
+
+ val cv = new CombineVariants(qscript)
+ cv.outputFile = finalFile
+ cv.setKey = "VariantCaller"
+ cv.genotypeMergeOptions = Some("PRIORITIZE")
+ cv.rodPriorityList = callers.map(_.name).mkString(",")
+ for (caller <- callers) {
+ caller.inputBams = inputBams
+ caller.namePrefix = namePrefix
+ caller.outputDir = new File(outputDir, caller.name)
+ add(caller)
+ addStats(caller.outputFile, caller.name)
+ val normalize: Boolean = config("execute_vt_normalize", default = false, namespace = caller.configNamespace)
+ val decompose: Boolean = config("execute_vt_decompose", default = false, namespace = caller.configNamespace)
+
+ val vtNormalize = new VtNormalize(this)
+ vtNormalize.inputVcf = caller.outputFile
+ val vtDecompose = new VtDecompose(this)
+
+ if (normalize && decompose) {
+ vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz")
+ vtNormalize.isIntermediate = true
+ add(vtNormalize, Tabix(this, vtNormalize.outputVcf))
+ vtDecompose.inputVcf = vtNormalize.outputVcf
+ vtDecompose.outputVcf = swapExt(caller.outputDir, vtNormalize.outputVcf, ".vcf.gz", ".decompose.vcf.gz")
+ add(vtDecompose, Tabix(this, vtDecompose.outputVcf))
+ cv.addInput(vtDecompose.outputVcf, caller.name)
+ } else if (normalize && !decompose) {
+ vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz")
+ add(vtNormalize, Tabix(this, vtNormalize.outputVcf))
+ cv.addInput(vtNormalize.outputVcf, caller.name)
+ } else if (!normalize && decompose) {
+ vtDecompose.inputVcf = caller.outputFile
+ vtDecompose.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".decompose.vcf.gz")
+ add(vtDecompose, Tabix(this, vtDecompose.outputVcf))
+ cv.addInput(vtDecompose.outputVcf, caller.name)
+ } else cv.addInput(caller.outputFile, caller.name)
+ }
+ add(cv)
+
+ addStats(finalFile, "final")
+
+ addSummaryJobs()
+ }
+
+ protected def addStats(vcfFile: File, name: String): Unit = {
+ val vcfStats = new VcfStats(qscript)
+ vcfStats.input = vcfFile
+ vcfStats.setOutputDir(new File(vcfFile.getParentFile, "vcfstats"))
+ if (name == "final") vcfStats.infoTags :+= "VariantCaller"
+ add(vcfStats)
+ addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name")
+
+ referenceVcf.foreach(referenceVcfFile => {
+ val gc = new GenotypeConcordance(this)
+ gc.evalFile = vcfFile
+ gc.compFile = referenceVcfFile
+ gc.outputFile = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt")
+ referenceVcfRegions.foreach(gc.intervals ::= _)
+ add(gc)
+ addSummarizable(gc, s"$namePrefix-genotype_concordance-$name")
+ })
+
+ for (bedFile <- ampliconBedFile.toList ::: roiBedFiles) {
+ val regionName = bedFile.getName.stripSuffix(".bed")
+ val vcfStats = new VcfStats(qscript)
+ vcfStats.input = vcfFile
+ vcfStats.intervals = Some(bedFile)
+ vcfStats.setOutputDir(new File(vcfFile.getParentFile, s"vcfstats-$regionName"))
+ if (name == "final") vcfStats.infoTags :+= "VariantCaller"
+ add(vcfStats)
+ addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name-$regionName")
+ }
+ }
+
+ /** Will generate all available variantcallers */
+ protected def callersList: List[Variantcaller] =
+ new HaplotypeCallerGvcf(this) ::
+ new HaplotypeCallerAllele(this) ::
+ new UnifiedGenotyperAllele(this) ::
+ new UnifiedGenotyper(this) ::
+ new HaplotypeCaller(this) ::
+ new Freebayes(this) ::
+ new RawVcf(this) ::
+ new Bcftools(this) ::
+ new BcftoolsSingleSample(this) ::
+ new VarscanCnsSingleSample(this) :: Nil
+
+ /** Location of summary file */
+ def summaryFile = new File(outputDir, "ShivaVariantcalling.summary.json")
+
+ /** Settings for the summary */
+ def summarySettings = Map(
+ "variantcallers" -> configCallers.toList,
+ "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
+ "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed"))
+ )
+
+ /** Files for the summary */
+ def summaryFiles: Map[String, File] = {
+ callers.map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile)
+ }
}
object ShivaVariantcalling extends PipelineCommand
\ No newline at end of file
diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala
deleted file mode 100644
index 06f876979..000000000
--- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala
+++ /dev/null
@@ -1,180 +0,0 @@
-/**
- * Biopet is built on top of GATK Queue for building bioinformatic
- * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
- * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
- * should also be able to execute Biopet tools and pipelines.
- *
- * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
- *
- * Contact us at: sasc@lumc.nl
- *
- * A dual licensing mode is applied. The source code within this project that are
- * not part of GATK Queue is freely available for non-commercial use under an AGPL
- * license; For commercial users or users who do not want to follow the AGPL
- * license, please contact us to obtain a separate license.
- */
-package nl.lumc.sasc.biopet.pipelines.shiva
-
-import nl.lumc.sasc.biopet.core.summary.SummaryQScript
-import nl.lumc.sasc.biopet.core.{ Reference, SampleLibraryTag }
-import nl.lumc.sasc.biopet.extensions.Tabix
-import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, GenotypeConcordance }
-import nl.lumc.sasc.biopet.extensions.tools.VcfStats
-import nl.lumc.sasc.biopet.extensions.vt.{ VtDecompose, VtNormalize }
-import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
-import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers._
-import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging }
-import org.broadinstitute.gatk.queue.QScript
-
-/**
- * Common trait for ShivaVariantcalling
- *
- * Created by pjvan_thof on 2/26/15.
- */
-trait ShivaVariantcallingTrait extends SummaryQScript
- with SampleLibraryTag
- with Reference
- with TargetRegions {
- qscript: QScript =>
-
- @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
- protected var inputBamsArg: List[File] = Nil
-
- var inputBams: Map[String, File] = Map()
-
- /** Executed before script */
- def init(): Unit = {
- if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
- }
-
- var referenceVcf: Option[File] = config("reference_vcf")
-
- var referenceVcfRegions: Option[File] = config("reference_vcf_regions")
-
- /** Name prefix, can override this methods if neeeded */
- def namePrefix: String = {
- (sampleId, libId) match {
- case (Some(s), Some(l)) => s + "-" + l
- case (Some(s), _) => s
- case _ => config("name_prefix")
- }
- }
-
- override def defaults = Map("bcftoolscall" -> Map("f" -> List("GQ")))
-
- /** Final merged output files of all variantcaller modes */
- def finalFile = new File(outputDir, namePrefix + ".final.vcf.gz")
-
- /** Variantcallers requested by the config */
- protected val configCallers: Set[String] = config("variantcallers")
-
- protected val callers: List[Variantcaller] = {
- (for (name <- configCallers) yield {
- if (!callersList.exists(_.name == name))
- Logging.addError(s"variantcaller '$name' does not exist, possible to use: " + callersList.map(_.name).mkString(", "))
- callersList.find(_.name == name)
- }).flatten.toList.sortBy(_.prio)
- }
-
- /** This will add jobs for this pipeline */
- def biopetScript(): Unit = {
- require(inputBams.nonEmpty, "No input bams found")
- require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + callersList.map(_.name).mkString(", "))
-
- val cv = new CombineVariants(qscript)
- cv.outputFile = finalFile
- cv.setKey = "VariantCaller"
- cv.genotypeMergeOptions = Some("PRIORITIZE")
- cv.rodPriorityList = callers.map(_.name).mkString(",")
- for (caller <- callers) {
- caller.inputBams = inputBams
- caller.namePrefix = namePrefix
- caller.outputDir = new File(outputDir, caller.name)
- add(caller)
- addStats(caller.outputFile, caller.name)
- val normalize: Boolean = config("execute_vt_normalize", default = false, namespace = caller.configNamespace)
- val decompose: Boolean = config("execute_vt_decompose", default = false, namespace = caller.configNamespace)
-
- val vtNormalize = new VtNormalize(this)
- vtNormalize.inputVcf = caller.outputFile
- val vtDecompose = new VtDecompose(this)
-
- if (normalize && decompose) {
- vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz")
- vtNormalize.isIntermediate = true
- add(vtNormalize, Tabix(this, vtNormalize.outputVcf))
- vtDecompose.inputVcf = vtNormalize.outputVcf
- vtDecompose.outputVcf = swapExt(caller.outputDir, vtNormalize.outputVcf, ".vcf.gz", ".decompose.vcf.gz")
- add(vtDecompose, Tabix(this, vtDecompose.outputVcf))
- cv.addInput(vtDecompose.outputVcf, caller.name)
- } else if (normalize && !decompose) {
- vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz")
- add(vtNormalize, Tabix(this, vtNormalize.outputVcf))
- cv.addInput(vtNormalize.outputVcf, caller.name)
- } else if (!normalize && decompose) {
- vtDecompose.inputVcf = caller.outputFile
- vtDecompose.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".decompose.vcf.gz")
- add(vtDecompose, Tabix(this, vtDecompose.outputVcf))
- cv.addInput(vtDecompose.outputVcf, caller.name)
- } else cv.addInput(caller.outputFile, caller.name)
- }
- add(cv)
-
- addStats(finalFile, "final")
-
- addSummaryJobs()
- }
-
- protected def addStats(vcfFile: File, name: String): Unit = {
- val vcfStats = new VcfStats(qscript)
- vcfStats.input = vcfFile
- vcfStats.setOutputDir(new File(vcfFile.getParentFile, "vcfstats"))
- if (name == "final") vcfStats.infoTags :+= "VariantCaller"
- add(vcfStats)
- addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name")
-
- referenceVcf.foreach(referenceVcfFile => {
- val gc = new GenotypeConcordance(this)
- gc.evalFile = vcfFile
- gc.compFile = referenceVcfFile
- gc.outputFile = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt")
- referenceVcfRegions.foreach(gc.intervals ::= _)
- add(gc)
- addSummarizable(gc, s"$namePrefix-genotype_concordance-$name")
- })
-
- for (bedFile <- ampliconBedFile.toList ::: roiBedFiles) {
- val regionName = bedFile.getName.stripSuffix(".bed")
- val vcfStats = new VcfStats(qscript)
- vcfStats.input = vcfFile
- vcfStats.intervals = Some(bedFile)
- vcfStats.setOutputDir(new File(vcfFile.getParentFile, s"vcfstats-$regionName"))
- if (name == "final") vcfStats.infoTags :+= "VariantCaller"
- add(vcfStats)
- addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name-$regionName")
- }
- }
-
- /** Will generate all available variantcallers */
- protected def callersList: List[Variantcaller] = List(
- new Freebayes(this),
- new RawVcf(this),
- new Bcftools(this),
- new BcftoolsSingleSample(this),
- new VarscanCnsSingleSample(this))
-
- /** Location of summary file */
- def summaryFile = new File(outputDir, "ShivaVariantcalling.summary.json")
-
- /** Settings for the summary */
- def summarySettings = Map(
- "variantcallers" -> configCallers.toList,
- "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
- "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed"))
- )
-
- /** Files for the summary */
- def summaryFiles: Map[String, File] = {
- callers.map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile)
- }
-}
\ No newline at end of file
--
GitLab