Basty.scala 8.73 KB
Newer Older
bow's avatar
bow committed
1
/**
2 3 4 5 6 7 8 9 10 11 12 13 14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
15
/**
16 17 18 19
  * Due to the license issue with GATK, this part of Biopet can only be used inside the
  * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
  * on how to use this protected part of biopet or contact us at sasc@lumc.nl
  */
20 21 22
package nl.lumc.sasc.biopet.pipelines.basty

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
23

24 25
import nl.lumc.sasc.biopet.core.{MultiSampleQScript, PipelineCommand}
import nl.lumc.sasc.biopet.extensions.{Cat, Raxml, RunGubbins}
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.shiva.Shiva
27
import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import nl.lumc.sasc.biopet.utils.config.Configurable
29
import org.broadinstitute.gatk.queue.QScript
30

31
class Basty(val parent: Configurable) extends QScript with MultiSampleQScript { qscript =>
32

Peter van 't Hof's avatar
Peter van 't Hof committed
33
  def this() = this(null)
34

Peter van 't Hof's avatar
Peter van 't Hof committed
35 36 37 38 39 40 41
  case class FastaOutput(variants: File, consensus: File, consensusVariants: File) {
    def summaryFiles(prefix: Option[String] = None) = Map(
      s"${prefix.map(_ + "_").getOrElse("")}variants_fasta" -> variants,
      s"${prefix.map(_ + "_").getOrElse("")}consensus_fasta" -> consensus,
      s"${prefix.map(_ + "_").getOrElse("")}consensus_variants_fasta" -> consensusVariants
    )
  }
42

43
  def variantcallers = List("unifiedgenotyper")
Peter van 't Hof's avatar
Peter van 't Hof committed
44

45
  val numBoot: Int = config("boot_runs", default = 100, namespace = "raxml").asInt
Peter van 't Hof's avatar
Peter van 't Hof committed
46

47
  override def defaults = Map(
48
    "ploidy" -> 1,
Peter van 't Hof's avatar
Peter van 't Hof committed
49
    "variantcallers" -> variantcallers
50
  )
51

Peter van 't Hof's avatar
Peter van 't Hof committed
52
  lazy val shiva = new Shiva(qscript)
53

Peter van 't Hof's avatar
Peter van 't Hof committed
54
  def summaryFiles: Map[String, File] = Map()
55

Peter van 't Hof's avatar
Peter van 't Hof committed
56
  def summarySettings: Map[String, Any] = Map("boot_runs" -> numBoot)
57

58 59
  def makeSample(id: String) = new Sample(id)
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
60 61
    def summaryFiles: Map[String, File] =
      output.summaryFiles() ++ outputSnps.summaryFiles(Some("snps_only"))
62

Peter van 't Hof's avatar
Peter van 't Hof committed
63
    def summaryStats: Map[String, Any] = Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
64

Peter van 't Hof's avatar
Peter van 't Hof committed
65
    override def summarySettings: Map[String, Any] = Map()
66

67
    def makeLibrary(id: String) = new Library(id)
68
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
69
      def summaryFiles: Map[String, File] = Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
70

Peter van 't Hof's avatar
Peter van 't Hof committed
71
      def summaryStats: Map[String, Any] = Map()
72

Peter van 't Hof's avatar
Peter van 't Hof committed
73
      override def summarySettings: Map[String, Any] = Map()
74

Peter van 't Hof's avatar
Peter van 't Hof committed
75
      protected def addJobs(): Unit = {}
76 77
    }

78 79 80
    var output: FastaOutput = _
    var outputSnps: FastaOutput = _

Peter van 't Hof's avatar
Peter van 't Hof committed
81
    protected def addJobs(): Unit = {
82
      addPerLibJobs()
83 84 85 86
      output = addGenerateFasta(sampleId, sampleDir)
      outputSnps = addGenerateFasta(sampleId, sampleDir, snpsOnly = true)
    }
  }
87 88

  def init() {
Peter van 't Hof's avatar
Peter van 't Hof committed
89
    shiva.outputDir = outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
90
    shiva.init()
91 92 93
  }

  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
94
    shiva.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
95 96
    addAll(shiva.functions)
    addSummaryQScript(shiva)
97

98 99
    inputFiles :::= shiva.inputFiles

Peter van 't Hof's avatar
Peter van 't Hof committed
100
    addSamplesJobs()
101 102 103
  }

  def addMultiSampleJobs(): Unit = {
104 105 106 107 108 109 110 111 112 113 114 115 116
    val refVariants = addGenerateFasta(
      null,
      new File(outputDir, "fastas" + File.separator + "reference"),
      outputName = "reference")
    val refVariantSnps = addGenerateFasta(
      null,
      new File(outputDir, "fastas" + File.separator + "reference"),
      outputName = "reference",
      snpsOnly = true)

    val catVariants = Cat(this,
                          refVariants.variants :: samples.map(_._2.output.variants).toList,
                          new File(outputDir, "fastas" + File.separator + "variant.fasta"))
117
    add(catVariants)
118 119 120
    val catVariantsSnps = Cat(
      this,
      refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList,
Peter van 't Hof's avatar
Peter van 't Hof committed
121
      new File(outputDir, "fastas" + File.separator + "variant.snps_only.fasta"))
122 123
    add(catVariantsSnps)

124 125 126
    val catConsensus = Cat(this,
                           refVariants.consensus :: samples.map(_._2.output.consensus).toList,
                           new File(outputDir, "fastas" + File.separator + "consensus.fasta"))
127
    add(catConsensus)
128 129 130
    val catConsensusSnps = Cat(
      this,
      refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList,
Peter van 't Hof's avatar
Peter van 't Hof committed
131
      new File(outputDir, "fastas" + File.separator + "consensus.snps_only.fasta"))
132 133
    add(catConsensusSnps)

134 135 136 137 138
    val catConsensusVariants = Cat(
      this,
      refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.variant.fasta")
    )
139
    add(catConsensusVariants)
140 141 142 143 144
    val catConsensusVariantsSnps = Cat(
      this,
      refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.variant.snps_only.fasta")
    )
145 146 147
    add(catConsensusVariantsSnps)

    val seed: Int = config("seed", default = 12345)
Peter van 't Hof's avatar
Peter van 't Hof committed
148 149 150
    def addTreeJobs(variants: File, concensusVariants: File, outputDir: File, outputName: String) {
      val dirSufixRaxml = new File(outputDir, "raxml")
      val dirSufixGubbins = new File(outputDir, "gubbins")
Peter van 't Hof's avatar
Peter van 't Hof committed
151

152
      val raxmlMl = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
153
      raxmlMl.input = variants
154
      raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
155
      raxmlMl.p = Some(seed)
156
      raxmlMl.n = outputName + "_ml"
157
      raxmlMl.w = dirSufixRaxml
Sander Bollen's avatar
Sander Bollen committed
158
      raxmlMl.N = config("ml_runs", default = 20, namespace = "raxml")
159 160 161 162 163
      add(raxmlMl)

      val r = new scala.util.Random(seed)
      val bootList = for (t <- 0 until numBoot) yield {
        val raxmlBoot = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
164
        raxmlBoot.input = variants
165
        raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
166
        raxmlBoot.p = Some(seed)
Peter van 't Hof's avatar
Peter van 't Hof committed
167
        raxmlBoot.b = Some(math.abs(r.nextInt()))
Peter van 't Hof's avatar
Peter van 't Hof committed
168
        raxmlBoot.w = dirSufixRaxml
Peter van 't Hof's avatar
Peter van 't Hof committed
169
        raxmlBoot.N = Some(1)
170 171
        raxmlBoot.n = outputName + "_boot_" + t
        add(raxmlBoot)
172
        raxmlBoot.getBootstrapFile.get
173 174
      }

175
      val cat = Cat(this, bootList.toList, new File(outputDir, "/boot_list"))
176 177 178
      add(cat)

      val raxmlBi = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
179
      raxmlBi.input = concensusVariants
180
      raxmlBi.t = raxmlMl.getBestTreeFile
181
      raxmlBi.z = Some(cat.output)
182
      raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
183
      raxmlBi.p = Some(seed)
184 185
      raxmlBi.f = "b"
      raxmlBi.n = outputName + "_bi"
186
      raxmlBi.w = dirSufixRaxml
187
      add(raxmlBi)
188 189

      val gubbins = new RunGubbins(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
190
      gubbins.fastafile = concensusVariants
191
      gubbins.startingTree = raxmlBi.getBipartitionsFile
Peter van 't Hof's avatar
Peter van 't Hof committed
192
      gubbins.outputDirectory = dirSufixGubbins
193
      add(gubbins)
194 195
    }

196 197 198 199 200 201 202 203
    addTreeJobs(catVariantsSnps.output,
                catConsensusVariantsSnps.output,
                new File(outputDir, "trees" + File.separator + "snps_only"),
                "snps_only")
    addTreeJobs(catVariants.output,
                catConsensusVariants.output,
                new File(outputDir, "trees" + File.separator + "snps_indels"),
                "snps_indels")
204

205 206
  }

207 208 209
  def addGenerateFasta(sampleName: String,
                       outputDir: File,
                       outputName: String = null,
210 211 212
                       snpsOnly: Boolean = false): FastaOutput = {
    val bastyGenerateFasta = new BastyGenerateFasta(this)
    bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName
213
    bastyGenerateFasta.inputVcf = shiva.multisampleVariantCalling.get.finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
214 215
    if (shiva.samples.contains(sampleName)) {
      bastyGenerateFasta.bamFile = shiva.samples(sampleName).preProcessBam.get
216
    }
217 218 219 220 221 222
    bastyGenerateFasta.outputVariants = new File(
      outputDir,
      bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
    bastyGenerateFasta.outputConsensus = new File(
      outputDir,
      bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
223 224 225 226
    bastyGenerateFasta.outputConsensusVariants = new File(
      outputDir,
      bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only"
                                                               else "") + ".fasta")
227 228
    bastyGenerateFasta.sampleName = sampleName
    bastyGenerateFasta.snpsOnly = snpsOnly
229
    qscript.add(bastyGenerateFasta)
230 231 232
    FastaOutput(bastyGenerateFasta.outputVariants,
                bastyGenerateFasta.outputConsensus,
                bastyGenerateFasta.outputConsensusVariants)
233 234
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
235

236
object Basty extends PipelineCommand