Basty.scala 9.07 KB
Newer Older
bow's avatar
bow committed
1
/**
2
3
4
5
6
7
8
9
10
11
12
13
14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
15
/**
16
17
18
19
  * Due to the license issue with GATK, this part of Biopet can only be used inside the
  * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
  * on how to use this protected part of biopet or contact us at sasc@lumc.nl
  */
20
21
22
package nl.lumc.sasc.biopet.pipelines.basty

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
23

Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
25
26
import nl.lumc.sasc.biopet.core.{MultiSampleQScript, PipelineCommand}
import nl.lumc.sasc.biopet.extensions.{Cat, Raxml, RunGubbins}
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.pipelines.shiva.{Shiva, ShivaReport}
28
import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import nl.lumc.sasc.biopet.utils.config.Configurable
30
import org.broadinstitute.gatk.queue.QScript
31

32
class Basty(val parent: Configurable) extends QScript with MultiSampleQScript { qscript =>
33

Peter van 't Hof's avatar
Peter van 't Hof committed
34
  def this() = this(null)
35

Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
38
39
40
41
42
  case class FastaOutput(variants: File, consensus: File, consensusVariants: File) {
    def summaryFiles(prefix: Option[String] = None) = Map(
      s"${prefix.map(_ + "_").getOrElse("")}variants_fasta" -> variants,
      s"${prefix.map(_ + "_").getOrElse("")}consensus_fasta" -> consensus,
      s"${prefix.map(_ + "_").getOrElse("")}consensus_variants_fasta" -> consensusVariants
    )
  }
43

44
  def variantcallers = List("unifiedgenotyper")
Peter van 't Hof's avatar
Peter van 't Hof committed
45

46
  val numBoot: Int = config("boot_runs", default = 100, namespace = "raxml").asInt
Peter van 't Hof's avatar
Peter van 't Hof committed
47

48
49
  val executeGubbins: Boolean = config("execute_gubbins", default = true)

50
  override def defaults = Map(
51
    "ploidy" -> 1,
Peter van 't Hof's avatar
Peter van 't Hof committed
52
    "variantcallers" -> variantcallers
53
  )
54

Peter van 't Hof's avatar
Peter van 't Hof committed
55
  lazy val shiva = new Shiva(qscript)
56

Peter van 't Hof's avatar
Peter van 't Hof committed
57
58
59
60
61
62
63
  override def reportClass: Option[ReportBuilderExtension] = {
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
    shiva.summaryDbFile = summaryDbFile
    Some(shiva)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
64
  def summaryFiles: Map[String, File] = Map()
65

Peter van 't Hof's avatar
Peter van 't Hof committed
66
  def summarySettings: Map[String, Any] = Map("boot_runs" -> numBoot)
67

68
69
  def makeSample(id: String) = new Sample(id)
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
70
71
    def summaryFiles: Map[String, File] =
      output.summaryFiles() ++ outputSnps.summaryFiles(Some("snps_only"))
72

Peter van 't Hof's avatar
Peter van 't Hof committed
73
    def summaryStats: Map[String, Any] = Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
74

Peter van 't Hof's avatar
Peter van 't Hof committed
75
    override def summarySettings: Map[String, Any] = Map()
76

77
    def makeLibrary(id: String) = new Library(id)
78
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
79
      def summaryFiles: Map[String, File] = Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
      def summaryStats: Map[String, Any] = Map()
82

Peter van 't Hof's avatar
Peter van 't Hof committed
83
      override def summarySettings: Map[String, Any] = Map()
84

Peter van 't Hof's avatar
Peter van 't Hof committed
85
      protected def addJobs(): Unit = {}
86
87
    }

88
89
90
    var output: FastaOutput = _
    var outputSnps: FastaOutput = _

Peter van 't Hof's avatar
Peter van 't Hof committed
91
    protected def addJobs(): Unit = {
92
      addPerLibJobs()
93
94
95
96
      output = addGenerateFasta(sampleId, sampleDir)
      outputSnps = addGenerateFasta(sampleId, sampleDir, snpsOnly = true)
    }
  }
97
98

  def init() {
Peter van 't Hof's avatar
Peter van 't Hof committed
99
    shiva.outputDir = outputDir
100
101
102
  }

  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
103
    add(shiva)
104

105
106
    inputFiles :::= shiva.inputFiles

Peter van 't Hof's avatar
Peter van 't Hof committed
107
    addSamplesJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
108
    addSummaryJobs()
109
110
111
  }

  def addMultiSampleJobs(): Unit = {
112
113
114
115
116
117
118
119
120
121
122
123
124
    val refVariants = addGenerateFasta(
      null,
      new File(outputDir, "fastas" + File.separator + "reference"),
      outputName = "reference")
    val refVariantSnps = addGenerateFasta(
      null,
      new File(outputDir, "fastas" + File.separator + "reference"),
      outputName = "reference",
      snpsOnly = true)

    val catVariants = Cat(this,
                          refVariants.variants :: samples.map(_._2.output.variants).toList,
                          new File(outputDir, "fastas" + File.separator + "variant.fasta"))
125
    add(catVariants)
126
127
128
    val catVariantsSnps = Cat(
      this,
      refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList,
Peter van 't Hof's avatar
Peter van 't Hof committed
129
      new File(outputDir, "fastas" + File.separator + "variant.snps_only.fasta"))
130
131
    add(catVariantsSnps)

132
133
134
    val catConsensus = Cat(this,
                           refVariants.consensus :: samples.map(_._2.output.consensus).toList,
                           new File(outputDir, "fastas" + File.separator + "consensus.fasta"))
135
    add(catConsensus)
136
137
138
    val catConsensusSnps = Cat(
      this,
      refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList,
Peter van 't Hof's avatar
Peter van 't Hof committed
139
      new File(outputDir, "fastas" + File.separator + "consensus.snps_only.fasta"))
140
141
    add(catConsensusSnps)

142
143
144
145
146
    val catConsensusVariants = Cat(
      this,
      refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.variant.fasta")
    )
147
    add(catConsensusVariants)
148
149
150
151
152
    val catConsensusVariantsSnps = Cat(
      this,
      refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.variant.snps_only.fasta")
    )
153
154
155
    add(catConsensusVariantsSnps)

    val seed: Int = config("seed", default = 12345)
Peter van 't Hof's avatar
Peter van 't Hof committed
156
157
158
    def addTreeJobs(variants: File, concensusVariants: File, outputDir: File, outputName: String) {
      val dirSufixRaxml = new File(outputDir, "raxml")
      val dirSufixGubbins = new File(outputDir, "gubbins")
Peter van 't Hof's avatar
Peter van 't Hof committed
159

160
      val raxmlMl = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
161
      raxmlMl.input = variants
162
      raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
163
      raxmlMl.p = Some(seed)
164
      raxmlMl.n = outputName + "_ml"
165
      raxmlMl.w = dirSufixRaxml
Sander Bollen's avatar
Sander Bollen committed
166
      raxmlMl.N = config("ml_runs", default = 20, namespace = "raxml")
167
168
169
170
171
      add(raxmlMl)

      val r = new scala.util.Random(seed)
      val bootList = for (t <- 0 until numBoot) yield {
        val raxmlBoot = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
172
        raxmlBoot.input = variants
173
        raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
174
        raxmlBoot.p = Some(seed)
Peter van 't Hof's avatar
Peter van 't Hof committed
175
        raxmlBoot.b = Some(math.abs(r.nextInt()))
Peter van 't Hof's avatar
Peter van 't Hof committed
176
        raxmlBoot.w = dirSufixRaxml
Peter van 't Hof's avatar
Peter van 't Hof committed
177
        raxmlBoot.N = Some(1)
178
179
        raxmlBoot.n = outputName + "_boot_" + t
        add(raxmlBoot)
180
        raxmlBoot.getBootstrapFile.get
181
182
      }

183
      val cat = Cat(this, bootList.toList, new File(outputDir, "/boot_list"))
184
185
186
      add(cat)

      val raxmlBi = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
187
      raxmlBi.input = concensusVariants
188
      raxmlBi.t = raxmlMl.getBestTreeFile
189
      raxmlBi.z = Some(cat.output)
190
      raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
191
      raxmlBi.p = Some(seed)
192
193
      raxmlBi.f = "b"
      raxmlBi.n = outputName + "_bi"
194
      raxmlBi.w = dirSufixRaxml
195
      add(raxmlBi)
196

197
198
199
200
201
202
203
      if (executeGubbins) {
        val gubbins = new RunGubbins(this)
        gubbins.fastafile = concensusVariants
        gubbins.startingTree = raxmlBi.getBipartitionsFile
        gubbins.outputDirectory = dirSufixGubbins
        add(gubbins)
      }
204
205
    }

206
207
208
209
210
211
212
213
    addTreeJobs(catVariantsSnps.output,
                catConsensusVariantsSnps.output,
                new File(outputDir, "trees" + File.separator + "snps_only"),
                "snps_only")
    addTreeJobs(catVariants.output,
                catConsensusVariants.output,
                new File(outputDir, "trees" + File.separator + "snps_indels"),
                "snps_indels")
214

215
216
  }

217
218
219
  def addGenerateFasta(sampleName: String,
                       outputDir: File,
                       outputName: String = null,
220
221
222
                       snpsOnly: Boolean = false): FastaOutput = {
    val bastyGenerateFasta = new BastyGenerateFasta(this)
    bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName
223
    bastyGenerateFasta.inputVcf = shiva.multisampleVariantCalling.get.finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
224
225
    if (shiva.samples.contains(sampleName)) {
      bastyGenerateFasta.bamFile = shiva.samples(sampleName).preProcessBam.get
226
    }
227
228
229
230
231
232
    bastyGenerateFasta.outputVariants = new File(
      outputDir,
      bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
    bastyGenerateFasta.outputConsensus = new File(
      outputDir,
      bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
233
234
235
236
    bastyGenerateFasta.outputConsensusVariants = new File(
      outputDir,
      bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only"
                                                               else "") + ".fasta")
237
238
    bastyGenerateFasta.sampleName = sampleName
    bastyGenerateFasta.snpsOnly = snpsOnly
239
    qscript.add(bastyGenerateFasta)
240
241
242
    FastaOutput(bastyGenerateFasta.outputVariants,
                bastyGenerateFasta.outputConsensus,
                bastyGenerateFasta.outputConsensusVariants)
243
244
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
245

246
object Basty extends PipelineCommand