Basty.scala 7.96 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
bow's avatar
bow committed
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15
16
17
18
19
/**
 * Due to the license issue with GATK, this part of Biopet can only be used inside the
 * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
 * on how to use this protected part of biopet or contact us at sasc@lumc.nl
 */
20
21
22
package nl.lumc.sasc.biopet.pipelines.basty

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
23

24
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins }
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.shiva.Shiva
27
import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import nl.lumc.sasc.biopet.utils.config.Configurable
29
import org.broadinstitute.gatk.queue.QScript
30

31
32
33
class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
34
  def this() = this(null)
35
36

  case class FastaOutput(variants: File, consensus: File, consensusVariants: File)
37

38
  def variantcallers = List("unifiedgenotyper")
Peter van 't Hof's avatar
Peter van 't Hof committed
39

40
  override def defaults = Map(
41
    "ploidy" -> 1,
Peter van 't Hof's avatar
Peter van 't Hof committed
42
    "variantcallers" -> variantcallers
43
  )
44

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  lazy val shiva = new Shiva(qscript)
46

Sander Bollen's avatar
Sander Bollen committed
47
  def summaryFile: File = new File(outputDir, "Basty.summary.json")
48

Peter van 't Hof's avatar
Peter van 't Hof committed
49
  //TODO: Add summary
50
51
  def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
52
  //TODO: Add summary
53
54
  def summarySettings: Map[String, Any] = Map()

55
56
  def makeSample(id: String) = new Sample(id)
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
57
    //TODO: Add summary
58
59
    def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
60
    //TODO: Add summary
61
62
    def summaryStats: Map[String, Any] = Map()

63
    def makeLibrary(id: String) = new Library(id)
64
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
65
      //TODO: Add summary
66
67
      def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
68
      //TODO: Add summary
69
70
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
71
      protected def addJobs(): Unit = {}
72
73
    }

74
75
76
    var output: FastaOutput = _
    var outputSnps: FastaOutput = _

Peter van 't Hof's avatar
Peter van 't Hof committed
77
    protected def addJobs(): Unit = {
78
      addPerLibJobs()
79
80
81
82
      output = addGenerateFasta(sampleId, sampleDir)
      outputSnps = addGenerateFasta(sampleId, sampleDir, snpsOnly = true)
    }
  }
83
84

  def init() {
Peter van 't Hof's avatar
Peter van 't Hof committed
85
    shiva.outputDir = outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
86
    shiva.init()
87
88
89
  }

  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
90
    shiva.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
    addAll(shiva.functions)
    addSummaryQScript(shiva)
93

94
95
    inputFiles :::= shiva.inputFiles

Peter van 't Hof's avatar
Peter van 't Hof committed
96
    addSamplesJobs()
97
98
99
  }

  def addMultiSampleJobs(): Unit = {
100
101
    val refVariants = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference")
    val refVariantSnps = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference", snpsOnly = true)
102

Peter van 't Hof's avatar
Peter van 't Hof committed
103
104
    val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList,
      new File(outputDir, "fastas" + File.separator + "variant.fasta"))
105
    add(catVariants)
Peter van 't Hof's avatar
Peter van 't Hof committed
106
107
    val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList,
      new File(outputDir, "fastas" + File.separator + "variant.snps_only.fasta"))
108
109
    add(catVariantsSnps)

Peter van 't Hof's avatar
Peter van 't Hof committed
110
111
    val catConsensus = Cat(this, refVariants.consensus :: samples.map(_._2.output.consensus).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.fasta"))
112
    add(catConsensus)
Peter van 't Hof's avatar
Peter van 't Hof committed
113
114
    val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.snps_only.fasta"))
115
116
    add(catConsensusSnps)

Peter van 't Hof's avatar
Peter van 't Hof committed
117
118
    val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.variant.fasta"))
119
    add(catConsensusVariants)
Peter van 't Hof's avatar
Peter van 't Hof committed
120
121
    val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.variant.snps_only.fasta"))
122
123
124
    add(catConsensusVariantsSnps)

    val seed: Int = config("seed", default = 12345)
Peter van 't Hof's avatar
Peter van 't Hof committed
125
126
127
    def addTreeJobs(variants: File, concensusVariants: File, outputDir: File, outputName: String) {
      val dirSufixRaxml = new File(outputDir, "raxml")
      val dirSufixGubbins = new File(outputDir, "gubbins")
Peter van 't Hof's avatar
Peter van 't Hof committed
128

129
      val raxmlMl = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
130
      raxmlMl.input = variants
131
      raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
132
      raxmlMl.p = Some(seed)
133
      raxmlMl.n = outputName + "_ml"
134
      raxmlMl.w = dirSufixRaxml
Sander Bollen's avatar
Sander Bollen committed
135
      raxmlMl.N = config("ml_runs", default = 20, namespace = "raxml")
136
137
138
      add(raxmlMl)

      val r = new scala.util.Random(seed)
Sander Bollen's avatar
Sander Bollen committed
139
      val numBoot = config("boot_runs", default = 100, namespace = "raxml").asInt
140
141
      val bootList = for (t <- 0 until numBoot) yield {
        val raxmlBoot = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
142
        raxmlBoot.input = variants
143
        raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
144
        raxmlBoot.p = Some(seed)
Peter van 't Hof's avatar
Peter van 't Hof committed
145
        raxmlBoot.b = Some(math.abs(r.nextInt()))
Peter van 't Hof's avatar
Peter van 't Hof committed
146
        raxmlBoot.w = dirSufixRaxml
Peter van 't Hof's avatar
Peter van 't Hof committed
147
        raxmlBoot.N = Some(1)
148
149
        raxmlBoot.n = outputName + "_boot_" + t
        add(raxmlBoot)
150
        raxmlBoot.getBootstrapFile.get
151
152
      }

153
      val cat = Cat(this, bootList.toList, new File(outputDir, "/boot_list"))
154
155
156
      add(cat)

      val raxmlBi = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
157
      raxmlBi.input = concensusVariants
158
      raxmlBi.t = raxmlMl.getBestTreeFile
159
      raxmlBi.z = Some(cat.output)
160
      raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
161
      raxmlBi.p = Some(seed)
162
163
      raxmlBi.f = "b"
      raxmlBi.n = outputName + "_bi"
164
      raxmlBi.w = dirSufixRaxml
165
      add(raxmlBi)
166
167

      val gubbins = new RunGubbins(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
168
      gubbins.fastafile = concensusVariants
169
      gubbins.startingTree = raxmlBi.getBipartitionsFile
Peter van 't Hof's avatar
Peter van 't Hof committed
170
      gubbins.outputDirectory = dirSufixGubbins
171
      add(gubbins)
172
173
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
174
175
176
177
    addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output,
      new File(outputDir, "trees" + File.separator + "snps_only"), "snps_only")
    addTreeJobs(catVariants.output, catConsensusVariants.output,
      new File(outputDir, "trees" + File.separator + "snps_indels"), "snps_indels")
178

179
180
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
181
  def addGenerateFasta(sampleName: String, outputDir: File, outputName: String = null,
182
183
184
                       snpsOnly: Boolean = false): FastaOutput = {
    val bastyGenerateFasta = new BastyGenerateFasta(this)
    bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName
185
    bastyGenerateFasta.inputVcf = shiva.multisampleVariantCalling.get.finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
186
187
    if (shiva.samples.contains(sampleName)) {
      bastyGenerateFasta.bamFile = shiva.samples(sampleName).preProcessBam.get
188
    }
189
190
191
    bastyGenerateFasta.outputVariants = new File(outputDir, bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
    bastyGenerateFasta.outputConsensus = new File(outputDir, bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
    bastyGenerateFasta.outputConsensusVariants = new File(outputDir, bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
192
193
    bastyGenerateFasta.sampleName = sampleName
    bastyGenerateFasta.snpsOnly = snpsOnly
194
    qscript.add(bastyGenerateFasta)
Peter van 't Hof's avatar
Peter van 't Hof committed
195
    FastaOutput(bastyGenerateFasta.outputVariants, bastyGenerateFasta.outputConsensus, bastyGenerateFasta.outputConsensusVariants)
196
197
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
198

199
object Basty extends PipelineCommand