Basty.scala 7.99 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
18
19
20
/**
 * Due to the license issue with GATK, this part of Biopet can only be used inside the
 * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
 * on how to use this protected part of biopet or contact us at sasc@lumc.nl
 */
21
22
23
package nl.lumc.sasc.biopet.pipelines.basty

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
24

25
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand }
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins }
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.pipelines.shiva.Shiva
28
import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import nl.lumc.sasc.biopet.utils.config.Configurable
30
import org.broadinstitute.gatk.queue.QScript
31

32
33
34
class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
35
  def this() = this(null)
36
37

  case class FastaOutput(variants: File, consensus: File, consensusVariants: File)
38

39
  def variantcallers = List("unifiedgenotyper")
Peter van 't Hof's avatar
Peter van 't Hof committed
40

41
  override def defaults = Map(
42
    "ploidy" -> 1,
Peter van 't Hof's avatar
Peter van 't Hof committed
43
    "variantcallers" -> variantcallers
44
  )
45

Peter van 't Hof's avatar
Peter van 't Hof committed
46
  lazy val shiva = new Shiva(qscript)
47

Sander Bollen's avatar
Sander Bollen committed
48
  def summaryFile: File = new File(outputDir, "Basty.summary.json")
49

Peter van 't Hof's avatar
Peter van 't Hof committed
50
  //TODO: Add summary
51
52
  def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
53
  //TODO: Add summary
54
55
  def summarySettings: Map[String, Any] = Map()

56
57
  def makeSample(id: String) = new Sample(id)
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
58
    //TODO: Add summary
59
60
    def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
61
    //TODO: Add summary
62
63
    def summaryStats: Map[String, Any] = Map()

64
    def makeLibrary(id: String) = new Library(id)
65
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
66
      //TODO: Add summary
67
68
      def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
69
      //TODO: Add summary
70
71
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
72
      protected def addJobs(): Unit = {}
73
74
    }

75
76
77
    var output: FastaOutput = _
    var outputSnps: FastaOutput = _

Peter van 't Hof's avatar
Peter van 't Hof committed
78
    protected def addJobs(): Unit = {
79
      addPerLibJobs()
80
81
82
83
      output = addGenerateFasta(sampleId, sampleDir)
      outputSnps = addGenerateFasta(sampleId, sampleDir, snpsOnly = true)
    }
  }
84
85

  def init() {
Peter van 't Hof's avatar
Peter van 't Hof committed
86
    shiva.outputDir = outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
87
    shiva.init()
88
89
90
  }

  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
91
    shiva.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
92
93
    addAll(shiva.functions)
    addSummaryQScript(shiva)
94

95
96
    inputFiles :::= shiva.inputFiles

Peter van 't Hof's avatar
Peter van 't Hof committed
97
    addSamplesJobs()
98
99
100
  }

  def addMultiSampleJobs(): Unit = {
101
102
    val refVariants = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference")
    val refVariantSnps = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference", snpsOnly = true)
103

Peter van 't Hof's avatar
Peter van 't Hof committed
104
105
    val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList,
      new File(outputDir, "fastas" + File.separator + "variant.fasta"))
106
    add(catVariants)
Peter van 't Hof's avatar
Peter van 't Hof committed
107
108
    val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList,
      new File(outputDir, "fastas" + File.separator + "variant.snps_only.fasta"))
109
110
    add(catVariantsSnps)

Peter van 't Hof's avatar
Peter van 't Hof committed
111
112
    val catConsensus = Cat(this, refVariants.consensus :: samples.map(_._2.output.consensus).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.fasta"))
113
    add(catConsensus)
Peter van 't Hof's avatar
Peter van 't Hof committed
114
115
    val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.snps_only.fasta"))
116
117
    add(catConsensusSnps)

Peter van 't Hof's avatar
Peter van 't Hof committed
118
119
    val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.variant.fasta"))
120
    add(catConsensusVariants)
Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
    val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.variant.snps_only.fasta"))
123
124
125
    add(catConsensusVariantsSnps)

    val seed: Int = config("seed", default = 12345)
Peter van 't Hof's avatar
Peter van 't Hof committed
126
127
128
    def addTreeJobs(variants: File, concensusVariants: File, outputDir: File, outputName: String) {
      val dirSufixRaxml = new File(outputDir, "raxml")
      val dirSufixGubbins = new File(outputDir, "gubbins")
Peter van 't Hof's avatar
Peter van 't Hof committed
129

130
      val raxmlMl = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
131
      raxmlMl.input = variants
132
      raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
133
      raxmlMl.p = Some(seed)
134
      raxmlMl.n = outputName + "_ml"
135
      raxmlMl.w = dirSufixRaxml
Sander Bollen's avatar
Sander Bollen committed
136
      raxmlMl.N = config("ml_runs", default = 20, namespace = "raxml")
137
138
139
      add(raxmlMl)

      val r = new scala.util.Random(seed)
Sander Bollen's avatar
Sander Bollen committed
140
      val numBoot = config("boot_runs", default = 100, namespace = "raxml").asInt
141
142
      val bootList = for (t <- 0 until numBoot) yield {
        val raxmlBoot = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
143
        raxmlBoot.input = variants
144
        raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
145
        raxmlBoot.p = Some(seed)
Peter van 't Hof's avatar
Peter van 't Hof committed
146
        raxmlBoot.b = Some(math.abs(r.nextInt()))
Peter van 't Hof's avatar
Peter van 't Hof committed
147
        raxmlBoot.w = dirSufixRaxml
Peter van 't Hof's avatar
Peter van 't Hof committed
148
        raxmlBoot.N = Some(1)
149
150
        raxmlBoot.n = outputName + "_boot_" + t
        add(raxmlBoot)
151
        raxmlBoot.getBootstrapFile.get
152
153
      }

154
      val cat = Cat(this, bootList.toList, new File(outputDir, "/boot_list"))
155
156
157
      add(cat)

      val raxmlBi = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
158
      raxmlBi.input = concensusVariants
159
      raxmlBi.t = raxmlMl.getBestTreeFile
160
      raxmlBi.z = Some(cat.output)
161
      raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
162
      raxmlBi.p = Some(seed)
163
164
      raxmlBi.f = "b"
      raxmlBi.n = outputName + "_bi"
165
      raxmlBi.w = dirSufixRaxml
166
      add(raxmlBi)
167
168

      val gubbins = new RunGubbins(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
169
      gubbins.fastafile = concensusVariants
170
      gubbins.startingTree = raxmlBi.getBipartitionsFile
Peter van 't Hof's avatar
Peter van 't Hof committed
171
      gubbins.outputDirectory = dirSufixGubbins
172
      add(gubbins)
173
174
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
175
176
177
178
    addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output,
      new File(outputDir, "trees" + File.separator + "snps_only"), "snps_only")
    addTreeJobs(catVariants.output, catConsensusVariants.output,
      new File(outputDir, "trees" + File.separator + "snps_indels"), "snps_indels")
179

180
181
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
182
  def addGenerateFasta(sampleName: String, outputDir: File, outputName: String = null,
183
184
185
                       snpsOnly: Boolean = false): FastaOutput = {
    val bastyGenerateFasta = new BastyGenerateFasta(this)
    bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName
186
    bastyGenerateFasta.inputVcf = shiva.multisampleVariantCalling.get.finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
187
188
    if (shiva.samples.contains(sampleName)) {
      bastyGenerateFasta.bamFile = shiva.samples(sampleName).preProcessBam.get
189
    }
190
191
192
    bastyGenerateFasta.outputVariants = new File(outputDir, bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
    bastyGenerateFasta.outputConsensus = new File(outputDir, bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
    bastyGenerateFasta.outputConsensusVariants = new File(outputDir, bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
193
194
    bastyGenerateFasta.sampleName = sampleName
    bastyGenerateFasta.snpsOnly = snpsOnly
195
    qscript.add(bastyGenerateFasta)
Peter van 't Hof's avatar
Peter van 't Hof committed
196
    FastaOutput(bastyGenerateFasta.outputVariants, bastyGenerateFasta.outputConsensus, bastyGenerateFasta.outputConsensusVariants)
197
198
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
199

200
object Basty extends PipelineCommand