BastyTrait.scala 7.85 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
18
19
20
/**
 * Due to the license issue with GATK, this part of Biopet can only be used inside the
 * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
 * on how to use this protected part of biopet or contact us at sasc@lumc.nl
 */
21
22
23
package nl.lumc.sasc.biopet.pipelines.basty

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
24

25
import nl.lumc.sasc.biopet.core.MultiSampleQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
26
27
import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins }
import nl.lumc.sasc.biopet.pipelines.shiva.{ Shiva, ShivaTrait }
28
import nl.lumc.sasc.biopet.tools.BastyGenerateFasta
29
import nl.lumc.sasc.biopet.utils.ConfigUtils
30

Peter van 't Hof's avatar
Peter van 't Hof committed
31
trait BastyTrait extends MultiSampleQScript {
32
  qscript =>
33
34

  case class FastaOutput(variants: File, consensus: File, consensusVariants: File)
35

Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
  def variantcallers = List("freebayes")

38
39
  override def defaults = ConfigUtils.mergeMaps(Map(
    "ploidy" -> 1,
Peter van 't Hof's avatar
Peter van 't Hof committed
40
    "variantcallers" -> variantcallers
41
42
  ), super.defaults)

Peter van 't Hof's avatar
Peter van 't Hof committed
43
  lazy val shiva: ShivaTrait = new Shiva(qscript)
44

Sander Bollen's avatar
Sander Bollen committed
45
  def summaryFile: File = new File(outputDir, "Basty.summary.json")
46

Peter van 't Hof's avatar
Peter van 't Hof committed
47
  //TODO: Add summary
48
49
  def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
50
  //TODO: Add summary
51
52
  def summarySettings: Map[String, Any] = Map()

53
54
  def makeSample(id: String) = new Sample(id)
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
55
    //TODO: Add summary
56
57
    def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
58
    //TODO: Add summary
59
60
    def summaryStats: Map[String, Any] = Map()

61
    def makeLibrary(id: String) = new Library(id)
62
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
63
      //TODO: Add summary
64
65
      def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
66
      //TODO: Add summary
67
68
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
69
      protected def addJobs(): Unit = {}
70
71
    }

72
73
74
    var output: FastaOutput = _
    var outputSnps: FastaOutput = _

Peter van 't Hof's avatar
Peter van 't Hof committed
75
    protected def addJobs(): Unit = {
76
      addPerLibJobs()
77
78
79
80
      output = addGenerateFasta(sampleId, sampleDir)
      outputSnps = addGenerateFasta(sampleId, sampleDir, snpsOnly = true)
    }
  }
81
82

  def init() {
Peter van 't Hof's avatar
Peter van 't Hof committed
83
    shiva.outputDir = outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
84
    shiva.init()
85
86
87
  }

  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
88
    shiva.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
89
90
    addAll(shiva.functions)
    addSummaryQScript(shiva)
91

Peter van 't Hof's avatar
Peter van 't Hof committed
92
    addSamplesJobs()
93
94
95
  }

  def addMultiSampleJobs(): Unit = {
96
97
    val refVariants = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference")
    val refVariantSnps = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference", snpsOnly = true)
98

Peter van 't Hof's avatar
Peter van 't Hof committed
99
100
    val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList,
      new File(outputDir, "fastas" + File.separator + "variant.fasta"))
101
    add(catVariants)
Peter van 't Hof's avatar
Peter van 't Hof committed
102
103
    val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList,
      new File(outputDir, "fastas" + File.separator + "variant.snps_only.fasta"))
104
105
    add(catVariantsSnps)

Peter van 't Hof's avatar
Peter van 't Hof committed
106
107
    val catConsensus = Cat(this, refVariants.consensus :: samples.map(_._2.output.consensus).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.fasta"))
108
    add(catConsensus)
Peter van 't Hof's avatar
Peter van 't Hof committed
109
110
    val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.snps_only.fasta"))
111
112
    add(catConsensusSnps)

Peter van 't Hof's avatar
Peter van 't Hof committed
113
114
    val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.variant.fasta"))
115
    add(catConsensusVariants)
Peter van 't Hof's avatar
Peter van 't Hof committed
116
117
    val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.variant.snps_only.fasta"))
118
119
120
    add(catConsensusVariantsSnps)

    val seed: Int = config("seed", default = 12345)
Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
123
    def addTreeJobs(variants: File, concensusVariants: File, outputDir: File, outputName: String) {
      val dirSufixRaxml = new File(outputDir, "raxml")
      val dirSufixGubbins = new File(outputDir, "gubbins")
Peter van 't Hof's avatar
Peter van 't Hof committed
124

125
      val raxmlMl = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
126
      raxmlMl.input = variants
127
      raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
128
      raxmlMl.p = Some(seed)
129
      raxmlMl.n = outputName + "_ml"
130
      raxmlMl.w = dirSufixRaxml
131
132
133
134
135
136
137
138
      raxmlMl.N = config("ml_runs", default = 20, submodule = "raxml")
      add(raxmlMl)

      val r = new scala.util.Random(seed)
      val numBoot = config("boot_runs", default = 100, submodule = "raxml").asInt
      val bootList = for (t <- 0 until numBoot) yield {
        val raxmlBoot = new Raxml(this)
        raxmlBoot.threads = 1
Peter van 't Hof's avatar
Peter van 't Hof committed
139
        raxmlBoot.input = variants
140
        raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
141
        raxmlBoot.p = Some(seed)
Peter van 't Hof's avatar
Peter van 't Hof committed
142
        raxmlBoot.b = Some(math.abs(r.nextInt()))
Peter van 't Hof's avatar
Peter van 't Hof committed
143
        raxmlBoot.w = dirSufixRaxml
Peter van 't Hof's avatar
Peter van 't Hof committed
144
        raxmlBoot.N = Some(1)
145
146
        raxmlBoot.n = outputName + "_boot_" + t
        add(raxmlBoot)
147
        raxmlBoot.getBootstrapFile.get
148
149
      }

150
      val cat = Cat(this, bootList.toList, new File(outputDir, "/boot_list"))
151
152
153
      add(cat)

      val raxmlBi = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
154
      raxmlBi.input = concensusVariants
155
      raxmlBi.t = raxmlMl.getBestTreeFile
156
      raxmlBi.z = Some(cat.output)
157
      raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
158
      raxmlBi.p = Some(seed)
159
160
      raxmlBi.f = "b"
      raxmlBi.n = outputName + "_bi"
161
      raxmlBi.w = dirSufixRaxml
162
      add(raxmlBi)
163
164

      val gubbins = new RunGubbins(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
165
      gubbins.fastafile = concensusVariants
166
      gubbins.startingTree = raxmlBi.getBipartitionsFile
Peter van 't Hof's avatar
Peter van 't Hof committed
167
      gubbins.outputDirectory = dirSufixGubbins
168
      add(gubbins)
169
170
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
173
174
    addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output,
      new File(outputDir, "trees" + File.separator + "snps_only"), "snps_only")
    addTreeJobs(catVariants.output, catConsensusVariants.output,
      new File(outputDir, "trees" + File.separator + "snps_indels"), "snps_indels")
175

176
177
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
178
  def addGenerateFasta(sampleName: String, outputDir: File, outputName: String = null,
179
180
181
                       snpsOnly: Boolean = false): FastaOutput = {
    val bastyGenerateFasta = new BastyGenerateFasta(this)
    bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName
Peter van 't Hof's avatar
Peter van 't Hof committed
182
    bastyGenerateFasta.inputVcf = shiva.variantCalling.get.finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
183
184
    if (shiva.samples.contains(sampleName)) {
      bastyGenerateFasta.bamFile = shiva.samples(sampleName).preProcessBam.get
185
    }
186
187
188
    bastyGenerateFasta.outputVariants = new File(outputDir, bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
    bastyGenerateFasta.outputConsensus = new File(outputDir, bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
    bastyGenerateFasta.outputConsensusVariants = new File(outputDir, bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
189
190
    bastyGenerateFasta.sampleName = sampleName
    bastyGenerateFasta.snpsOnly = snpsOnly
191
    qscript.add(bastyGenerateFasta)
Peter van 't Hof's avatar
Peter van 't Hof committed
192
    FastaOutput(bastyGenerateFasta.outputVariants, bastyGenerateFasta.outputConsensus, bastyGenerateFasta.outputConsensusVariants)
193
194
  }
}