Basty.scala 7.96 KB
Newer Older
bow's avatar
bow committed
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
bow's avatar
bow committed
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16 17 18 19
/**
 * Due to the license issue with GATK, this part of Biopet can only be used inside the
 * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
 * on how to use this protected part of biopet or contact us at sasc@lumc.nl
 */
20 21 22
package nl.lumc.sasc.biopet.pipelines.basty

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
23

24
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins }
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.shiva.Shiva
27
import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import nl.lumc.sasc.biopet.utils.config.Configurable
29
import org.broadinstitute.gatk.queue.QScript
30

31 32 33
class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
34
  def this() = this(null)
35 36

  case class FastaOutput(variants: File, consensus: File, consensusVariants: File)
37

38
  def variantcallers = List("unifiedgenotyper")
Peter van 't Hof's avatar
Peter van 't Hof committed
39

40
  override def defaults = Map(
41
    "ploidy" -> 1,
Peter van 't Hof's avatar
Peter van 't Hof committed
42
    "variantcallers" -> variantcallers
43
  )
44

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  lazy val shiva = new Shiva(qscript)
46

Sander Bollen's avatar
Sander Bollen committed
47
  def summaryFile: File = new File(outputDir, "Basty.summary.json")
48

Peter van 't Hof's avatar
Peter van 't Hof committed
49
  //TODO: Add summary
50 51
  def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
52
  //TODO: Add summary
53 54
  def summarySettings: Map[String, Any] = Map()

55 56
  def makeSample(id: String) = new Sample(id)
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
57
    //TODO: Add summary
58 59
    def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
60
    //TODO: Add summary
61 62
    def summaryStats: Map[String, Any] = Map()

63
    def makeLibrary(id: String) = new Library(id)
64
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
65
      //TODO: Add summary
66 67
      def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
68
      //TODO: Add summary
69 70
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
71
      protected def addJobs(): Unit = {}
72 73
    }

74 75 76
    var output: FastaOutput = _
    var outputSnps: FastaOutput = _

Peter van 't Hof's avatar
Peter van 't Hof committed
77
    protected def addJobs(): Unit = {
78
      addPerLibJobs()
79 80 81 82
      output = addGenerateFasta(sampleId, sampleDir)
      outputSnps = addGenerateFasta(sampleId, sampleDir, snpsOnly = true)
    }
  }
83 84

  def init() {
Peter van 't Hof's avatar
Peter van 't Hof committed
85
    shiva.outputDir = outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
86
    shiva.init()
87 88 89
  }

  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
90
    shiva.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
91 92
    addAll(shiva.functions)
    addSummaryQScript(shiva)
93

94 95
    inputFiles :::= shiva.inputFiles

Peter van 't Hof's avatar
Peter van 't Hof committed
96
    addSamplesJobs()
97 98 99
  }

  def addMultiSampleJobs(): Unit = {
100 101
    val refVariants = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference")
    val refVariantSnps = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference", snpsOnly = true)
102

Peter van 't Hof's avatar
Peter van 't Hof committed
103 104
    val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList,
      new File(outputDir, "fastas" + File.separator + "variant.fasta"))
105
    add(catVariants)
Peter van 't Hof's avatar
Peter van 't Hof committed
106 107
    val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList,
      new File(outputDir, "fastas" + File.separator + "variant.snps_only.fasta"))
108 109
    add(catVariantsSnps)

Peter van 't Hof's avatar
Peter van 't Hof committed
110 111
    val catConsensus = Cat(this, refVariants.consensus :: samples.map(_._2.output.consensus).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.fasta"))
112
    add(catConsensus)
Peter van 't Hof's avatar
Peter van 't Hof committed
113 114
    val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.snps_only.fasta"))
115 116
    add(catConsensusSnps)

Peter van 't Hof's avatar
Peter van 't Hof committed
117 118
    val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.variant.fasta"))
119
    add(catConsensusVariants)
Peter van 't Hof's avatar
Peter van 't Hof committed
120 121
    val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.variant.snps_only.fasta"))
122 123 124
    add(catConsensusVariantsSnps)

    val seed: Int = config("seed", default = 12345)
Peter van 't Hof's avatar
Peter van 't Hof committed
125 126 127
    def addTreeJobs(variants: File, concensusVariants: File, outputDir: File, outputName: String) {
      val dirSufixRaxml = new File(outputDir, "raxml")
      val dirSufixGubbins = new File(outputDir, "gubbins")
Peter van 't Hof's avatar
Peter van 't Hof committed
128

129
      val raxmlMl = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
130
      raxmlMl.input = variants
131
      raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
132
      raxmlMl.p = Some(seed)
133
      raxmlMl.n = outputName + "_ml"
134
      raxmlMl.w = dirSufixRaxml
Sander Bollen's avatar
Sander Bollen committed
135
      raxmlMl.N = config("ml_runs", default = 20, namespace = "raxml")
136 137 138
      add(raxmlMl)

      val r = new scala.util.Random(seed)
Sander Bollen's avatar
Sander Bollen committed
139
      val numBoot = config("boot_runs", default = 100, namespace = "raxml").asInt
140 141
      val bootList = for (t <- 0 until numBoot) yield {
        val raxmlBoot = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
142
        raxmlBoot.input = variants
143
        raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
144
        raxmlBoot.p = Some(seed)
Peter van 't Hof's avatar
Peter van 't Hof committed
145
        raxmlBoot.b = Some(math.abs(r.nextInt()))
Peter van 't Hof's avatar
Peter van 't Hof committed
146
        raxmlBoot.w = dirSufixRaxml
Peter van 't Hof's avatar
Peter van 't Hof committed
147
        raxmlBoot.N = Some(1)
148 149
        raxmlBoot.n = outputName + "_boot_" + t
        add(raxmlBoot)
150
        raxmlBoot.getBootstrapFile.get
151 152
      }

153
      val cat = Cat(this, bootList.toList, new File(outputDir, "/boot_list"))
154 155 156
      add(cat)

      val raxmlBi = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
157
      raxmlBi.input = concensusVariants
158
      raxmlBi.t = raxmlMl.getBestTreeFile
159
      raxmlBi.z = Some(cat.output)
160
      raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
161
      raxmlBi.p = Some(seed)
162 163
      raxmlBi.f = "b"
      raxmlBi.n = outputName + "_bi"
164
      raxmlBi.w = dirSufixRaxml
165
      add(raxmlBi)
166 167

      val gubbins = new RunGubbins(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
168
      gubbins.fastafile = concensusVariants
169
      gubbins.startingTree = raxmlBi.getBipartitionsFile
Peter van 't Hof's avatar
Peter van 't Hof committed
170
      gubbins.outputDirectory = dirSufixGubbins
171
      add(gubbins)
172 173
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
174 175 176 177
    addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output,
      new File(outputDir, "trees" + File.separator + "snps_only"), "snps_only")
    addTreeJobs(catVariants.output, catConsensusVariants.output,
      new File(outputDir, "trees" + File.separator + "snps_indels"), "snps_indels")
178

179 180
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
181
  def addGenerateFasta(sampleName: String, outputDir: File, outputName: String = null,
182 183 184
                       snpsOnly: Boolean = false): FastaOutput = {
    val bastyGenerateFasta = new BastyGenerateFasta(this)
    bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName
185
    bastyGenerateFasta.inputVcf = shiva.multisampleVariantCalling.get.finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
186 187
    if (shiva.samples.contains(sampleName)) {
      bastyGenerateFasta.bamFile = shiva.samples(sampleName).preProcessBam.get
188
    }
189 190 191
    bastyGenerateFasta.outputVariants = new File(outputDir, bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
    bastyGenerateFasta.outputConsensus = new File(outputDir, bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
    bastyGenerateFasta.outputConsensusVariants = new File(outputDir, bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
192 193
    bastyGenerateFasta.sampleName = sampleName
    bastyGenerateFasta.snpsOnly = snpsOnly
194
    qscript.add(bastyGenerateFasta)
Peter van 't Hof's avatar
Peter van 't Hof committed
195
    FastaOutput(bastyGenerateFasta.outputVariants, bastyGenerateFasta.outputConsensus, bastyGenerateFasta.outputConsensusVariants)
196 197
  }
}
Peter van 't Hof's avatar
Peter van 't Hof committed
198

199
object Basty extends PipelineCommand