BastyTrait.scala 7.85 KB
Newer Older
bow's avatar
bow committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16 17 18 19 20
/**
 * Due to the license issue with GATK, this part of Biopet can only be used inside the
 * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
 * on how to use this protected part of biopet or contact us at sasc@lumc.nl
 */
21 22 23
package nl.lumc.sasc.biopet.pipelines.basty

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
24

25
import nl.lumc.sasc.biopet.core.MultiSampleQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
26 27
import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins }
import nl.lumc.sasc.biopet.pipelines.shiva.{ Shiva, ShivaTrait }
28
import nl.lumc.sasc.biopet.tools.BastyGenerateFasta
29
import nl.lumc.sasc.biopet.utils.ConfigUtils
30

Peter van 't Hof's avatar
Peter van 't Hof committed
31
trait BastyTrait extends MultiSampleQScript {
32
  qscript =>
33 34

  case class FastaOutput(variants: File, consensus: File, consensusVariants: File)
35

Peter van 't Hof's avatar
Peter van 't Hof committed
36 37
  def variantcallers = List("freebayes")

38 39
  override def defaults = ConfigUtils.mergeMaps(Map(
    "ploidy" -> 1,
Peter van 't Hof's avatar
Peter van 't Hof committed
40
    "variantcallers" -> variantcallers
41 42
  ), super.defaults)

Peter van 't Hof's avatar
Peter van 't Hof committed
43
  lazy val shiva: ShivaTrait = new Shiva(qscript)
44

Sander Bollen's avatar
Sander Bollen committed
45
  def summaryFile: File = new File(outputDir, "Basty.summary.json")
46

Peter van 't Hof's avatar
Peter van 't Hof committed
47
  //TODO: Add summary
48 49
  def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
50
  //TODO: Add summary
51 52
  def summarySettings: Map[String, Any] = Map()

53 54
  def makeSample(id: String) = new Sample(id)
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
55
    //TODO: Add summary
56 57
    def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
58
    //TODO: Add summary
59 60
    def summaryStats: Map[String, Any] = Map()

61
    def makeLibrary(id: String) = new Library(id)
62
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
63
      //TODO: Add summary
64 65
      def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
66
      //TODO: Add summary
67 68
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
69
      protected def addJobs(): Unit = {}
70 71
    }

72 73 74
    var output: FastaOutput = _
    var outputSnps: FastaOutput = _

Peter van 't Hof's avatar
Peter van 't Hof committed
75
    protected def addJobs(): Unit = {
76
      addPerLibJobs()
77 78 79 80
      output = addGenerateFasta(sampleId, sampleDir)
      outputSnps = addGenerateFasta(sampleId, sampleDir, snpsOnly = true)
    }
  }
81 82

  def init() {
Peter van 't Hof's avatar
Peter van 't Hof committed
83
    shiva.outputDir = outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
84
    shiva.init()
85 86 87
  }

  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
88
    shiva.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
89 90
    addAll(shiva.functions)
    addSummaryQScript(shiva)
91

Peter van 't Hof's avatar
Peter van 't Hof committed
92
    addSamplesJobs()
93 94 95
  }

  def addMultiSampleJobs(): Unit = {
96 97
    val refVariants = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference")
    val refVariantSnps = addGenerateFasta(null, new File(outputDir, "fastas" + File.separator + "reference"), outputName = "reference", snpsOnly = true)
98

Peter van 't Hof's avatar
Peter van 't Hof committed
99 100
    val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList,
      new File(outputDir, "fastas" + File.separator + "variant.fasta"))
101
    add(catVariants)
Peter van 't Hof's avatar
Peter van 't Hof committed
102 103
    val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList,
      new File(outputDir, "fastas" + File.separator + "variant.snps_only.fasta"))
104 105
    add(catVariantsSnps)

Peter van 't Hof's avatar
Peter van 't Hof committed
106 107
    val catConsensus = Cat(this, refVariants.consensus :: samples.map(_._2.output.consensus).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.fasta"))
108
    add(catConsensus)
Peter van 't Hof's avatar
Peter van 't Hof committed
109 110
    val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.snps_only.fasta"))
111 112
    add(catConsensusSnps)

Peter van 't Hof's avatar
Peter van 't Hof committed
113 114
    val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.variant.fasta"))
115
    add(catConsensusVariants)
Peter van 't Hof's avatar
Peter van 't Hof committed
116 117
    val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList,
      new File(outputDir, "fastas" + File.separator + "consensus.variant.snps_only.fasta"))
118 119 120
    add(catConsensusVariantsSnps)

    val seed: Int = config("seed", default = 12345)
Peter van 't Hof's avatar
Peter van 't Hof committed
121 122 123
    def addTreeJobs(variants: File, concensusVariants: File, outputDir: File, outputName: String) {
      val dirSufixRaxml = new File(outputDir, "raxml")
      val dirSufixGubbins = new File(outputDir, "gubbins")
Peter van 't Hof's avatar
Peter van 't Hof committed
124

125
      val raxmlMl = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
126
      raxmlMl.input = variants
127
      raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
128
      raxmlMl.p = Some(seed)
129
      raxmlMl.n = outputName + "_ml"
130
      raxmlMl.w = dirSufixRaxml
131 132 133 134 135 136 137 138
      raxmlMl.N = config("ml_runs", default = 20, submodule = "raxml")
      add(raxmlMl)

      val r = new scala.util.Random(seed)
      val numBoot = config("boot_runs", default = 100, submodule = "raxml").asInt
      val bootList = for (t <- 0 until numBoot) yield {
        val raxmlBoot = new Raxml(this)
        raxmlBoot.threads = 1
Peter van 't Hof's avatar
Peter van 't Hof committed
139
        raxmlBoot.input = variants
140
        raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
141
        raxmlBoot.p = Some(seed)
Peter van 't Hof's avatar
Peter van 't Hof committed
142
        raxmlBoot.b = Some(math.abs(r.nextInt()))
Peter van 't Hof's avatar
Peter van 't Hof committed
143
        raxmlBoot.w = dirSufixRaxml
Peter van 't Hof's avatar
Peter van 't Hof committed
144
        raxmlBoot.N = Some(1)
145 146
        raxmlBoot.n = outputName + "_boot_" + t
        add(raxmlBoot)
147
        raxmlBoot.getBootstrapFile.get
148 149
      }

150
      val cat = Cat(this, bootList.toList, new File(outputDir, "/boot_list"))
151 152 153
      add(cat)

      val raxmlBi = new Raxml(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
154
      raxmlBi.input = concensusVariants
155
      raxmlBi.t = raxmlMl.getBestTreeFile
156
      raxmlBi.z = Some(cat.output)
157
      raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
Peter van 't Hof's avatar
Peter van 't Hof committed
158
      raxmlBi.p = Some(seed)
159 160
      raxmlBi.f = "b"
      raxmlBi.n = outputName + "_bi"
161
      raxmlBi.w = dirSufixRaxml
162
      add(raxmlBi)
163 164

      val gubbins = new RunGubbins(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
165
      gubbins.fastafile = concensusVariants
166
      gubbins.startingTree = raxmlBi.getBipartitionsFile
Peter van 't Hof's avatar
Peter van 't Hof committed
167
      gubbins.outputDirectory = dirSufixGubbins
168
      add(gubbins)
169 170
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
171 172 173 174
    addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output,
      new File(outputDir, "trees" + File.separator + "snps_only"), "snps_only")
    addTreeJobs(catVariants.output, catConsensusVariants.output,
      new File(outputDir, "trees" + File.separator + "snps_indels"), "snps_indels")
175

176 177
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
178
  def addGenerateFasta(sampleName: String, outputDir: File, outputName: String = null,
179 180 181
                       snpsOnly: Boolean = false): FastaOutput = {
    val bastyGenerateFasta = new BastyGenerateFasta(this)
    bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName
Peter van 't Hof's avatar
Peter van 't Hof committed
182
    bastyGenerateFasta.inputVcf = shiva.variantCalling.get.finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
183 184
    if (shiva.samples.contains(sampleName)) {
      bastyGenerateFasta.bamFile = shiva.samples(sampleName).preProcessBam.get
185
    }
186 187 188
    bastyGenerateFasta.outputVariants = new File(outputDir, bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
    bastyGenerateFasta.outputConsensus = new File(outputDir, bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
    bastyGenerateFasta.outputConsensusVariants = new File(outputDir, bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta")
189 190
    bastyGenerateFasta.sampleName = sampleName
    bastyGenerateFasta.snpsOnly = snpsOnly
191
    qscript.add(bastyGenerateFasta)
Peter van 't Hof's avatar
Peter van 't Hof committed
192
    FastaOutput(bastyGenerateFasta.outputVariants, bastyGenerateFasta.outputConsensus, bastyGenerateFasta.outputConsensusVariants)
193 194
  }
}