Basty.scala 6.79 KB
Newer Older
1
2
3
4
5
6
package nl.lumc.sasc.biopet.pipelines.basty

import java.io.File
import nl.lumc.sasc.biopet.core.MultiSampleQScript
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable
7
import nl.lumc.sasc.biopet.extensions.{ RunGubbins, Cat, Raxml }
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline
import nl.lumc.sasc.biopet.tools.BastyGenerateFasta
import org.broadinstitute.gatk.queue.QScript

class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
  def this() = this(null)

  class LibraryOutput extends AbstractLibraryOutput {
  }

  case class FastaOutput(variants: File, consensus: File, consensusVariants: File)
  class SampleOutput extends AbstractSampleOutput {
    var output: FastaOutput = _
    var outputSnps: FastaOutput = _
  }

  defaults ++= Map("ploidy" -> 1, "use_haplotypecaller" -> false, "use_unifiedgenotyper" -> true, "joint_variantcalling" -> true)

  var gatkPipeline: GatkPipeline = new GatkPipeline(this)
  gatkPipeline.jointVariantcalling = true

  def init() {
    gatkPipeline.outputDir = outputDir
    gatkPipeline.init
  }

  def biopetScript() {
    gatkPipeline.biopetScript
    addAll(gatkPipeline.functions)

    val refVariants = addGenerateFasta(null, outputDir + "reference/", outputName = "reference")
    val refVariantSnps = addGenerateFasta(null, outputDir + "reference/", outputName = "reference", snpsOnly = true)

    runSamplesJobs()

    val catVariants = Cat(this, refVariants.variants :: samplesOutput.map(_._2.output.variants).toList, outputDir + "fastas/variant.fasta")
    add(catVariants)
    val catVariantsSnps = Cat(this, refVariantSnps.variants :: samplesOutput.map(_._2.outputSnps.variants).toList, outputDir + "fastas/variant.snps_only.fasta")
    add(catVariantsSnps)

    val catConsensus = Cat(this, refVariants.consensus :: samplesOutput.map(_._2.output.consensus).toList, outputDir + "fastas/consensus.fasta")
    add(catConsensus)
    val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samplesOutput.map(_._2.outputSnps.consensus).toList, outputDir + "fastas/consensus.snps_only.fasta")
    add(catConsensusSnps)

    val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samplesOutput.map(_._2.output.consensusVariants).toList, outputDir + "fastas/consensus.variant.fasta")
    add(catConsensusVariants)
    val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samplesOutput.map(_._2.outputSnps.consensusVariants).toList, outputDir + "fastas/consensus.variant.snps_only.fasta")
    add(catConsensusVariantsSnps)

    val seed: Int = config("seed", default = 12345)
Peter van 't Hof's avatar
Peter van 't Hof committed
59
60
61
62
    def addTreeJobs(input: File, outputDir: String, outputName: String) {
      val dirSufixRaxml = if (outputDir.endsWith(File.separator)) "raxml" else File.separator + "raxml"
      val dirSufixGubbins = if (outputDir.endsWith(File.separator)) "gubbins" else File.separator + "gubbins"

63
64
65
66
67
      val raxmlMl = new Raxml(this)
      raxmlMl.input = input
      raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX")
      raxmlMl.p = seed
      raxmlMl.n = outputName + "_ml"
Peter van 't Hof's avatar
Peter van 't Hof committed
68
      raxmlMl.w = outputDir + dirSufixRaxml
69
70
71
72
73
74
75
76
77
78
79
80
      raxmlMl.N = config("ml_runs", default = 20, submodule = "raxml")
      add(raxmlMl)

      val r = new scala.util.Random(seed)
      val numBoot = config("boot_runs", default = 100, submodule = "raxml").asInt
      val bootList = for (t <- 0 until numBoot) yield {
        val raxmlBoot = new Raxml(this)
        raxmlBoot.threads = 1
        raxmlBoot.input = input
        raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
        raxmlBoot.p = seed
        raxmlBoot.b = math.abs(r.nextInt)
Peter van 't Hof's avatar
Peter van 't Hof committed
81
        raxmlBoot.w = outputDir + dirSufixRaxml
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
        raxmlBoot.N = 1
        raxmlBoot.n = outputName + "_boot_" + t
        add(raxmlBoot)
        raxmlBoot.getBootstrapFile
      }

      val cat = Cat(this, bootList.toList, outputDir + "/boot_list")
      add(cat)

      val raxmlBi = new Raxml(this)
      raxmlBi.input = input
      raxmlBi.t = raxmlMl.getBestTreeFile
      raxmlBi.z = cat.output
      raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
      raxmlBi.p = seed
      raxmlBi.f = "b"
      raxmlBi.n = outputName + "_bi"
Peter van 't Hof's avatar
Peter van 't Hof committed
99
      raxmlBi.w = outputDir + dirSufixRaxml
100
      add(raxmlBi)
101
102
103
104

      val gubbins = new RunGubbins(this)
      gubbins.fastafile = input
      gubbins.startingTree = raxmlBi.getBipartitionsFile
Peter van 't Hof's avatar
Peter van 't Hof committed
105
      gubbins.outputDirectory = outputDir + dirSufixGubbins
106
      add(gubbins)
107
108
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
109
    addTreeJobs(catVariantsSnps.output, outputDir + "trees" + File.separator + "snps_only", "snps_only")
Peter van 't Hof's avatar
Peter van 't Hof committed
110
    addTreeJobs(catVariants.output, outputDir + "trees" + File.separator + "snps_indels", "snps_indels")
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
  }

  // Called for each sample
  def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
    val sampleOutput = new SampleOutput
    val sampleID: String = sampleConfig("ID").toString
    val sampleDir = globalSampleDir + sampleID + "/"

    sampleOutput.libraries = runLibraryJobs(sampleConfig)

    sampleOutput.output = addGenerateFasta(sampleID, sampleDir)
    sampleOutput.outputSnps = addGenerateFasta(sampleID, sampleDir, snpsOnly = true)

    return sampleOutput
  }

  // Called for each run from a sample
  def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
    val libraryOutput = new LibraryOutput

    val runID: String = runConfig("ID").toString
    val sampleID: String = sampleConfig("ID").toString
    val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/"

    return libraryOutput
  }

  def addGenerateFasta(sampleName: String, outputDir: String, outputName: String = null,
                       snpsOnly: Boolean = false): FastaOutput = {
    val bastyGenerateFasta = new BastyGenerateFasta(this)
    bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName
    bastyGenerateFasta.inputVcf = gatkPipeline.multisampleVariantcalling.scriptOutput.finalVcfFile
    if (gatkPipeline.samplesOutput.contains(sampleName)) {
      bastyGenerateFasta.bamFile = gatkPipeline.samplesOutput(sampleName).variantcalling.bamFiles.head
    }
    bastyGenerateFasta.outputVariants = outputDir + bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta"
    bastyGenerateFasta.outputConsensus = outputDir + bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta"
    bastyGenerateFasta.outputConsensusVariants = outputDir + bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta"
    bastyGenerateFasta.sampleName = sampleName
    bastyGenerateFasta.snpsOnly = snpsOnly
    add(bastyGenerateFasta)
    return FastaOutput(bastyGenerateFasta.outputVariants, bastyGenerateFasta.outputConsensus, bastyGenerateFasta.outputConsensusVariants)
  }
}

object Basty extends PipelineCommand