ShivaVariantcallingTrait.scala 8.67 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18
19
20
package nl.lumc.sasc.biopet.pipelines.shiva

import java.io.File

import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
21
22
import nl.lumc.sasc.biopet.core.{ Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsCall, BcftoolsMerge }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
25
26
import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter, VcfStats }
import nl.lumc.sasc.biopet.extensions.{ Bgzip, Tabix }
27
28
import nl.lumc.sasc.biopet.utils.Logging
import org.broadinstitute.gatk.utils.commandline.Input
Peter van 't Hof's avatar
Peter van 't Hof committed
29
30

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
31
32
 * Common trait for ShivaVariantcalling
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
33
34
 * Created by pjvan_thof on 2/26/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
35
trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with Reference {
Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
38
39
40
  qscript =>

  @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
  var inputBams: List[File] = Nil

Peter van 't Hof's avatar
Peter van 't Hof committed
41
  /** Name prefix, can override this methods if neeeded */
42
43
  def namePrefix: String = {
    (sampleId, libId) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
44
45
46
      case (Some(s), Some(l)) => s + "-" + l
      case (Some(s), _)       => s
      case _                  => config("name_prefix")
47
48
    }
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
49

50
51
  override def defaults = Map("bcftoolscall" -> Map("f" -> List("GQ")))

Peter van 't Hof's avatar
Peter van 't Hof committed
52
  /** Executed before script */
Peter van 't Hof's avatar
Peter van 't Hof committed
53
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
54
55
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
56
  /** Final merged output files of all variantcaller modes */
Peter van 't Hof's avatar
Peter van 't Hof committed
57
  def finalFile = new File(outputDir, namePrefix + ".final.vcf.gz")
58

Peter van 't Hof's avatar
Peter van 't Hof committed
59
60
  /** Variantcallers requested by the config */
  protected val configCallers: Set[String] = config("variantcallers")
61

Peter van 't Hof's avatar
Peter van 't Hof committed
62
  /** This will add jobs for this pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
63
  def biopetScript(): Unit = {
64
65
    for (cal <- configCallers) {
      if (!callersList.exists(_.name == cal))
66
        Logging.addError("variantcaller '" + cal + "' does not exist, possible to use: " + callersList.map(_.name).mkString(", "))
67
68
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
69
    val callers = callersList.filter(x => configCallers.contains(x.name)).sortBy(_.prio)
Peter van 't Hof's avatar
Peter van 't Hof committed
70

Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
    require(inputBams.nonEmpty, "No input bams found")
    require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + callersList.map(_.name).mkString(", "))
Peter van 't Hof's avatar
Peter van 't Hof committed
73

Peter van 't Hof's avatar
Peter van 't Hof committed
74
    val cv = new CombineVariants(qscript)
75
76
    cv.outputFile = finalFile
    cv.setKey = "VariantCaller"
Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
79
    cv.genotypeMergeOptions = Some("PRIORITIZE")
    cv.rodPriorityList = callers.map(_.name).mkString(",")
    for (caller <- callers) {
Peter van 't Hof's avatar
Peter van 't Hof committed
80
81
      caller.addJobs()
      cv.addInput(caller.outputFile, caller.name)
Peter van 't Hof's avatar
Peter van 't Hof committed
82
83
84
85
86
87

      val vcfStats = new VcfStats(qscript)
      vcfStats.input = caller.outputFile
      vcfStats.setOutputDir(new File(caller.outputDir, "vcfstats"))
      add(vcfStats)
      addSummarizable(vcfStats, namePrefix + "-vcfstats-" + caller.name)
Peter van 't Hof's avatar
Peter van 't Hof committed
88
89
90
    }
    add(cv)

Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
93
    val vcfStats = new VcfStats(qscript)
    vcfStats.input = finalFile
    vcfStats.setOutputDir(new File(outputDir, "vcfstats"))
Peter van 't Hof's avatar
Peter van 't Hof committed
94
    vcfStats.infoTags :+= cv.setKey
Peter van 't Hof's avatar
Peter van 't Hof committed
95
    add(vcfStats)
Peter van 't Hof's avatar
Peter van 't Hof committed
96
    addSummarizable(vcfStats, namePrefix + "-vcfstats-final")
Peter van 't Hof's avatar
Peter van 't Hof committed
97

Peter van 't Hof's avatar
Peter van 't Hof committed
98
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
99
100
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
101
  /** Will generate all available variantcallers */
102
  protected def callersList: List[Variantcaller] = List(new Freebayes, new RawVcf, new Bcftools, new BcftoolsSingleSample)
Peter van 't Hof's avatar
Peter van 't Hof committed
103

Peter van 't Hof's avatar
Peter van 't Hof committed
104
  /** General trait for a variantcaller mode */
Peter van 't Hof's avatar
Peter van 't Hof committed
105
  trait Variantcaller {
Peter van 't Hof's avatar
Peter van 't Hof committed
106
    /** Name of mode, this should also be used in the config */
Peter van 't Hof's avatar
Peter van 't Hof committed
107
    val name: String
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109

    /** Output dir for this mode */
Peter van 't Hof's avatar
Peter van 't Hof committed
110
    def outputDir = new File(qscript.outputDir, name)
Peter van 't Hof's avatar
Peter van 't Hof committed
111
112

    /** Prio in merging  in the final file */
Peter van 't Hof's avatar
Peter van 't Hof committed
113
    protected val defaultPrio: Int
Peter van 't Hof's avatar
Peter van 't Hof committed
114
115

    /** Prio from the config */
Peter van 't Hof's avatar
Peter van 't Hof committed
116
    lazy val prio: Int = config("prio_" + name, default = defaultPrio)
Peter van 't Hof's avatar
Peter van 't Hof committed
117
118

    /** This should add the variantcaller jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
119
    def addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
120
121

    /** Final output file of this mode */
Peter van 't Hof's avatar
Peter van 't Hof committed
122
    def outputFile: File
Peter van 't Hof's avatar
Peter van 't Hof committed
123
124
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
125
  /** default mode of freebayes */
Peter van 't Hof's avatar
Peter van 't Hof committed
126
127
128
129
  class Freebayes extends Variantcaller {
    val name = "freebayes"
    protected val defaultPrio = 7

Peter van 't Hof's avatar
Peter van 't Hof committed
130
    /** Final output file of this mode */
131
    def outputFile = new File(outputDir, namePrefix + ".freebayes.vcf.gz")
Peter van 't Hof's avatar
Peter van 't Hof committed
132
133
134
135

    def addJobs() {
      val fb = new nl.lumc.sasc.biopet.extensions.Freebayes(qscript)
      fb.bamfiles = inputBams
136
137
      fb.outputVcf = new File(outputDir, namePrefix + ".freebayes.vcf")
      fb.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
138
      add(fb)
139

Peter van 't Hof's avatar
Peter van 't Hof committed
140
      //TODO: need piping for this, see also issue #114
141
142
143
144
145
146
147
148
149
      val bz = new Bgzip(qscript)
      bz.input = List(fb.outputVcf)
      bz.output = outputFile
      add(bz)

      val ti = new Tabix(qscript)
      ti.input = bz.output
      ti.p = Some("vcf")
      add(ti)
Peter van 't Hof's avatar
Peter van 't Hof committed
150
151
152
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
153
  /** default mode of bcftools */
Peter van 't Hof's avatar
Peter van 't Hof committed
154
155
156
157
  class Bcftools extends Variantcaller {
    val name = "bcftools"
    protected val defaultPrio = 8

Peter van 't Hof's avatar
Peter van 't Hof committed
158
    /** Final output file of this mode */
Peter van 't Hof's avatar
Peter van 't Hof committed
159
160
161
162
163
164
    def outputFile = new File(outputDir, namePrefix + ".bcftools.vcf.gz")

    def addJobs() {
      val mp = new SamtoolsMpileup(qscript)
      mp.input = inputBams
      mp.u = true
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
165
      mp.reference = referenceFasta()
Peter van 't Hof's avatar
Peter van 't Hof committed
166
167

      val bt = new BcftoolsCall(qscript)
168
      bt.O = Some("z")
Peter van 't Hof's avatar
Peter van 't Hof committed
169
170
171
      bt.v = true
      bt.c = true

172
      add(mp | bt > outputFile)
173
      add(Tabix(qscript, outputFile))
Peter van 't Hof's avatar
Peter van 't Hof committed
174
175
176
    }
  }

177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
  /** default mode of bcftools */
  class BcftoolsSingleSample extends Variantcaller {
    val name = "bcftools_singlesample"
    protected val defaultPrio = 8

    /** Final output file of this mode */
    def outputFile = new File(outputDir, namePrefix + ".bcftools_singlesample.vcf.gz")

    def addJobs() {
      val sampleVcfs = for (inputBam <- inputBams) yield {
        val mp = new SamtoolsMpileup(qscript)
        mp.input :+= inputBam
        mp.u = true
        mp.reference = referenceFasta()

        val bt = new BcftoolsCall(qscript)
193
        bt.O = Some("z")
194
195
        bt.v = true
        bt.c = true
196
        bt.output = new File(outputDir, inputBam.getName + ".vcf.gz")
197

198
        add(mp | bt)
199
        add(Tabix(qscript, bt.output))
200
        bt.output
201
202
      }

203
204
205
206
207
208
      val bcfmerge = new BcftoolsMerge(qscript)
      bcfmerge.input = sampleVcfs
      bcfmerge.output = outputFile
      bcfmerge.O = Some("z")
      add(bcfmerge)
      add(Tabix(qscript, outputFile))
209
210
211
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
212
  /** Makes a vcf file from a mpileup without statistics */
Peter van 't Hof's avatar
Peter van 't Hof committed
213
214
  class RawVcf extends Variantcaller {
    val name = "raw"
Peter van 't Hof's avatar
Peter van 't Hof committed
215
216
217

    // This caller is designed as fallback when other variantcallers fails to report
    protected val defaultPrio = Int.MaxValue
Peter van 't Hof's avatar
Peter van 't Hof committed
218

Peter van 't Hof's avatar
Peter van 't Hof committed
219
    /** Final output file of this mode */
Peter van 't Hof's avatar
Peter van 't Hof committed
220
    def outputFile = new File(outputDir, namePrefix + ".raw.vcf.gz")
Peter van 't Hof's avatar
Peter van 't Hof committed
221

Peter van 't Hof's avatar
Peter van 't Hof committed
222
    def addJobs() {
Peter van 't Hof's avatar
Peter van 't Hof committed
223
      val rawFiles = inputBams.map(bamFile => {
224
225
226
227
228
229
        val mp = new SamtoolsMpileup(qscript) {
          override def configName = "samtoolsmpileup"
          override def defaults = Map("samtoolsmpileup" -> Map("disable_baq" -> true, "min_map_quality" -> 1))
        }
        mp.input :+= bamFile

Peter van 't Hof's avatar
Peter van 't Hof committed
230
231
232
        val m2v = new MpileupToVcf(qscript)
        m2v.inputBam = bamFile
        m2v.output = new File(outputDir, bamFile.getName.stripSuffix(".bam") + ".raw.vcf")
233
        add(mp | m2v)
Peter van 't Hof's avatar
Peter van 't Hof committed
234
235
236

        val vcfFilter = new VcfFilter(qscript) {
          override def configName = "vcffilter"
237
          override def defaults = Map("min_sample_depth" -> 8,
Peter van 't Hof's avatar
Peter van 't Hof committed
238
239
240
            "min_alternate_depth" -> 2,
            "min_samples_pass" -> 1,
            "filter_ref_calls" -> true
241
          )
Peter van 't Hof's avatar
Peter van 't Hof committed
242
243
244
245
246
247
248
        }
        vcfFilter.inputVcf = m2v.output
        vcfFilter.outputVcf = new File(outputDir, bamFile.getName.stripSuffix(".bam") + ".raw.filter.vcf.gz")
        add(vcfFilter)
        vcfFilter.outputVcf
      })

Peter van 't Hof's avatar
Peter van 't Hof committed
249
250
251
      val cv = new CombineVariants(qscript)
      cv.inputFiles = rawFiles
      cv.outputFile = outputFile
Peter van 't Hof's avatar
Peter van 't Hof committed
252
      cv.setKey = "null"
Peter van 't Hof's avatar
Peter van 't Hof committed
253
      cv.excludeNonVariants = true
Peter van 't Hof's avatar
Peter van 't Hof committed
254
      add(cv)
Peter van 't Hof's avatar
Peter van 't Hof committed
255
256
257
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
258
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
259
260
  def summaryFile = new File(outputDir, "ShivaVariantcalling.summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
261
262
  /** Settings for the summary */
  def summarySettings = Map("variantcallers" -> configCallers.toList)
Peter van 't Hof's avatar
Peter van 't Hof committed
263

Peter van 't Hof's avatar
Peter van 't Hof committed
264
  /** Files for the summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
265
266
  def summaryFiles: Map[String, File] = {
    val callers: Set[String] = config("variantcallers")
Peter van 't Hof's avatar
Peter van 't Hof committed
267
    callersList.filter(x => callers.contains(x.name)).map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
268
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
269
}