ShivaVariantcallingTrait.scala 7.15 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18
package nl.lumc.sasc.biopet.pipelines.shiva

import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
19
import nl.lumc.sasc.biopet.core.{ Reference, SampleLibraryTag }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.extensions.Tabix
21
22
import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, GenotypeConcordance }
import nl.lumc.sasc.biopet.extensions.tools.VcfStats
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.vt.{ VtDecompose, VtNormalize }
24
import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
25
26
27
import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers._
import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging }
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
28
29

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
30
31
 * Common trait for ShivaVariantcalling
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
 * Created by pjvan_thof on 2/26/15.
 */
34
35
36
37
trait ShivaVariantcallingTrait extends SummaryQScript
  with SampleLibraryTag
  with Reference
  with TargetRegions {
38
  qscript: QScript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
39
40

  @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
41
42
43
44
45
46
47
48
  protected var inputBamsArg: List[File] = Nil

  var inputBams: Map[String, File] = Map()

  /** Executed before script */
  def init(): Unit = {
    if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
49

50
51
52
53
  var referenceVcf: Option[File] = config("reference_vcf")

  var referenceVcfRegions: Option[File] = config("reference_vcf_regions")

Peter van 't Hof's avatar
Peter van 't Hof committed
54
  /** Name prefix, can override this methods if neeeded */
55
56
  def namePrefix: String = {
    (sampleId, libId) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
57
58
59
      case (Some(s), Some(l)) => s + "-" + l
      case (Some(s), _)       => s
      case _                  => config("name_prefix")
60
61
    }
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
62

63
64
  override def defaults = Map("bcftoolscall" -> Map("f" -> List("GQ")))

Peter van 't Hof's avatar
Peter van 't Hof committed
65
  /** Final merged output files of all variantcaller modes */
Peter van 't Hof's avatar
Peter van 't Hof committed
66
  def finalFile = new File(outputDir, namePrefix + ".final.vcf.gz")
67

Peter van 't Hof's avatar
Peter van 't Hof committed
68
69
  /** Variantcallers requested by the config */
  protected val configCallers: Set[String] = config("variantcallers")
70

Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
73
74
75
76
77
78
  protected val callers: List[Variantcaller] = {
    (for (name <- configCallers) yield {
      if (!callersList.exists(_.name == name))
        Logging.addError(s"variantcaller '$name' does not exist, possible to use: " + callersList.map(_.name).mkString(", "))
      callersList.find(_.name == name)
    }).flatten.toList.sortBy(_.prio)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
79
  /** This will add jobs for this pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
80
81
82
  def biopetScript(): Unit = {
    require(inputBams.nonEmpty, "No input bams found")
    require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + callersList.map(_.name).mkString(", "))
Peter van 't Hof's avatar
Peter van 't Hof committed
83

Peter van 't Hof's avatar
Peter van 't Hof committed
84
    val cv = new CombineVariants(qscript)
85
86
    cv.outputFile = finalFile
    cv.setKey = "VariantCaller"
Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
89
    cv.genotypeMergeOptions = Some("PRIORITIZE")
    cv.rodPriorityList = callers.map(_.name).mkString(",")
    for (caller <- callers) {
90
      caller.inputBams = inputBams
Peter van 't Hof's avatar
Peter van 't Hof committed
91
      caller.namePrefix = namePrefix
Peter van 't Hof's avatar
Peter van 't Hof committed
92
      caller.outputDir = new File(outputDir, caller.name)
93
      add(caller)
94
      addStats(caller.outputFile, caller.name)
95
96
      val normalize: Boolean = config("execute_vt_normalize", default = false, submodule = caller.configName)
      val decompose: Boolean = config("execute_vt_decompose", default = false, submodule = caller.configName)
97
98
99
100
101
102

      val vtNormalize = new VtNormalize(this)
      vtNormalize.inputVcf = caller.outputFile
      val vtDecompose = new VtDecompose(this)

      if (normalize && decompose) {
Peter van 't Hof's avatar
Peter van 't Hof committed
103
        vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz")
Peter van 't Hof's avatar
Peter van 't Hof committed
104
        vtNormalize.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
105
        add(vtNormalize, Tabix(this, vtNormalize.outputVcf))
106
107
        vtDecompose.inputVcf = vtNormalize.outputVcf
        vtDecompose.outputVcf = swapExt(caller.outputDir, vtNormalize.outputVcf, ".vcf.gz", ".decompose.vcf.gz")
Peter van 't Hof's avatar
Peter van 't Hof committed
108
        add(vtDecompose, Tabix(this, vtDecompose.outputVcf))
109
110
        cv.addInput(vtDecompose.outputVcf, caller.name)
      } else if (normalize && !decompose) {
Peter van 't Hof's avatar
Peter van 't Hof committed
111
        vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz")
Peter van 't Hof's avatar
Peter van 't Hof committed
112
        add(vtNormalize, Tabix(this, vtNormalize.outputVcf))
113
114
115
116
        cv.addInput(vtNormalize.outputVcf, caller.name)
      } else if (!normalize && decompose) {
        vtDecompose.inputVcf = caller.outputFile
        vtDecompose.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".decompose.vcf.gz")
Peter van 't Hof's avatar
Peter van 't Hof committed
117
        add(vtDecompose, Tabix(this, vtDecompose.outputVcf))
118
        cv.addInput(vtDecompose.outputVcf, caller.name)
119
      } else cv.addInput(caller.outputFile, caller.name)
Peter van 't Hof's avatar
Peter van 't Hof committed
120
121
122
    }
    add(cv)

123
124
125
126
127
128
    addStats(finalFile, "final")

    addSummaryJobs()
  }

  protected def addStats(vcfFile: File, name: String): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
129
    val vcfStats = new VcfStats(qscript)
130
131
    vcfStats.input = vcfFile
    vcfStats.setOutputDir(new File(vcfFile.getParentFile, "vcfstats"))
Peter van 't Hof's avatar
Peter van 't Hof committed
132
    if (name == "final") vcfStats.infoTags :+= "VariantCaller"
Peter van 't Hof's avatar
Peter van 't Hof committed
133
    add(vcfStats)
134
    addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name")
Peter van 't Hof's avatar
Peter van 't Hof committed
135

136
137
    referenceVcf.foreach(referenceVcfFile => {
      val gc = new GenotypeConcordance(this)
138
      gc.evalFile = vcfFile
139
      gc.compFile = referenceVcfFile
140
      gc.outputFile = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt")
141
142
      referenceVcfRegions.foreach(gc.intervals ::= _)
      add(gc)
143
      addSummarizable(gc, s"$namePrefix-genotype_concordance-$name")
144
145
    })

146
147
148
149
150
151
    for (bedFile <- ampliconBedFile.toList ::: roiBedFiles) {
      val regionName = bedFile.getName.stripSuffix(".bed")
      val vcfStats = new VcfStats(qscript)
      vcfStats.input = vcfFile
      vcfStats.intervals = Some(bedFile)
      vcfStats.setOutputDir(new File(vcfFile.getParentFile, s"vcfstats-$regionName"))
Peter van 't Hof's avatar
Peter van 't Hof committed
152
      if (name == "final") vcfStats.infoTags :+= "VariantCaller"
153
154
155
      add(vcfStats)
      addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name-$regionName")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
156
157
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
158
  /** Will generate all available variantcallers */
159
160
161
162
163
164
  protected def callersList: List[Variantcaller] = List(
    new Freebayes(this),
    new RawVcf(this),
    new Bcftools(this),
    new BcftoolsSingleSample(this),
    new VarscanCnsSingleSample(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
165

Peter van 't Hof's avatar
Peter van 't Hof committed
166
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
167
168
  def summaryFile = new File(outputDir, "ShivaVariantcalling.summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
169
  /** Settings for the summary */
170
171
172
173
174
  def summarySettings = Map(
    "variantcallers" -> configCallers.toList,
    "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
    "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed"))
  )
Peter van 't Hof's avatar
Peter van 't Hof committed
175

Peter van 't Hof's avatar
Peter van 't Hof committed
176
  /** Files for the summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
177
  def summaryFiles: Map[String, File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
178
    callers.map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
179
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
180
}