ShivaVariantcalling.scala 8.66 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
bow's avatar
bow committed
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
package nl.lumc.sasc.biopet.pipelines.shiva

17
18
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand, Reference, SampleLibraryTag }
import MultiSampleQScript.Gender
19
20
21
22
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.Tabix
import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, GenotypeConcordance }
import nl.lumc.sasc.biopet.extensions.tools.VcfStats
23
import nl.lumc.sasc.biopet.extensions.vt.{ VtDecompose, VtNormalize }
24
25
import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.{ VarscanCnsSingleSample, _ }
26
import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging }
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import org.broadinstitute.gatk.queue.QScript
29
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
Peter van 't Hof's avatar
Peter van 't Hof committed
30
31

/**
32
 * Implementation of ShivaVariantcalling
Peter van 't Hof's avatar
Peter van 't Hof committed
33
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
34
35
 * Created by pjvan_thof on 2/26/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
36
class ShivaVariantcalling(val parent: Configurable) extends QScript
37
38
39
40
41
42
  with SummaryQScript
  with SampleLibraryTag
  with Reference
  with TargetRegions {
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
43
  def this() = this(null)
44
45
46
47
48
49

  @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
  protected var inputBamsArg: List[File] = Nil

  var inputBams: Map[String, File] = Map()

50
51
  var inputBqsrFiles: Map[String, File] = Map()

52
53
  var genders: Map[String, Gender.Value] = _

54
55
56
  /** Executed before script */
  def init(): Unit = {
    if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
Peter van 't Hof's avatar
Peter van 't Hof committed
57
    if (Option(genders).isEmpty) genders = {
58
      val samples: Map[String, Any] = config("genders", default = Map())
59
60
61
62
63
64
65
      samples.map {
        case (sampleName, gender) =>
          sampleName -> (gender.toString.toLowerCase match {
            case "male"   => Gender.Male
            case "female" => Gender.Female
            case _        => Gender.Unknown
          })
66
67
      }
    }
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  }

  var referenceVcf: Option[File] = config("reference_vcf")

  var referenceVcfRegions: Option[File] = config("reference_vcf_regions")

  /** Name prefix, can override this methods if neeeded */
  def namePrefix: String = {
    (sampleId, libId) match {
      case (Some(s), Some(l)) => s + "-" + l
      case (Some(s), _)       => s
      case _                  => config("name_prefix")
    }
  }

  override def defaults = Map("bcftoolscall" -> Map("f" -> List("GQ")))

  /** Final merged output files of all variantcaller modes */
  def finalFile = new File(outputDir, namePrefix + ".final.vcf.gz")

  /** Variantcallers requested by the config */
  protected val configCallers: Set[String] = config("variantcallers")

Sander Bollen's avatar
Sander Bollen committed
91
  val callers: List[Variantcaller] = {
92
    (for (name <- configCallers) yield {
93
94
95
      if (!ShivaVariantcalling.callersList(this).exists(_.name == name))
        Logging.addError(s"variantcaller '$name' does not exist, possible to use: " + ShivaVariantcalling.callersList(this).map(_.name).mkString(", "))
      ShivaVariantcalling.callersList(this).find(_.name == name)
96
97
98
99
100
101
    }).flatten.toList.sortBy(_.prio)
  }

  /** This will add jobs for this pipeline */
  def biopetScript(): Unit = {
    require(inputBams.nonEmpty, "No input bams found")
102
    require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + ShivaVariantcalling.callersList(this).map(_.name).mkString(", "))
Peter van 't Hof's avatar
Peter van 't Hof committed
103
    if (!callers.exists(_.mergeVcfResults)) Logging.addError("must select at least 1 variantcaller where merge_vcf_results is true")
104

Peter van 't Hof's avatar
Peter van 't Hof committed
105
    addAll(dbsnpVcfFile.map(Shiva.makeValidateVcfJobs(this, _, referenceFasta(), new File(outputDir, ".validate"))).getOrElse(Nil))
106

107
    val cv = new CombineVariants(qscript)
108
    cv.out = finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
109
    cv.setKey = Some("VariantCaller")
110
    cv.genotypemergeoption = Some("PRIORITIZE")
111
    cv.rod_priority_list = Some(callers.filter(_.mergeVcfResults).map(_.name).mkString(","))
112
113
    for (caller <- callers) {
      caller.inputBams = inputBams
Peter van 't Hof's avatar
Peter van 't Hof committed
114
      caller.inputBqsrFiles = inputBqsrFiles
115
116
      caller.namePrefix = namePrefix
      caller.outputDir = new File(outputDir, caller.name)
117
      caller.genders = genders
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
      add(caller)
      addStats(caller.outputFile, caller.name)
      val normalize: Boolean = config("execute_vt_normalize", default = false, namespace = caller.configNamespace)
      val decompose: Boolean = config("execute_vt_decompose", default = false, namespace = caller.configNamespace)

      val vtNormalize = new VtNormalize(this)
      vtNormalize.inputVcf = caller.outputFile
      val vtDecompose = new VtDecompose(this)

      if (normalize && decompose) {
        vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz")
        vtNormalize.isIntermediate = true
        add(vtNormalize, Tabix(this, vtNormalize.outputVcf))
        vtDecompose.inputVcf = vtNormalize.outputVcf
        vtDecompose.outputVcf = swapExt(caller.outputDir, vtNormalize.outputVcf, ".vcf.gz", ".decompose.vcf.gz")
        add(vtDecompose, Tabix(this, vtDecompose.outputVcf))
Peter van 't Hof's avatar
Peter van 't Hof committed
134
        if (caller.mergeVcfResults) cv.variant :+= TaggedFile(vtDecompose.outputVcf, caller.name)
135
136
137
      } else if (normalize && !decompose) {
        vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz")
        add(vtNormalize, Tabix(this, vtNormalize.outputVcf))
Peter van 't Hof's avatar
Peter van 't Hof committed
138
        if (caller.mergeVcfResults) cv.variant :+= TaggedFile(vtNormalize.outputVcf, caller.name)
139
140
141
142
      } else if (!normalize && decompose) {
        vtDecompose.inputVcf = caller.outputFile
        vtDecompose.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".decompose.vcf.gz")
        add(vtDecompose, Tabix(this, vtDecompose.outputVcf))
Peter van 't Hof's avatar
Peter van 't Hof committed
143
144
145
146
147
148
        if (caller.mergeVcfResults) cv.variant :+= TaggedFile(vtDecompose.outputVcf, caller.name)
      } else if (caller.mergeVcfResults) cv.variant :+= TaggedFile(caller.outputFile, caller.name)
    }
    if (cv.variant.nonEmpty) {
      add(cv)
      addStats(finalFile, "final")
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
    }

    addSummaryJobs()
  }

  protected def addStats(vcfFile: File, name: String): Unit = {
    val vcfStats = new VcfStats(qscript)
    vcfStats.input = vcfFile
    vcfStats.setOutputDir(new File(vcfFile.getParentFile, "vcfstats"))
    if (name == "final") vcfStats.infoTags :+= "VariantCaller"
    add(vcfStats)
    addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name")

    referenceVcf.foreach(referenceVcfFile => {
      val gc = new GenotypeConcordance(this)
164
165
166
      gc.eval = vcfFile
      gc.comp = referenceVcfFile
      gc.out = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt")
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
      referenceVcfRegions.foreach(gc.intervals ::= _)
      add(gc)
      addSummarizable(gc, s"$namePrefix-genotype_concordance-$name")
    })

    for (bedFile <- ampliconBedFile.toList ::: roiBedFiles) {
      val regionName = bedFile.getName.stripSuffix(".bed")
      val vcfStats = new VcfStats(qscript)
      vcfStats.input = vcfFile
      vcfStats.intervals = Some(bedFile)
      vcfStats.setOutputDir(new File(vcfFile.getParentFile, s"vcfstats-$regionName"))
      if (name == "final") vcfStats.infoTags :+= "VariantCaller"
      add(vcfStats)
      addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name-$regionName")
    }
  }

  /** Settings for the summary */
  def summarySettings = Map(
    "variantcallers" -> configCallers.toList,
Peter van 't Hof's avatar
Peter van 't Hof committed
187
188
    "regions_of_interest" -> roiBedFiles.map(_.getName),
    "amplicon_bed" -> ampliconBedFile.map(_.getAbsolutePath)
189
190
191
192
193
194
  )

  /** Files for the summary */
  def summaryFiles: Map[String, File] = {
    callers.map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile)
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
195
196
}

197
198
object ShivaVariantcalling extends PipelineCommand {
  /** Will generate all available variantcallers */
Peter van 't Hof's avatar
Peter van 't Hof committed
199
  protected[pipelines] def callersList(root: Configurable): List[Variantcaller] =
Peter van 't Hof's avatar
Peter van 't Hof committed
200
201
202
203
204
205
206
207
208
209
    new HaplotypeCallerGvcf(root) ::
      new HaplotypeCallerAllele(root) ::
      new UnifiedGenotyperAllele(root) ::
      new UnifiedGenotyper(root) ::
      new HaplotypeCaller(root) ::
      new Freebayes(root) ::
      new RawVcf(root) ::
      new Bcftools(root) ::
      new BcftoolsSingleSample(root) ::
      new VarscanCnsSingleSample(root) :: Nil
210
}