ShivaVariantcalling.scala 10.4 KB
Newer Older
bow's avatar
bow committed
1
/**
2
3
4
5
6
7
8
9
10
11
12
13
14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
package nl.lumc.sasc.biopet.pipelines.shiva

17
import nl.lumc.sasc.biopet.core.{MultiSampleQScript, PipelineCommand, Reference, SampleLibraryTag}
18
import MultiSampleQScript.Gender
19
20
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.Tabix
21
import nl.lumc.sasc.biopet.extensions.gatk.{CombineVariants, GenotypeConcordance}
22
import nl.lumc.sasc.biopet.extensions.tools.VcfStats
23
import nl.lumc.sasc.biopet.extensions.vt.{VtDecompose, VtNormalize}
24
import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
pjvan_thof's avatar
pjvan_thof committed
25
26
27
28
29
import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.somatic.{
  MuTect2,
  SomaticVariantCaller,
  TumorNormalPair
}
30
import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.{VarscanCnsSingleSample, _}
pjvan_thof's avatar
pjvan_thof committed
31
import nl.lumc.sasc.biopet.utils.{BamUtils, Logging}
Peter van 't Hof's avatar
Peter van 't Hof committed
32
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
33
import org.broadinstitute.gatk.queue.QScript
34
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
Peter van 't Hof's avatar
Peter van 't Hof committed
35
36

/**
37
38
39
40
41
42
43
44
45
46
  * Implementation of ShivaVariantcalling
  *
  * Created by pjvan_thof on 2/26/15.
  */
class ShivaVariantcalling(val parent: Configurable)
    extends QScript
    with SummaryQScript
    with SampleLibraryTag
    with Reference
    with TargetRegions { qscript =>
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  def this() = this(null)
49
50
51
52
53
54

  @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
  protected var inputBamsArg: List[File] = Nil

  var inputBams: Map[String, File] = Map()

55
56
  var inputBqsrFiles: Map[String, File] = Map()

57
58
  var genders: Map[String, Gender.Value] = _

pjvan_thof's avatar
pjvan_thof committed
59
  var tumorSamples: List[TumorNormalPair] = _
60

Peter van 't Hof's avatar
Peter van 't Hof committed
61
  def isGermlineVariantCallingConfigured: Boolean = {
akaljuvee's avatar
akaljuvee committed
62
    callers.exists(!_.isInstanceOf[SomaticVariantCaller])
63
64
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
65
  def isSomaticVariantCallingConfigured: Boolean = {
66
67
68
    callers.exists(_.isInstanceOf[SomaticVariantCaller])
  }

69
70
71
  /** Executed before script */
  def init(): Unit = {
    if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
pjvan_thof's avatar
pjvan_thof committed
72
73

    //TODO: this needs changed when the sample/library refactoring is beeing done
Peter van 't Hof's avatar
Peter van 't Hof committed
74
    if (Option(genders).isEmpty) genders = {
pjvan_thof's avatar
pjvan_thof committed
75
      inputBams.keys.map { sampleName =>
Peter van 't Hof's avatar
Peter van 't Hof committed
76
77
        val gender: Option[String] =
          config("gender", path = "samples" :: sampleName :: "tags" :: Nil)
pjvan_thof's avatar
pjvan_thof committed
78
        sampleName -> (gender match {
Peter van 't Hof's avatar
Peter van 't Hof committed
79
80
          case Some("male") => Gender.Male
          case Some("female") => Gender.Female
pjvan_thof's avatar
pjvan_thof committed
81
82
83
          case _ => Gender.Unknown
        })
      }.toMap
84
    }
pjvan_thof's avatar
pjvan_thof committed
85
86
87
88
89

    //TODO: this needs changed when the sample/library refactoring is beeing done
    if (Option(tumorSamples).isEmpty)
      tumorSamples = inputBams.keys
        .filter(name =>
Peter van 't Hof's avatar
Peter van 't Hof committed
90
          config("type", path = "samples" :: name :: "tags" :: Nil, default = "normal").asString.toLowerCase == "tumor")
pjvan_thof's avatar
pjvan_thof committed
91
92
93
94
95
96
97
        .map { tumorSample =>
          val normal: String = config("normal", path = "samples" :: tumorSample :: "tags" :: Nil)
          if (!inputBams.keySet.contains(normal))
            Logging.addError(s"Normal sample '$normal' does not exist")
          TumorNormalPair(tumorSample, normal)
        }
        .toList
98
99
100
101
102
103
104
105
106
107
  }

  var referenceVcf: Option[File] = config("reference_vcf")

  var referenceVcfRegions: Option[File] = config("reference_vcf_regions")

  /** Name prefix, can override this methods if neeeded */
  def namePrefix: String = {
    (sampleId, libId) match {
      case (Some(s), Some(l)) => s + "-" + l
108
      case (Some(s), _) => s
pjvan_thof's avatar
pjvan_thof committed
109
      case _ => config("name_prefix", default = "multisample")
110
111
112
113
114
115
116
117
118
119
120
    }
  }

  override def defaults = Map("bcftoolscall" -> Map("f" -> List("GQ")))

  /** Final merged output files of all variantcaller modes */
  def finalFile = new File(outputDir, namePrefix + ".final.vcf.gz")

  /** Variantcallers requested by the config */
  protected val configCallers: Set[String] = config("variantcallers")

Sander Bollen's avatar
Sander Bollen committed
121
  val callers: List[Variantcaller] = {
122
    (for (name <- configCallers) yield {
123
      if (!ShivaVariantcalling.callersList(this).exists(_.name == name))
124
125
126
127
128
        Logging.addError(
          s"variantcaller '$name' does not exist, possible to use: " + ShivaVariantcalling
            .callersList(this)
            .map(_.name)
            .mkString(", "))
129
      ShivaVariantcalling.callersList(this).find(_.name == name)
130
131
132
133
134
135
    }).flatten.toList.sortBy(_.prio)
  }

  /** This will add jobs for this pipeline */
  def biopetScript(): Unit = {
    require(inputBams.nonEmpty, "No input bams found")
136
137
138
139
140
    require(callers.nonEmpty,
            "must select at least 1 variantcaller, choices are: " + ShivaVariantcalling
              .callersList(this)
              .map(_.name)
              .mkString(", "))
Peter van 't Hof's avatar
Peter van 't Hof committed
141
    if (!isGermlineVariantCallingConfigured)
pjvan_thof's avatar
pjvan_thof committed
142
143
      Logging.addError(
        "For running the pipeline at least one germline variant caller has to be configured")
akaljuvee's avatar
akaljuvee committed
144
    else if (!callers.exists(_.mergeVcfResults))
145
146
147
148
149
150
151
      Logging.addError("must select at least 1 variantcaller where merge_vcf_results is true")

    addAll(
      dbsnpVcfFile
        .map(
          Shiva.makeValidateVcfJobs(this, _, referenceFasta(), new File(outputDir, ".validate")))
        .getOrElse(Nil))
152

153
    val cv = new CombineVariants(qscript)
154
    cv.out = finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
155
    cv.setKey = Some("VariantCaller")
156
    cv.genotypemergeoption = Some("PRIORITIZE")
157
    cv.rod_priority_list = Some(callers.filter(_.mergeVcfResults).map(_.name).mkString(","))
158
159
    for (caller <- callers) {
      caller.inputBams = inputBams
Peter van 't Hof's avatar
Peter van 't Hof committed
160
      caller.inputBqsrFiles = inputBqsrFiles
161
162
      caller.namePrefix = namePrefix
      caller.outputDir = new File(outputDir, caller.name)
163
      caller.genders = genders
Peter van 't Hof's avatar
Peter van 't Hof committed
164
      caller match {
Peter van 't Hof's avatar
Peter van 't Hof committed
165
        case c: SomaticVariantCaller => c.tnPairs = tumorSamples
Peter van 't Hof's avatar
Peter van 't Hof committed
166
167
        case _ =>
      }
akaljuvee's avatar
akaljuvee committed
168

169
170
      add(caller)
      addStats(caller.outputFile, caller.name)
171
172
173
174
      val normalize: Boolean =
        config("execute_vt_normalize", default = false, namespace = caller.configNamespace)
      val decompose: Boolean =
        config("execute_vt_decompose", default = false, namespace = caller.configNamespace)
175
176
177
178
179
180

      val vtNormalize = new VtNormalize(this)
      vtNormalize.inputVcf = caller.outputFile
      val vtDecompose = new VtDecompose(this)

      if (normalize && decompose) {
181
182
        vtNormalize.outputVcf =
          swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz")
183
184
185
        vtNormalize.isIntermediate = true
        add(vtNormalize, Tabix(this, vtNormalize.outputVcf))
        vtDecompose.inputVcf = vtNormalize.outputVcf
186
187
        vtDecompose.outputVcf =
          swapExt(caller.outputDir, vtNormalize.outputVcf, ".vcf.gz", ".decompose.vcf.gz")
188
        add(vtDecompose, Tabix(this, vtDecompose.outputVcf))
Peter van 't Hof's avatar
Peter van 't Hof committed
189
        if (caller.mergeVcfResults) cv.variant :+= TaggedFile(vtDecompose.outputVcf, caller.name)
190
      } else if (normalize && !decompose) {
191
192
        vtNormalize.outputVcf =
          swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz")
193
        add(vtNormalize, Tabix(this, vtNormalize.outputVcf))
Peter van 't Hof's avatar
Peter van 't Hof committed
194
        if (caller.mergeVcfResults) cv.variant :+= TaggedFile(vtNormalize.outputVcf, caller.name)
195
196
      } else if (!normalize && decompose) {
        vtDecompose.inputVcf = caller.outputFile
197
198
        vtDecompose.outputVcf =
          swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".decompose.vcf.gz")
199
        add(vtDecompose, Tabix(this, vtDecompose.outputVcf))
Peter van 't Hof's avatar
Peter van 't Hof committed
200
201
202
203
204
205
        if (caller.mergeVcfResults) cv.variant :+= TaggedFile(vtDecompose.outputVcf, caller.name)
      } else if (caller.mergeVcfResults) cv.variant :+= TaggedFile(caller.outputFile, caller.name)
    }
    if (cv.variant.nonEmpty) {
      add(cv)
      addStats(finalFile, "final")
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
    }

    addSummaryJobs()
  }

  protected def addStats(vcfFile: File, name: String): Unit = {
    val vcfStats = new VcfStats(qscript)
    vcfStats.input = vcfFile
    vcfStats.setOutputDir(new File(vcfFile.getParentFile, "vcfstats"))
    if (name == "final") vcfStats.infoTags :+= "VariantCaller"
    add(vcfStats)
    addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name")

    referenceVcf.foreach(referenceVcfFile => {
      val gc = new GenotypeConcordance(this)
221
222
223
      gc.eval = vcfFile
      gc.comp = referenceVcfFile
      gc.out = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt")
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
      referenceVcfRegions.foreach(gc.intervals ::= _)
      add(gc)
      addSummarizable(gc, s"$namePrefix-genotype_concordance-$name")
    })

    for (bedFile <- ampliconBedFile.toList ::: roiBedFiles) {
      val regionName = bedFile.getName.stripSuffix(".bed")
      val vcfStats = new VcfStats(qscript)
      vcfStats.input = vcfFile
      vcfStats.intervals = Some(bedFile)
      vcfStats.setOutputDir(new File(vcfFile.getParentFile, s"vcfstats-$regionName"))
      if (name == "final") vcfStats.infoTags :+= "VariantCaller"
      add(vcfStats)
      addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name-$regionName")
    }
  }

  /** Settings for the summary */
  def summarySettings = Map(
    "variantcallers" -> configCallers.toList,
Peter van 't Hof's avatar
Peter van 't Hof committed
244
    "regions_of_interest" -> roiBedFiles.map(_.getName),
Peter van 't Hof's avatar
Peter van 't Hof committed
245
    "amplicon_bed" -> ampliconBedFile.map(_.getAbsolutePath),
Peter van 't Hof's avatar
Peter van 't Hof committed
246
    "somatic_variant_calling" -> isSomaticVariantCallingConfigured,
Peter van 't Hof's avatar
Peter van 't Hof committed
247
    "germline_variant_calling" -> isGermlineVariantCallingConfigured
248
249
250
251
252
253
  )

  /** Files for the summary */
  def summaryFiles: Map[String, File] = {
    callers.map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile)
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
254
255
}

256
object ShivaVariantcalling extends PipelineCommand {
257

258
  /** Will generate all available variantcallers */
Peter van 't Hof's avatar
Peter van 't Hof committed
259
  protected[pipelines] def callersList(root: Configurable): List[Variantcaller] =
Peter van 't Hof's avatar
Peter van 't Hof committed
260
261
262
263
264
265
266
267
268
    new HaplotypeCallerGvcf(root) ::
      new HaplotypeCallerAllele(root) ::
      new UnifiedGenotyperAllele(root) ::
      new UnifiedGenotyper(root) ::
      new HaplotypeCaller(root) ::
      new Freebayes(root) ::
      new RawVcf(root) ::
      new Bcftools(root) ::
      new BcftoolsSingleSample(root) ::
akaljuvee's avatar
akaljuvee committed
269
270
      new VarscanCnsSingleSample(root) ::
      new MuTect2(root) :: Nil
271

272
}