HaplotypeCallerGvcf.scala 4.95 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
17
18
19
/**
 * Due to the license issue with GATK, this part of Biopet can only be used inside the
 * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
 * on how to use this protected part of biopet or contact us at sasc@lumc.nl
 */
20
package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
21

22
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.gatk
24
import nl.lumc.sasc.biopet.extensions.gatk.CombineGVCFs
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.utils.Logging
26
import nl.lumc.sasc.biopet.utils.config.Configurable
27
import nl.lumc.sasc.biopet.utils.intervals.BedRecordList
28
29

/** Gvcf mode for haplotypecaller */
Peter van 't Hof's avatar
Peter van 't Hof committed
30
class HaplotypeCallerGvcf(val parent: Configurable) extends Variantcaller {
31
32
33
  val name = "haplotypecaller_gvcf"
  protected def defaultPrio = 5

Sander Bollen's avatar
Sander Bollen committed
34
  /**
Sander Bollen's avatar
Sander Bollen committed
35
36
   * Map of sample name -> gvcf. May be empty.
   */
Sander Bollen's avatar
Sander Bollen committed
37
38
39
40
  protected var gVcfFiles: Map[String, File] = Map()

  def getGvcfs = gVcfFiles

41
42
  val genderAwareCalling: Boolean = config("gender_aware_calling", default = false)
  val haploidRegions: Option[File] = config("hap̦loid_regions")
Peter van 't Hof's avatar
Peter van 't Hof committed
43
44
  val haploidRegionsMale: Option[File] = config("haploid_regions_male")
  val haploidRegionsFemale: Option[File] = config("haploid_regions_female")
45

46
47
48
  lazy val fractionMale: Double = BedRecordList.fromFiles(Seq(haploidRegions, haploidRegionsMale).flatten, true).fractionOfReference(referenceDict)
  lazy val fractionFemale: Double = BedRecordList.fromFiles(Seq(haploidRegions, haploidRegionsFemale).flatten, true).fractionOfReference(referenceDict)
  lazy val fractionUnknown: Double = BedRecordList.fromFiles(Seq(haploidRegions).flatten, true).fractionOfReference(referenceDict)
49

Peter van 't Hof's avatar
Peter van 't Hof committed
50
51
  override def fixedValues = Map("haplotypecaller" -> Map("emitRefConfidence" -> "GVCF"))

52
53
54
55
56
  override def defaults = Map("haplotypecaller" -> Map(
    "variant_index_type" -> "LINEAR",
    "variant_index_parameter" -> 128000)
  )

Peter van 't Hof's avatar
Peter van 't Hof committed
57
58
59
  override def init(): Unit = {
    super.init()
    if (genderAwareCalling && haploidRegions.isEmpty && haploidRegionsMale.isEmpty && haploidRegionsFemale.isEmpty)
Peter van 't Hof's avatar
Peter van 't Hof committed
60
      Logging.addError("Gender aware variantcalling is enabled but no haploid bed files are given")
Peter van 't Hof's avatar
Peter van 't Hof committed
61
62
  }

63
  def biopetScript() {
Sander Bollen's avatar
Sander Bollen committed
64
    gVcfFiles = for ((sample, inputBam) <- inputBams) yield {
65
66
      if (genderAwareCalling) {
        val finalFile = new File(outputDir, sample + ".gvcf.vcf.gz")
67
68
        val gender = genders.getOrElse(sample, Gender.Unknown)
        val haploidBedFiles: List[File] = gender match {
69
70
71
72
73
          case Gender.Female => haploidRegions.toList ::: haploidRegionsFemale.toList ::: Nil
          case Gender.Male   => haploidRegions.toList ::: haploidRegionsMale.toList ::: Nil
          case _             => haploidRegions.toList
        }

74
75
76
77
78
79
        val fraction: Double = gender match {
          case Gender.Female => fractionFemale
          case Gender.Male   => fractionMale
          case _             => fractionUnknown
        }

80
81
82
83
        val haploidGvcf = if (haploidBedFiles.nonEmpty) {
          val hc = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".haploid.gvcf.vcf.gz"))
          hc.BQSR = inputBqsrFiles.get(sample)
          hc.intervals = haploidBedFiles
84
85
          hc.scatterCount = (hc.scatterCount * fraction).toInt
          hc.sample_ploidy = Some(1)
86
87
88
89
90
91
92
          add(hc)
          Some(hc.out)
        } else None

        val hcDiploid = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".diploid.gvcf.vcf.gz"))
        hcDiploid.BQSR = inputBqsrFiles.get(sample)
        hcDiploid.excludeIntervals = haploidBedFiles
93
        hcDiploid.scatterCount = (hcDiploid.scatterCount * (1 - fraction)).toInt
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
        add(hcDiploid)

        haploidGvcf match {
          case Some(file) =>
            val combine = new CombineGVCFs(this)
            combine.variant = Seq(hcDiploid.out, file)
            combine.out = new File(outputDir, sample + ".gvcf.vcf.gz")
            add(combine)
            sample -> combine.out
          case _ => sample -> hcDiploid.out
        }
      } else {
        val hc = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".gvcf.vcf.gz"))
        hc.BQSR = inputBqsrFiles.get(sample)
        add(hc)
        sample -> hc.out
      }
111
112
    }

Sander Bollen's avatar
Sander Bollen committed
113
    val genotypeGVCFs = gatk.GenotypeGVCFs(this, gVcfFiles.values.toList, outputFile)
114
115
116
    add(genotypeGVCFs)
  }
}