HaplotypeCallerGvcf.scala 5.48 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
17
18
19
/**
 * Due to the license issue with GATK, this part of Biopet can only be used inside the
 * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
 * on how to use this protected part of biopet or contact us at sasc@lumc.nl
 */
20
package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
21

22
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.gatk
24
import nl.lumc.sasc.biopet.extensions.gatk.CombineGVCFs
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.utils.Logging
26
import nl.lumc.sasc.biopet.utils.config.Configurable
27
import nl.lumc.sasc.biopet.utils.intervals.BedRecordList
28
29

/** Gvcf mode for haplotypecaller */
Peter van 't Hof's avatar
Peter van 't Hof committed
30
class HaplotypeCallerGvcf(val parent: Configurable) extends Variantcaller {
31
32
33
  val name = "haplotypecaller_gvcf"
  protected def defaultPrio = 5

Sander Bollen's avatar
Sander Bollen committed
34
  /**
Sander Bollen's avatar
Sander Bollen committed
35
36
   * Map of sample name -> gvcf. May be empty.
   */
Sander Bollen's avatar
Sander Bollen committed
37
38
39
40
  protected var gVcfFiles: Map[String, File] = Map()

  def getGvcfs = gVcfFiles

41
42
  val genderAwareCalling: Boolean = config("gender_aware_calling", default = false)
  val haploidRegions: Option[File] = config("hap̦loid_regions")
Peter van 't Hof's avatar
Peter van 't Hof committed
43
44
  val haploidRegionsMale: Option[File] = config("haploid_regions_male")
  val haploidRegionsFemale: Option[File] = config("haploid_regions_female")
45

Peter van 't Hof's avatar
Peter van 't Hof committed
46
  lazy val referenceSize = referenceDict.getReferenceLength
47
48
49
50
51

  lazy val fractionMale: Double = calculateFraction(haploidRegions, haploidRegionsMale)
  lazy val fractionFemale: Double = calculateFraction(haploidRegions, haploidRegionsFemale)
  lazy val fractionUnknown: Double = calculateFraction(haploidRegions, None)

Peter van 't Hof's avatar
Peter van 't Hof committed
52
53
54
55
56
57
58
  /**
   * This method will calculate the fraction of the given bed files of the used reference
   *
   * @param file1 Bed file
   * @param file2 Bed file
   * @return Fraction
   */
59
60
  def calculateFraction(file1: Option[File], file2: Option[File]): Double = {
    (file1.map(BedRecordList.fromFile(_)), file2.map(BedRecordList.fromFile(_))) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
61
62
      case (Some(l), None) => l.length.toDouble / referenceSize
      case (None, Some(l)) => l.length.toDouble / referenceSize
63
      case (Some(l1), Some(l2)) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
64
        BedRecordList.fromList(l1.allRecords ++ l2.allRecords).combineOverlap.length.toDouble / referenceSize
65
66
67
68
      case (None, None) => 0.0
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
69
70
  override def fixedValues = Map("haplotypecaller" -> Map("emitRefConfidence" -> "GVCF"))

71
72
73
74
75
  override def defaults = Map("haplotypecaller" -> Map(
    "variant_index_type" -> "LINEAR",
    "variant_index_parameter" -> 128000)
  )

Peter van 't Hof's avatar
Peter van 't Hof committed
76
77
78
  override def init(): Unit = {
    super.init()
    if (genderAwareCalling && haploidRegions.isEmpty && haploidRegionsMale.isEmpty && haploidRegionsFemale.isEmpty)
Peter van 't Hof's avatar
Peter van 't Hof committed
79
      Logging.addError("Gender aware variantcalling is enabled but no haploid bed files are given")
Peter van 't Hof's avatar
Peter van 't Hof committed
80
81
  }

82
  def biopetScript() {
Sander Bollen's avatar
Sander Bollen committed
83
    gVcfFiles = for ((sample, inputBam) <- inputBams) yield {
84
85
      if (genderAwareCalling) {
        val finalFile = new File(outputDir, sample + ".gvcf.vcf.gz")
86
87
        val gender = genders.getOrElse(sample, Gender.Unknown)
        val haploidBedFiles: List[File] = gender match {
88
89
90
91
92
          case Gender.Female => haploidRegions.toList ::: haploidRegionsFemale.toList ::: Nil
          case Gender.Male   => haploidRegions.toList ::: haploidRegionsMale.toList ::: Nil
          case _             => haploidRegions.toList
        }

93
94
95
96
97
98
        val fraction: Double = gender match {
          case Gender.Female => fractionFemale
          case Gender.Male   => fractionMale
          case _             => fractionUnknown
        }

99
100
101
102
        val haploidGvcf = if (haploidBedFiles.nonEmpty) {
          val hc = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".haploid.gvcf.vcf.gz"))
          hc.BQSR = inputBqsrFiles.get(sample)
          hc.intervals = haploidBedFiles
103
104
          hc.scatterCount = (hc.scatterCount * fraction).toInt
          hc.sample_ploidy = Some(1)
105
106
107
108
109
110
111
          add(hc)
          Some(hc.out)
        } else None

        val hcDiploid = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".diploid.gvcf.vcf.gz"))
        hcDiploid.BQSR = inputBqsrFiles.get(sample)
        hcDiploid.excludeIntervals = haploidBedFiles
112
        hcDiploid.scatterCount = (hcDiploid.scatterCount * (1 - fraction)).toInt
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
        add(hcDiploid)

        haploidGvcf match {
          case Some(file) =>
            val combine = new CombineGVCFs(this)
            combine.variant = Seq(hcDiploid.out, file)
            combine.out = new File(outputDir, sample + ".gvcf.vcf.gz")
            add(combine)
            sample -> combine.out
          case _ => sample -> hcDiploid.out
        }
      } else {
        val hc = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".gvcf.vcf.gz"))
        hc.BQSR = inputBqsrFiles.get(sample)
        add(hc)
        sample -> hc.out
      }
130
131
    }

Sander Bollen's avatar
Sander Bollen committed
132
    val genotypeGVCFs = gatk.GenotypeGVCFs(this, gVcfFiles.values.toList, outputFile)
133
134
135
    add(genotypeGVCFs)
  }
}