HaplotypeCallerGvcf.scala 5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
17
18
19
/**
 * Due to the license issue with GATK, this part of Biopet can only be used inside the
 * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
 * on how to use this protected part of biopet or contact us at sasc@lumc.nl
 */
20
package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
21

22
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.gatk
24
import nl.lumc.sasc.biopet.extensions.gatk.CombineGVCFs
25
import nl.lumc.sasc.biopet.utils.config.Configurable
26
import nl.lumc.sasc.biopet.utils.intervals.BedRecordList
27
28

/** Gvcf mode for haplotypecaller */
Peter van 't Hof's avatar
Peter van 't Hof committed
29
class HaplotypeCallerGvcf(val parent: Configurable) extends Variantcaller {
30
31
32
  val name = "haplotypecaller_gvcf"
  protected def defaultPrio = 5

Sander Bollen's avatar
Sander Bollen committed
33
  /**
Sander Bollen's avatar
Sander Bollen committed
34
35
   * Map of sample name -> gvcf. May be empty.
   */
Sander Bollen's avatar
Sander Bollen committed
36
37
38
39
  protected var gVcfFiles: Map[String, File] = Map()

  def getGvcfs = gVcfFiles

40
41
  val genderAwareCalling: Boolean = config("gender_aware_calling", default = false)
  val haploidRegions: Option[File] = config("hap̦loid_regions")
Peter van 't Hof's avatar
Peter van 't Hof committed
42
43
  val haploidRegionsMale: Option[File] = config("haploid_regions_male")
  val haploidRegionsFemale: Option[File] = config("haploid_regions_female")
44

45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
  lazy val refernceSize = referenceDict.getReferenceLength

  lazy val fractionMale: Double = calculateFraction(haploidRegions, haploidRegionsMale)
  lazy val fractionFemale: Double = calculateFraction(haploidRegions, haploidRegionsFemale)
  lazy val fractionUnknown: Double = calculateFraction(haploidRegions, None)

  def calculateFraction(file1: Option[File], file2: Option[File]): Double = {
    (file1.map(BedRecordList.fromFile(_)), file2.map(BedRecordList.fromFile(_))) match {
      case (Some(l), None) => l.length.toDouble / refernceSize
      case (None, Some(l)) => l.length.toDouble / refernceSize
      case (Some(l1), Some(l2)) =>
        BedRecordList.fromList(l1.allRecords ++ l2.allRecords).combineOverlap.length.toDouble / refernceSize
      case (None, None) => 0.0
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
61
62
  override def fixedValues = Map("haplotypecaller" -> Map("emitRefConfidence" -> "GVCF"))

63
64
65
66
67
  override def defaults = Map("haplotypecaller" -> Map(
    "variant_index_type" -> "LINEAR",
    "variant_index_parameter" -> 128000)
  )

68
  def biopetScript() {
Sander Bollen's avatar
Sander Bollen committed
69
    gVcfFiles = for ((sample, inputBam) <- inputBams) yield {
70
71
      if (genderAwareCalling) {
        val finalFile = new File(outputDir, sample + ".gvcf.vcf.gz")
72
73
        val gender = genders.getOrElse(sample, Gender.Unknown)
        val haploidBedFiles: List[File] = gender match {
74
75
76
77
78
          case Gender.Female => haploidRegions.toList ::: haploidRegionsFemale.toList ::: Nil
          case Gender.Male   => haploidRegions.toList ::: haploidRegionsMale.toList ::: Nil
          case _             => haploidRegions.toList
        }

79
80
81
82
83
84
        val fraction: Double = gender match {
          case Gender.Female => fractionFemale
          case Gender.Male   => fractionMale
          case _             => fractionUnknown
        }

85
86
87
88
        val haploidGvcf = if (haploidBedFiles.nonEmpty) {
          val hc = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".haploid.gvcf.vcf.gz"))
          hc.BQSR = inputBqsrFiles.get(sample)
          hc.intervals = haploidBedFiles
89
90
          hc.scatterCount = (hc.scatterCount * fraction).toInt
          hc.sample_ploidy = Some(1)
91
92
93
94
95
96
97
          add(hc)
          Some(hc.out)
        } else None

        val hcDiploid = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".diploid.gvcf.vcf.gz"))
        hcDiploid.BQSR = inputBqsrFiles.get(sample)
        hcDiploid.excludeIntervals = haploidBedFiles
98
        hcDiploid.scatterCount = (hcDiploid.scatterCount * (1 - fraction)).toInt
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
        add(hcDiploid)

        haploidGvcf match {
          case Some(file) =>
            val combine = new CombineGVCFs(this)
            combine.variant = Seq(hcDiploid.out, file)
            combine.out = new File(outputDir, sample + ".gvcf.vcf.gz")
            add(combine)
            sample -> combine.out
          case _ => sample -> hcDiploid.out
        }
      } else {
        val hc = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".gvcf.vcf.gz"))
        hc.BQSR = inputBqsrFiles.get(sample)
        add(hc)
        sample -> hc.out
      }
116
117
    }

Sander Bollen's avatar
Sander Bollen committed
118
    val genotypeGVCFs = gatk.GenotypeGVCFs(this, gVcfFiles.values.toList, outputFile)
119
120
121
    add(genotypeGVCFs)
  }
}