GatkVariantcalling.scala 7.69 KB
Newer Older
1
2
package nl.lumc.sasc.biopet.pipelines.gatk

Peter van 't Hof's avatar
Peter van 't Hof committed
3
4
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.core.config.Configurable
5
import nl.lumc.sasc.biopet.extensions._
Peter van 't Hof's avatar
Peter van 't Hof committed
6
import org.broadinstitute.gatk.queue.QScript
bow's avatar
bow committed
7
import org.broadinstitute.gatk.queue.extensions.gatk.{ BaseRecalibrator, CommandLineGATK, HaplotypeCaller, IndelRealigner, PrintReads, RealignerTargetCreator, GenotypeGVCFs, AnalyzeCovariates }
Peter van 't Hof's avatar
Peter van 't Hof committed
8
import org.broadinstitute.gatk.queue.function._
bow's avatar
bow committed
9
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
Peter van 't Hof's avatar
Peter van 't Hof committed
10
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType
11

bow's avatar
bow committed
12
class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScript {
13
  def this() = this(null)
bow's avatar
bow committed
14
15

  @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM")
16
  var inputBams: List[File] = Nil
bow's avatar
bow committed
17
18

  @Argument(doc = "Reference", shortName = "R", required = false)
19
  var reference: File = _
bow's avatar
bow committed
20
21

  @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false)
22
  var dbsnp: File = _
bow's avatar
bow committed
23
24

  @Argument(doc = "OutputName", required = false)
25
  var outputName: String = "hc"
bow's avatar
bow committed
26
27

  @Output(doc = "OutputFile", required = false)
28
  var outputFile: File = _
bow's avatar
bow committed
29

30
31
  var gvcfMode = true
  var singleGenotyping = false
bow's avatar
bow committed
32

33
  def init() {
bow's avatar
bow committed
34
    if (gvcfMode) gvcfMode = config("gvcfmode", default = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
35
    if (!singleGenotyping) singleGenotyping = config("singlegenotyping")
bow's avatar
bow committed
36
    if (reference == null) reference = config("reference", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
37
    if (dbsnp == null) dbsnp = config("dbsnp")
38
    if (outputFile == null) outputFile = outputDir + outputName + (if (gvcfMode) ".gvcf.vcf" else ".vcf")
39
40
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
41
  }
bow's avatar
bow committed
42

43
44
45
46
47
48
49
50
51
  def biopetScript() {
    var bamFiles: List[File] = Nil
    for (inputBam <- inputBams) {
      var bamFile = if (dbsnp != null) addIndelRealign(inputBam, outputDir) else inputBam
      bamFiles :+= addBaseRecalibrator(bamFile, outputDir)
    }
    addHaplotypeCaller(bamFiles, outputFile)
    if (gvcfMode && singleGenotyping) addGenotypeGVCFs(List(outputFile), outputDir)
  }
bow's avatar
bow committed
52

53
54
55
56
57
  trait gatkArguments extends CommandLineGATK {
    this.reference_sequence = reference
    this.memoryLimit = 2
    this.jobResourceRequests :+= "h_vmem=4G"
  }
bow's avatar
bow committed
58
59

  def addIndelRealign(inputBam: File, dir: String): File = {
60
61
    val realignerTargetCreator = new RealignerTargetCreator with gatkArguments {
      this.I :+= inputBam
bow's avatar
bow committed
62
      this.o = swapExt(dir, inputBam, ".bam", ".realign.intervals")
63
64
65
66
67
      this.jobResourceRequests :+= "h_vmem=5G"
      if (configContains("scattercount", "realignertargetcreator")) this.scatterCount = config("scattercount", 1, "realignertargetcreator")
    }
    realignerTargetCreator.isIntermediate = true
    add(realignerTargetCreator)
bow's avatar
bow committed
68

69
70
71
    val indelRealigner = new IndelRealigner with gatkArguments {
      this.I :+= inputBam
      this.targetIntervals = realignerTargetCreator.o
bow's avatar
bow committed
72
      this.o = swapExt(dir, inputBam, ".bam", ".realign.bam")
73
74
75
76
      if (configContains("scattercount", "indelrealigner")) this.scatterCount = config("scattercount", 1, "indelrealigner")
    }
    indelRealigner.isIntermediate = true
    add(indelRealigner)
bow's avatar
bow committed
77

78
79
    return indelRealigner.o
  }
bow's avatar
bow committed
80
81

  def addBaseRecalibrator(inputBam: File, dir: String): File = {
82
83
    val baseRecalibrator = new BaseRecalibrator with gatkArguments {
      this.I :+= inputBam
bow's avatar
bow committed
84
      this.o = swapExt(dir, inputBam, ".bam", ".baserecal")
85
86
      if (dbsnp != null) this.knownSites :+= dbsnp
      if (configContains("scattercount", "baserecalibrator")) this.scatterCount = config("scattercount", 1, "baserecalibrator")
87
      this.nct = config("threads", 1, "baserecalibrator")
88
89
    }
    add(baseRecalibrator)
bow's avatar
bow committed
90

Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
    val baseRecalibratorAfter = new BaseRecalibrator with gatkArguments {
      this.I :+= inputBam
bow's avatar
bow committed
93
      this.o = swapExt(dir, inputBam, ".bam", ".baserecal.after")
Peter van 't Hof's avatar
Peter van 't Hof committed
94
95
96
      this.BQSR = baseRecalibrator.o
      if (dbsnp != null) this.knownSites :+= dbsnp
      if (configContains("scattercount", "baserecalibrator")) this.scatterCount = config("scattercount", 1, "baserecalibrator")
97
      this.nct = config("threads", 1, "baserecalibrator")
Peter van 't Hof's avatar
Peter van 't Hof committed
98
99
    }
    add(baseRecalibratorAfter)
bow's avatar
bow committed
100

Peter van 't Hof's avatar
Peter van 't Hof committed
101
102
103
    val analyzeCovariates = new AnalyzeCovariates with gatkArguments {
      this.before = baseRecalibrator.o
      this.after = baseRecalibratorAfter.o
bow's avatar
bow committed
104
      this.plots = swapExt(dir, inputBam, ".bam", ".baserecal.pdf")
Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
    }
    add(analyzeCovariates)
bow's avatar
bow committed
107

108
109
    val printReads = new PrintReads with gatkArguments {
      this.I :+= inputBam
bow's avatar
bow committed
110
      this.o = swapExt(dir, inputBam, ".bam", ".baserecal.bam")
111
112
113
114
115
      this.BQSR = baseRecalibrator.o
      if (configContains("scattercount", "printreads")) this.scatterCount = config("scattercount", 1, "printreads")
    }
    printReads.isIntermediate = true
    add(printReads)
bow's avatar
bow committed
116

117
118
    return printReads.o
  }
bow's avatar
bow committed
119
120

  def addHaplotypeCaller(bamfiles: List[File], outputfile: File): File = {
121
    val haplotypeCaller = new HaplotypeCaller with gatkArguments {
122
      this.min_mapping_quality_score = config("minMappingQualityScore", 20, "haplotypecaller")
123
124
125
126
      if (configContains("scattercount", "haplotypecaller")) this.scatterCount = config("scattercount", 1, "haplotypecaller")
      this.input_file = bamfiles
      this.out = outputfile
      if (configContains("dbsnp")) this.dbsnp = config("dbsnp")
127
128
129
130
131
132
      this.nct = config("threads", default = 3, submodule = "haplotypecaller")
      if (config("outputToBam", default = false, submodule = "haplotypecaller").getBoolean) {
        this.bamOutput = outputfile.getAbsolutePath + ".bam"
        nct = 1
        logger.warn("BamOutput is on, nct/threads is forced to set on 1, this option is only for debug")
      }
133
      this.memoryLimit = this.nct * 2
Peter van 't Hof's avatar
Peter van 't Hof committed
134
135
136
      
      if (configContains("allSitePLs")) this.allSitePLs = config("allSitePLs")
      
137
138
      // GVCF options
      if (gvcfMode) {
Peter van 't Hof's avatar
Peter van 't Hof committed
139
        this.emitRefConfidence = org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode.GVCF
140
141
142
        this.variant_index_type = GATKVCFIndexType.LINEAR
        this.variant_index_parameter = 128000
      }
bow's avatar
bow committed
143
144

      val inputType: String = config("inputtype", "dna")
145
146
147
148
149
150
151
152
153
      if (inputType == "rna") {
        this.dontUseSoftClippedBases = config("dontusesoftclippedbases", true, "haplotypecaller")
        this.recoverDanglingHeads = config("recoverdanglingheads", true, "haplotypecaller")
        this.stand_call_conf = config("stand_call_conf", 20, "haplotypecaller")
        this.stand_emit_conf = config("stand_emit_conf", 20, "haplotypecaller")
      } else {
        this.dontUseSoftClippedBases = config("dontusesoftclippedbases", false, "haplotypecaller")
        this.recoverDanglingHeads = config("recoverdanglingheads", false, "haplotypecaller")
        this.stand_call_conf = config("stand_call_conf", 30, "haplotypecaller")
154
        this.stand_emit_conf = config("stand_emit_conf", 30, "haplotypecaller")
155
156
157
      }
    }
    add(haplotypeCaller)
bow's avatar
bow committed
158

159
160
    return haplotypeCaller.out
  }
bow's avatar
bow committed
161
162

  def addGenotypeGVCFs(gvcfFiles: List[File], dir: String): File = {
163
164
165
166
167
168
    val genotypeGVCFs = new GenotypeGVCFs() with gatkArguments {
      this.variant = gvcfFiles
      this.annotation ++= Seq("FisherStrand", "QualByDepth", "ChromosomeCounts")
      if (configContains("dbsnp")) this.dbsnp = config("dbsnp")
      if (configContains("scattercount", "genotypegvcfs")) this.scatterCount = config("scattercount", 1, "genotypegvcfs")
      this.out = outputDir + outputName + ".vcf"
169
170
      this.stand_call_conf = config("stand_call_conf", 30, "genotypegvcfs")
      this.stand_emit_conf = config("stand_emit_conf", 30, "genotypegvcfs")
171
172
173
174
175
176
177
178
179
    }
    add(genotypeGVCFs)
    return genotypeGVCFs.out
  }
}

object GatkVariantcalling extends PipelineCommand {
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.class"
}