Gatk.scala 14.4 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
package nl.lumc.sasc.biopet.pipelines.gatk

import nl.lumc.sasc.biopet.wrappers._
4
import nl.lumc.sasc.biopet.wrappers.aligners._
Peter van 't Hof's avatar
Peter van 't Hof committed
5
import nl.lumc.sasc.biopet.core._
6
import nl.lumc.sasc.biopet.pipelines.mapping._
Peter van 't Hof's avatar
Peter van 't Hof committed
7
8
9
10
11
12
13
14
import nl.lumc.sasc.biopet.pipelines.flexiprep._
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.extensions.picard._
import org.broadinstitute.sting.queue.function._
import scala.util.parsing.json._
import org.broadinstitute.sting.utils.variant._

15
class Gatk(private var globalConfig: Config) extends QScript with BiopetQScript {
Peter van 't Hof's avatar
Peter van 't Hof committed
16
  qscript =>
17
18
  def this() = this(new Config())
  
Peter van 't Hof's avatar
Peter van 't Hof committed
19
  @Argument(doc="Config Json file",shortName="config") var configfiles: List[File] = Nil
20
  @Argument(doc="Only Sample",shortName="sample", required=false) var onlySample: String = ""
Peter van 't Hof's avatar
Peter van 't Hof committed
21
  @Argument(doc="Output directory", shortName="outputDir", required=true) var outputDir: String = _
22
  
Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
25
  var referenceFile: File = _
  var dbsnp: File = _
  var gvcfFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
26
  var finalBamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
27
28
29
  
  def init() {
    for (file <- configfiles) globalConfig.loadConfigFile(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
30
    config = Config.mergeConfigs(globalConfig.getAsConfig("gatk"), globalConfig)
Peter van 't Hof's avatar
Peter van 't Hof committed
31
    referenceFile = config.getAsString("referenceFile")
32
    if (config.contains("dbsnp")) dbsnp = config.getAsString("dbsnp")
Peter van 't Hof's avatar
Peter van 't Hof committed
33
    gvcfFiles = config.getAsListOfStrings("gvcfFiles", Nil)
34
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
Peter van 't Hof's avatar
Peter van 't Hof committed
35
    else if (!outputDir.endsWith("/")) outputDir += "/"
Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
38
  }
  
  def script() {
39
40
41
42
    init()
    if (onlySample.isEmpty) {
      runSamplesJobs
      
Peter van 't Hof's avatar
Peter van 't Hof committed
43
      //SampleWide jobs
Peter van 't Hof's avatar
Peter van 't Hof committed
44
      if (gvcfFiles.size > 0) {
45
        var vcfFile = addGenotypeGVCFs(gvcfFiles, outputDir + "recalibration/")
46
47
48
49
50
        if (qscript.config.getAsString("inputtype", "dna") != "rna") {
          vcfFile = addVariantAnnotator(vcfFile, finalBamFiles, outputDir + "recalibration/")
          vcfFile = addSnpVariantRecalibrator(vcfFile, outputDir + "recalibration/")
          vcfFile = addIndelVariantRecalibrator(vcfFile, outputDir + "recalibration/")
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
51
      } else logger.warn("No gVCFs to genotype")
52
    } else runSingleSampleJobs(onlySample)
Peter van 't Hof's avatar
Peter van 't Hof committed
53
54
55
  }
  
  // Called for each sample
56
  override def runSingleSampleJobs(sampleConfig:Config) : Map[String,List[File]] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
57
    var outputFiles:Map[String,List[File]] = Map()
58
59
60
61
    var runBamfiles: List[File] = List()
    var sampleID: String = sampleConfig.getAsString("ID")
    for ((run, runFiles) <- runRunsJobs(sampleConfig)) {
      runBamfiles +:= runFiles("FinalBam")
Peter van 't Hof's avatar
Peter van 't Hof committed
62
    }
63
64
65
    outputFiles += ("FinalBams" -> runBamfiles)
    
    if (runBamfiles.size > 0) {
Peter van 't Hof's avatar
Peter van 't Hof committed
66
      finalBamFiles ++= runBamfiles
67
68
69
70
      var gvcfFile = addHaplotypeCaller(runBamfiles, new File(outputDir + sampleID + "/" + sampleID + ".gvcf.vcf"))
      outputFiles += ("gvcf" -> List(gvcfFile))
      gvcfFiles :+= gvcfFile
    } else logger.warn("No bamfiles for variant calling for sample: " + sampleID)
Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
73
74
    return outputFiles
  }
  
  // Called for each run from a sample
75
  override def runSingleRunJobs(runConfig:Config, sampleConfig:Config) : Map[String,File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
76
    var outputFiles:Map[String,File] = Map()
77
78
    val runID: String = runConfig.getAsString("ID")
    val sampleID: String = sampleConfig.get("ID").toString
79
    val runDir: String = outputDir + sampleID + "/run_" + runID + "/"
80
    val inputType = runConfig.getAsString("inputtype", config.getAsString("inputtype", "dna"))
81
    if (runConfig.contains("R1")) {
82
      val mapping = new Mapping(config)
83
      mapping.loadRunConfig(runConfig, sampleConfig, runDir)
84
85
      mapping.script
      addAll(mapping.functions) // Add functions of mapping to curent function pool
Peter van 't Hof's avatar
Peter van 't Hof committed
86
      
87
      var bamFile:File = mapping.outputFiles("finalBamFile")
88
      if (inputType == "rna") bamFile = addSplitNCigarReads(bamFile,runDir)
89
90
      bamFile = addIndelRealign(bamFile,runDir) // Indel realigner
      bamFile = addBaseRecalibrator(bamFile,runDir) // Base recalibrator
Peter van 't Hof's avatar
Peter van 't Hof committed
91
      
Peter van 't Hof's avatar
Peter van 't Hof committed
92
      outputFiles += ("FinalBam" -> bamFile)
93
    } else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)    
Peter van 't Hof's avatar
Peter van 't Hof committed
94
    return outputFiles
Peter van 't Hof's avatar
Peter van 't Hof committed
95
  }
96
97
98
99
100
101
102
103
104
105
106
107
  
  def addMarkDuplicates(inputBams:List[File], outputFile:File, dir:String) : File = {
    val markDuplicates = new MarkDuplicates {
      this.input = inputBams
      this.output = outputFile
      this.REMOVE_DUPLICATES = false
      this.metrics = swapExt(dir,outputFile,".bam",".metrics")
      this.outputIndex = swapExt(dir,this.output,".bam",".bai")
      this.memoryLimit = 2
      this.jobResourceRequests :+= "h_vmem=4G"
    }
    add(markDuplicates)
Peter van 't Hof's avatar
Peter van 't Hof committed
108
    
109
110
    return markDuplicates.output
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
111
112
  
  def addIndelRealign(inputBam:File, dir:String): File = {
113
    val realignerTargetCreator = new RealignerTargetCreator with gatkArguments {
114
      val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("realignertargetcreator"), qscript.config)
115
116
117
      this.I :+= inputBam
      this.o = swapExt(dir,inputBam,".bam",".realign.intervals")
      this.jobResourceRequests :+= "h_vmem=5G"
118
      if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
119
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
120
121
    add(realignerTargetCreator)

122
    val indelRealigner = new IndelRealigner with gatkArguments {
123
      val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("indelrealigner"), qscript.config)
124
125
126
      this.I :+= inputBam
      this.targetIntervals = realignerTargetCreator.o
      this.o = swapExt(dir,inputBam,".bam",".realign.bam")
127
      if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
128
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
129
130
131
132
133
134
    add(indelRealigner)
    
    return indelRealigner.o
  }
  
  def addBaseRecalibrator(inputBam:File, dir:String): File = {
135
    val baseRecalibrator = new BaseRecalibrator with gatkArguments {
136
      val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("baserecalibrator"), qscript.config)
137
138
      this.I :+= inputBam
      this.o = swapExt(dir,inputBam,".bam",".baserecal")
139
      if (dbsnp != null) this.knownSites :+= dbsnp
140
      if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
141
      this.nct = this.config.getThreads(2)
142
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
143
144
    add(baseRecalibrator)

145
    val printReads = new PrintReads with gatkArguments {
146
      val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("printreads"), qscript.config)
147
148
149
      this.I :+= inputBam
      this.o = swapExt(dir,inputBam,".bam",".baserecal.bam")
      this.BQSR = baseRecalibrator.o
150
      if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
151
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
152
    add(printReads)
Peter van 't Hof's avatar
Peter van 't Hof committed
153
154
155
    
    return printReads.o
  }
156
  
157
158
159
160
161
162
  def addSplitNCigarReads(inputBam:File, dir:String) : File = {
    val splitNCigarReads = new SplitNCigarReads with gatkArguments {
      val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("splitncigarreads"), qscript.config)
      if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
      this.input_file = Seq(inputBam)
      this.out = swapExt(dir,inputBam,".bam",".split.bam")
163
164
      this.read_filter :+= "ReassignMappingQuality"
      
165
166
167
168
169
170
171
      this.U = org.broadinstitute.sting.gatk.arguments.ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS
    }
    add(splitNCigarReads)
    
    return splitNCigarReads.out
  }
  
172
173
174
175
176
177
178
  def addHaplotypeCaller(bamfiles:List[File], outputfile:File): File = {
    val haplotypeCaller = new HaplotypeCaller with gatkArguments {
      val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("haplotypecaller"), qscript.config)
      if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
      this.input_file = bamfiles
      this.out = outputfile
      if (dbsnp != null) this.dbsnp = qscript.dbsnp
179
      this.nct = this.config.getThreads(3)
180
181
182
183
184
185
      this.memoryLimit = this.nct * 2
      
      // GVCF options
      this.emitRefConfidence = org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCaller.ReferenceConfidenceMode.GVCF
      this.variant_index_type = GATKVCFIndexType.LINEAR
      this.variant_index_parameter = 128000
186
187
188
189
190
191
192
      
      if (qscript.config.getAsString("inputtype", "dna") == "rna") {
        this.dontUseSoftClippedBases = true
        this.recoverDanglingHeads = true
        this.stand_call_conf = 20
        this.stand_emit_conf = 20
      }
193
194
195
196
197
198
199
    }
    add(haplotypeCaller)
    
    return haplotypeCaller.out
  }
  
  def addSnpVariantRecalibrator(inputVcf:File, dir:String): File = {
200
201
202
203
    val snpVariantRecalibrator = getVariantRecalibrator("snp")
    snpVariantRecalibrator.input +:= inputVcf
    snpVariantRecalibrator.recal_file = swapExt(dir, inputVcf,".vcf",".snp.recal")
    snpVariantRecalibrator.tranches_file = swapExt(dir, inputVcf,".vcf",".snp.tranches")
204
205
    add(snpVariantRecalibrator)

206
207
208
209
210
    val snpApplyRecalibration = getApplyRecalibration("snp")
    snpApplyRecalibration.input +:= inputVcf
    snpApplyRecalibration.recal_file = snpVariantRecalibrator.recal_file
    snpApplyRecalibration.tranches_file = snpVariantRecalibrator.tranches_file
    snpApplyRecalibration.out = swapExt(dir, inputVcf,".vcf",".snp.recal.vcf")
211
212
213
214
215
216
    add(snpApplyRecalibration)
    
    return snpApplyRecalibration.out
  }
  
  def addIndelVariantRecalibrator(inputVcf:File, dir:String): File = {
217
218
219
220
    val indelVariantRecalibrator = getVariantRecalibrator("indel")
    indelVariantRecalibrator.input +:= inputVcf
    indelVariantRecalibrator.recal_file = swapExt(dir, inputVcf,".vcf",".indel.recal")
    indelVariantRecalibrator.tranches_file = swapExt(dir, inputVcf,".vcf",".indel.tranches")
221
222
    add(indelVariantRecalibrator)

223
224
225
226
227
    val indelApplyRecalibration = getApplyRecalibration("indel")
    indelApplyRecalibration.input +:= inputVcf
    indelApplyRecalibration.recal_file = indelVariantRecalibrator.recal_file
    indelApplyRecalibration.tranches_file = indelVariantRecalibrator.tranches_file
    indelApplyRecalibration.out = swapExt(dir, inputVcf,".vcf",".indel.recal.vcf")
228
229
230
231
232
    add(indelApplyRecalibration)
    
    return indelApplyRecalibration.out
  }
  
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
  def getVariantRecalibrator(mode_arg:String) : VariantRecalibrator = {
    val variantRecalibrator = new VariantRecalibrator() with gatkArguments {
      var config = Config.mergeConfigs(qscript.config.getAsConfig("variantrecalibrator"), qscript.config)
      if (mode_arg == "indel") {
        this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
        this.config = Config.mergeConfigs(this.config.getAsConfig("indel"),this.config)
        if (config.contains("mills")) this.resource :+= new TaggedFile(this.config.getAsString("mills"), "known=false,training=true,truth=true,prior=12.0")
      } else { // SNP
        this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
        this.config = Config.mergeConfigs(this.config.getAsConfig("snp"),this.config)
        if (this.config.contains("hapmap")) this.resource +:= new TaggedFile(this.config.getAsString("hapmap"), "known=false,training=true,truth=true,prior=15.0")
        if (this.config.contains("omni")) this.resource +:= new TaggedFile(this.config.getAsString("omni"), "known=false,training=true,truth=true,prior=12.0")
        if (this.config.contains("1000G")) this.resource +:= new TaggedFile(this.config.getAsString("1000G"), "known=false,training=true,truth=false,prior=10.0")
      }
      logger.debug("VariantRecalibrator-" + mode_arg + ": " + this.config)
      if (this.config.contains("dbsnp")) this.resource :+= new TaggedFile(this.config.getAsString("dbsnp"), "known=true,training=false,truth=false,prior=2.0")
      this.nt = config.getThreads(4)
      this.memoryLimit = 2 * nt
      this.an = Seq("QD","DP","FS","ReadPosRankSum","MQRankSum")
      if (this.config.contains("minnumbadvariants")) this.minNumBadVariants = this.config.getAsInt("minnumbadvariants")
      if (this.config.contains("maxgaussians")) this.maxGaussians = this.config.getAsInt("maxgaussians")
    }
    return variantRecalibrator
  }
  
  def getApplyRecalibration(mode_arg:String) : ApplyRecalibration = {
    val applyRecalibration = new ApplyRecalibration() with gatkArguments {
      var config = Config.mergeConfigs(qscript.config.getAsConfig("applyrecalibration"), qscript.config)
      if (mode_arg == "indel") {
        this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
        this.config = Config.mergeConfigs(this.config.getAsConfig("indel"),this.config)
        this.ts_filter_level = this.config.getAsDouble("ts_filter_level", 99.0)
      } else { // SNP
        this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
        this.config = Config.mergeConfigs(this.config.getAsConfig("snp"),this.config)
        this.ts_filter_level = this.config.getAsDouble("ts_filter_level", 99.5)
      }
      this.nt = config.getThreads(3)
      this.memoryLimit = 2 * nt
      if (config.contains("scattercount")) this.scatterCount = this.config.getAsInt("scattercount")
    }
    return applyRecalibration
  }
  
277
278
279
280
  def addGenotypeGVCFs(gvcfFiles: List[File], dir:String): File = {
    val genotypeGVCFs = new GenotypeGVCFs() with gatkArguments {
      val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("genotypegvcfs"), qscript.config)
      this.variant = gvcfFiles
Peter van 't Hof's avatar
Peter van 't Hof committed
281
      this.annotation ++= Seq("FisherStrand", "QualByDepth", "ChromosomeCounts")
282
283
284
285
286
287
288
      if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
      this.out = new File(outputDir,"genotype.vcf")
    }
    add(genotypeGVCFs)
    return genotypeGVCFs.out
  }
  
Peter van 't Hof's avatar
Peter van 't Hof committed
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
  def addVariantAnnotator(inputvcf:File, bamfiles:List[File], dir:String): File = {
    val variantAnnotator = new VariantAnnotator with gatkArguments {
      val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("variantannotator"), qscript.config)
      this.variant = inputvcf
      this.input_file = bamfiles
      this.dbsnp = config.getAsString("dbsnp")
      this.out = swapExt(dir, inputvcf,".vcf",".anotated.vcf")
      //this.all = true
      //this.excludeAnnotation +:= "SnpEff"
      if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
    }
    add(variantAnnotator)
    
    return variantAnnotator.out
  }
  
305
306
307
308
309
  trait gatkArguments extends CommandLineGATK {
    this.reference_sequence = referenceFile
    this.memoryLimit = 2
    this.jobResourceRequests :+= "h_vmem=4G"
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
310
}
311
312
313
314

object Gatk extends PipelineCommand {
  override val src = "Gatk"
}