GatkVariantcalling.scala 9.51 KB
Newer Older
1
2
package nl.lumc.sasc.biopet.pipelines.gatk

Peter van 't Hof's avatar
Peter van 't Hof committed
3
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
4
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
5
import nl.lumc.sasc.biopet.tools.{ MpileupToVcf, VcfFilter, MergeAlleles }
Peter van 't Hof's avatar
Peter van 't Hof committed
6
import nl.lumc.sasc.biopet.core.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.extensions.gatk.{ AnalyzeCovariates, BaseRecalibrator, GenotypeGVCFs, HaplotypeCaller, IndelRealigner, PrintReads, RealignerTargetCreator, SelectVariants, CombineVariants, UnifiedGenotyper }
Peter van 't Hof's avatar
Peter van 't Hof committed
8
import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
Peter van 't Hof's avatar
Peter van 't Hof committed
9
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
10
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
bow's avatar
bow committed
11
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
Peter van 't Hof's avatar
Peter van 't Hof committed
12
import scala.collection.SortedMap
Peter van 't Hof's avatar
Peter van 't Hof committed
13
import scala.language.reflectiveCalls
14

bow's avatar
bow committed
15
class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScript {
16
  def this() = this(null)
bow's avatar
bow committed
17

18
  val scriptOutput = new GatkVariantcalling.ScriptOutput
Peter van 't Hof's avatar
Peter van 't Hof committed
19

bow's avatar
bow committed
20
  @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM")
21
  var inputBams: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
22

Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
  @Input(doc = "Raw vcf file", shortName = "raw")
  var rawVcfInput: File = _
bow's avatar
bow committed
25
26

  @Argument(doc = "Reference", shortName = "R", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
27
  var reference: File = config("reference", required = true)
bow's avatar
bow committed
28
29

  @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
30
  var dbsnp: File = config("dbsnp")
bow's avatar
bow committed
31
32

  @Argument(doc = "OutputName", required = false)
33
  var outputName: String = _
bow's avatar
bow committed
34

35
36
  @Argument(doc = "Sample name", required = false)
  var sampleID: String = _
Peter van 't Hof's avatar
Peter van 't Hof committed
37

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  var preProcesBams: Option[Boolean] = config("pre_proces_bams", default = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
39
  var variantcalling: Boolean = true
Peter van 't Hof's avatar
Peter van 't Hof committed
40
41
42
43
  var doublePreProces: Option[Boolean] = config("double_pre_proces", default = true)
  var useHaplotypecaller: Option[Boolean] = config("use_haplotypecaller", default = true)
  var useUnifiedGenotyper: Option[Boolean] = config("use_unifiedgenotyper", default = false)
  var useAllelesOption: Option[Boolean] = config("use_alleles_option", default = false)
44
  var useMpileup: Option[Boolean] = config("use_mpileup", default = true)
bow's avatar
bow committed
45

46
  def init() {
47
    if (outputName == null && sampleID != null) outputName = sampleID
Peter van 't Hof's avatar
Peter van 't Hof committed
48
    else if (outputName == null) outputName = "noname"
49
50
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
51
  }
bow's avatar
bow committed
52

Peter van 't Hof's avatar
Peter van 't Hof committed
53
54
55
56
57
58
59
60
61
62
63
  private def doublePreProces(files: List[File]): List[File] = {
    if (files.size == 1) return files
    if (files.isEmpty) throw new IllegalStateException("Files can't be empty")
    if (!doublePreProces.get) return files
    val markDub = MarkDuplicates(this, files, new File(outputDir + outputName + ".dedup.bam"))
    if (dbsnp != null) {
      add(markDub, isIntermediate = true)
      List(addIndelRealign(markDub.output, outputDir, isIntermediate = false))
    } else {
      add(markDub, isIntermediate = true)
      List(markDub.output)
Peter van 't Hof's avatar
Peter van 't Hof committed
64
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
65
66
  }

67
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
68
    scriptOutput.bamFiles = if (preProcesBams.get) {
Peter van 't Hof's avatar
Peter van 't Hof committed
69
70
      var bamFiles: List[File] = Nil
      for (inputBam <- inputBams) {
Peter van 't Hof's avatar
Peter van 't Hof committed
71
        var bamFile = addIndelRealign(inputBam, outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
72
73
        bamFiles :+= addBaseRecalibrator(bamFile, outputDir, isIntermediate = bamFiles.size > 1)
      }
74
      doublePreProces(bamFiles)
Peter van 't Hof's avatar
Peter van 't Hof committed
75
    } else if (inputBams.size > 1 && doublePreProces.get) {
76
      doublePreProces(inputBams)
Peter van 't Hof's avatar
Peter van 't Hof committed
77
    } else inputBams
Peter van 't Hof's avatar
Peter van 't Hof committed
78

Peter van 't Hof's avatar
Peter van 't Hof committed
79
    if (variantcalling) {
Peter van 't Hof's avatar
Peter van 't Hof committed
80
      var mergBuffer: SortedMap[String, File] = SortedMap()
81
      def mergeList = mergBuffer map { case (key, file) => TaggedFile(removeNoneVariants(file), "name=" + key) }
Peter van 't Hof's avatar
Peter van 't Hof committed
82

Peter van 't Hof's avatar
Peter van 't Hof committed
83
      if (sampleID != null && (useHaplotypecaller.get || config("joint_genotyping", default = false).getBoolean)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
86
87
88
89
90
        val hcGvcf = new HaplotypeCaller(this)
        hcGvcf.useGvcf
        hcGvcf.input_file = scriptOutput.bamFiles
        hcGvcf.out = outputDir + outputName + ".hc.discovery.gvcf.vcf.gz"
        add(hcGvcf)
        scriptOutput.gvcfFile = hcGvcf.out
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
91

Peter van 't Hof's avatar
Peter van 't Hof committed
92
      if (useHaplotypecaller.get) {
Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
95
96
97
98
99
100
101
102
103
104
105
        if (sampleID != null) {
          val genotypeGVCFs = GenotypeGVCFs(this, List(scriptOutput.gvcfFile), outputDir + outputName + ".hc.discovery.vcf.gz")
          add(genotypeGVCFs)
          scriptOutput.hcVcfFile = genotypeGVCFs.out
        } else {
          val hcGvcf = new HaplotypeCaller(this)
          hcGvcf.input_file = scriptOutput.bamFiles
          hcGvcf.out = outputDir + outputName + ".hc.discovery.vcf.gz"
          add(hcGvcf)
          scriptOutput.hcVcfFile = hcGvcf.out
        }
        mergBuffer += ("1.HC-Discovery" -> scriptOutput.hcVcfFile)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
106

Peter van 't Hof's avatar
Peter van 't Hof committed
107
      if (useUnifiedGenotyper.get) {
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109
110
111
112
113
114
        val ugVcf = new UnifiedGenotyper(this)
        ugVcf.input_file = scriptOutput.bamFiles
        ugVcf.out = outputDir + outputName + ".ug.discovery.vcf.gz"
        add(ugVcf)
        scriptOutput.ugVcfFile = ugVcf.out
        mergBuffer += ("2.UG-Discovery" -> scriptOutput.ugVcfFile)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
115

Peter van 't Hof's avatar
Peter van 't Hof committed
116
      // Generate raw vcf
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
      if (useMpileup) {
        if (sampleID != null && scriptOutput.bamFiles.size == 1) {
          val m2v = new MpileupToVcf(this)
          m2v.inputBam = scriptOutput.bamFiles.head
          m2v.sample = sampleID
          m2v.output = outputDir + outputName + ".raw.vcf"
          add(m2v)
          scriptOutput.rawVcfFile = m2v.output

          val vcfFilter = new VcfFilter(this)
          vcfFilter.defaults ++= Map("min_sample_depth" -> 8,
            "min_alternate_depth" -> 2,
            "min_samples_pass" -> 1,
            "filter_ref_calls" -> true)
          vcfFilter.inputVcf = m2v.output
          vcfFilter.outputVcf = this.swapExt(outputDir, m2v.output, ".vcf", ".filter.vcf.gz")
          add(vcfFilter)
          scriptOutput.rawFilterVcfFile = vcfFilter.outputVcf
        } else if (rawVcfInput != null) scriptOutput.rawFilterVcfFile = rawVcfInput
        if (scriptOutput.rawFilterVcfFile == null) throw new IllegalStateException("Files can't be empty")
        mergBuffer += ("9.raw" -> scriptOutput.rawFilterVcfFile)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
139

140
      // Allele mode
Peter van 't Hof's avatar
Peter van 't Hof committed
141
      if (useAllelesOption.get) {
Peter van 't Hof's avatar
Peter van 't Hof committed
142
        val mergeAlleles = MergeAlleles(this, mergeList.toList, outputDir + "raw.allele__temp_only.vcf.gz")
Peter van 't Hof's avatar
Peter van 't Hof committed
143
        add(mergeAlleles, isIntermediate = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
144

Peter van 't Hof's avatar
Peter van 't Hof committed
145
        if (useHaplotypecaller.get) {
Peter van 't Hof's avatar
Peter van 't Hof committed
146
147
148
          val hcAlleles = new HaplotypeCaller(this)
          hcAlleles.input_file = scriptOutput.bamFiles
          hcAlleles.out = outputDir + outputName + ".hc.allele.vcf.gz"
Peter van 't Hof's avatar
Peter van 't Hof committed
149
          hcAlleles.alleles = mergeAlleles.output
Peter van 't Hof's avatar
Peter van 't Hof committed
150
151
152
153
154
          hcAlleles.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
          add(hcAlleles)
          scriptOutput.hcAlleleVcf = hcAlleles.out
          mergBuffer += ("3.HC-alleles" -> hcAlleles.out)
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
155

Peter van 't Hof's avatar
Peter van 't Hof committed
156
        if (useUnifiedGenotyper.get) {
Peter van 't Hof's avatar
Peter van 't Hof committed
157
158
159
          val ugAlleles = new UnifiedGenotyper(this)
          ugAlleles.input_file = scriptOutput.bamFiles
          ugAlleles.out = outputDir + outputName + ".ug.allele.vcf.gz"
Peter van 't Hof's avatar
Peter van 't Hof committed
160
          ugAlleles.alleles = mergeAlleles.output
Peter van 't Hof's avatar
Peter van 't Hof committed
161
162
163
164
165
          ugAlleles.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
          add(ugAlleles)
          scriptOutput.ugAlleleVcf = ugAlleles.out
          mergBuffer += ("4.UG-alleles" -> ugAlleles.out)
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
166
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
167

Peter van 't Hof's avatar
Peter van 't Hof committed
168
      def removeNoneVariants(input: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
169
170
171
172
173
174
175
176
177
        val output = input.getAbsolutePath.stripSuffix(".vcf.gz") + ".variants_only.vcf.gz"
        val sv = SelectVariants(this, input, output)
        sv.excludeFiltered = true
        sv.excludeNonVariants = true
        add(sv, isIntermediate = true)
        sv.out
      }

      val cvFinal = CombineVariants(this, mergeList.toList, outputDir + outputName + ".final.vcf.gz")
178
      cvFinal.genotypemergeoption = org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils.GenotypeMergeType.UNSORTED
Peter van 't Hof's avatar
Peter van 't Hof committed
179
180
      add(cvFinal)
      scriptOutput.finalVcfFile = cvFinal.out
181
182
    }
  }
bow's avatar
bow committed
183

Peter van 't Hof's avatar
Peter van 't Hof committed
184
  def addIndelRealign(inputBam: File, dir: String, isIntermediate: Boolean = true): File = {
185
186
187
188
    val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir)
    add(realignerTargetCreator, isIntermediate = true)

    val indelRealigner = IndelRealigner.apply(this, inputBam, realignerTargetCreator.out, dir)
Peter van 't Hof's avatar
Peter van 't Hof committed
189
    add(indelRealigner, isIntermediate = isIntermediate)
bow's avatar
bow committed
190

191
192
    return indelRealigner.o
  }
bow's avatar
bow committed
193

Peter van 't Hof's avatar
Peter van 't Hof committed
194
195
  def addBaseRecalibrator(inputBam: File, dir: String, isIntermediate: Boolean = false): File = {
    val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal")) //with gatkArguments {
Peter van 't Hof's avatar
Peter van 't Hof committed
196

Peter van 't Hof's avatar
Peter van 't Hof committed
197
    if (baseRecalibrator.knownSites.isEmpty) {
Peter van 't Hof's avatar
Peter van 't Hof committed
198
      logger.warn("No Known site found, skipping base recalibration, file: " + inputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
199
200
      return inputBam
    }
201
    add(baseRecalibrator)
bow's avatar
bow committed
202

203
204
    val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after")) //with gatkArguments {
    baseRecalibratorAfter.BQSR = baseRecalibrator.o
Peter van 't Hof's avatar
Peter van 't Hof committed
205
    add(baseRecalibratorAfter)
bow's avatar
bow committed
206

207
    add(AnalyzeCovariates(this, baseRecalibrator.o, baseRecalibratorAfter.o, swapExt(dir, inputBam, ".bam", ".baserecal.pdf")))
bow's avatar
bow committed
208

209
210
    val printReads = PrintReads(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam"))
    printReads.BQSR = baseRecalibrator.o
Peter van 't Hof's avatar
Peter van 't Hof committed
211
    add(printReads, isIntermediate = isIntermediate)
bow's avatar
bow committed
212

213
214
215
216
217
    return printReads.o
  }
}

object GatkVariantcalling extends PipelineCommand {
218
219
220
  class ScriptOutput {
    var bamFiles: List[File] = _
    var gvcfFile: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
221
222
    var hcVcfFile: File = _
    var ugVcfFile: File = _
223
    var rawVcfFile: File = _
224
    var rawFilterVcfFile: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
225
226
    var hcAlleleVcf: File = _
    var ugAlleleVcf: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
227
    var finalVcfFile: File = _
228
  }
229
}