GatkVariantcalling.scala 9.86 KB
Newer Older
1
2
package nl.lumc.sasc.biopet.pipelines.gatk

Peter van 't Hof's avatar
Peter van 't Hof committed
3
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
4
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
5
import nl.lumc.sasc.biopet.tools.{ MpileupToVcf, VcfFilter, MergeAlleles }
Peter van 't Hof's avatar
Peter van 't Hof committed
6
import nl.lumc.sasc.biopet.core.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.extensions.gatk.{ AnalyzeCovariates, BaseRecalibrator, GenotypeGVCFs, HaplotypeCaller, IndelRealigner, PrintReads, RealignerTargetCreator, SelectVariants, CombineVariants, UnifiedGenotyper }
Peter van 't Hof's avatar
Peter van 't Hof committed
8
import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
Peter van 't Hof's avatar
Peter van 't Hof committed
9
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
10
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
Peter van 't Hof's avatar
Peter van 't Hof committed
11
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument }
Peter van 't Hof's avatar
Peter van 't Hof committed
12
import scala.collection.SortedMap
Peter van 't Hof's avatar
Peter van 't Hof committed
13
import scala.language.reflectiveCalls
14

bow's avatar
bow committed
15
class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScript {
16
  def this() = this(null)
bow's avatar
bow committed
17

18
  val scriptOutput = new GatkVariantcalling.ScriptOutput
Peter van 't Hof's avatar
Peter van 't Hof committed
19

bow's avatar
bow committed
20
  @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM")
21
  var inputBams: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
22

Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
  @Input(doc = "Raw vcf file", shortName = "raw")
  var rawVcfInput: File = _
bow's avatar
bow committed
25
26

  @Argument(doc = "Reference", shortName = "R", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
27
  var reference: File = config("reference", required = true)
bow's avatar
bow committed
28
29

  @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
30
  var dbsnp: File = config("dbsnp")
bow's avatar
bow committed
31
32

  @Argument(doc = "OutputName", required = false)
33
  var outputName: String = _
bow's avatar
bow committed
34

35
36
  @Argument(doc = "Sample name", required = false)
  var sampleID: String = _
Peter van 't Hof's avatar
Peter van 't Hof committed
37

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  var preProcesBams: Option[Boolean] = config("pre_proces_bams", default = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
39
  var variantcalling: Boolean = true
Peter van 't Hof's avatar
Peter van 't Hof committed
40
41
42
43
  var doublePreProces: Option[Boolean] = config("double_pre_proces", default = true)
  var useHaplotypecaller: Option[Boolean] = config("use_haplotypecaller", default = true)
  var useUnifiedGenotyper: Option[Boolean] = config("use_unifiedgenotyper", default = false)
  var useAllelesOption: Option[Boolean] = config("use_alleles_option", default = false)
44
45
  var useIndelRealigner: Boolean = config("use_indel_realign", default = true)
  var useBaseRecalibration: Boolean = config("use_base_recalibration", default = true)
bow's avatar
bow committed
46

47
  def init() {
48
    if (outputName == null && sampleID != null) outputName = sampleID
Peter van 't Hof's avatar
Peter van 't Hof committed
49
    else if (outputName == null) outputName = "noname"
50
51
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
52
53

    val baseRecalibrator = new BaseRecalibrator(this)
54
    if (preProcesBams && useBaseRecalibration && baseRecalibrator.knownSites.isEmpty) {
55
56
57
      logger.warn("No Known site found, skipping base recalibration")
      useBaseRecalibration = false
    }
58
  }
bow's avatar
bow committed
59

Peter van 't Hof's avatar
Peter van 't Hof committed
60
61
62
63
  private def doublePreProces(files: List[File]): List[File] = {
    if (files.size == 1) return files
    if (files.isEmpty) throw new IllegalStateException("Files can't be empty")
    if (!doublePreProces.get) return files
Peter van 't Hof's avatar
Peter van 't Hof committed
64
65
    val markDup = MarkDuplicates(this, files, new File(outputDir + outputName + ".dedup.bam"))
    add(markDup, isIntermediate = useIndelRealigner)
66
    if (useIndelRealigner) {
Peter van 't Hof's avatar
Peter van 't Hof committed
67
      List(addIndelRealign(markDup.output, outputDir, isIntermediate = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
68
    } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
69
      List(markDup.output)
Peter van 't Hof's avatar
Peter van 't Hof committed
70
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
71
72
  }

73
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
74
    scriptOutput.bamFiles = if (preProcesBams.get) {
Peter van 't Hof's avatar
Peter van 't Hof committed
75
76
      var bamFiles: List[File] = Nil
      for (inputBam <- inputBams) {
77
78
79
80
81
82
83
84
        var bamFile = inputBam
        if (useIndelRealigner) {
          bamFile = addIndelRealign(bamFile, outputDir, isIntermediate = useBaseRecalibration)
        }
        if (useBaseRecalibration) {
          bamFile = addBaseRecalibrator(bamFile, outputDir, isIntermediate = bamFiles.size > 1)
        }
        bamFiles :+= bamFile
Peter van 't Hof's avatar
Peter van 't Hof committed
85
      }
86
      doublePreProces(bamFiles)
Peter van 't Hof's avatar
Peter van 't Hof committed
87
    } else if (inputBams.size > 1 && doublePreProces.get) {
88
      doublePreProces(inputBams)
Peter van 't Hof's avatar
Peter van 't Hof committed
89
    } else inputBams
Peter van 't Hof's avatar
Peter van 't Hof committed
90

Peter van 't Hof's avatar
Peter van 't Hof committed
91
    if (variantcalling) {
Peter van 't Hof's avatar
Peter van 't Hof committed
92
      var mergBuffer: SortedMap[String, File] = SortedMap()
93
      def mergeList = mergBuffer map { case (key, file) => TaggedFile(removeNoneVariants(file), "name=" + key) }
Peter van 't Hof's avatar
Peter van 't Hof committed
94

Peter van 't Hof's avatar
Peter van 't Hof committed
95
      if (sampleID != null && (useHaplotypecaller.get || config("joint_genotyping", default = false).getBoolean)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
98
99
100
101
102
        val hcGvcf = new HaplotypeCaller(this)
        hcGvcf.useGvcf
        hcGvcf.input_file = scriptOutput.bamFiles
        hcGvcf.out = outputDir + outputName + ".hc.discovery.gvcf.vcf.gz"
        add(hcGvcf)
        scriptOutput.gvcfFile = hcGvcf.out
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
103

Peter van 't Hof's avatar
Peter van 't Hof committed
104
      if (useHaplotypecaller.get) {
Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
107
108
109
110
111
112
113
114
115
116
117
        if (sampleID != null) {
          val genotypeGVCFs = GenotypeGVCFs(this, List(scriptOutput.gvcfFile), outputDir + outputName + ".hc.discovery.vcf.gz")
          add(genotypeGVCFs)
          scriptOutput.hcVcfFile = genotypeGVCFs.out
        } else {
          val hcGvcf = new HaplotypeCaller(this)
          hcGvcf.input_file = scriptOutput.bamFiles
          hcGvcf.out = outputDir + outputName + ".hc.discovery.vcf.gz"
          add(hcGvcf)
          scriptOutput.hcVcfFile = hcGvcf.out
        }
        mergBuffer += ("1.HC-Discovery" -> scriptOutput.hcVcfFile)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
118

Peter van 't Hof's avatar
Peter van 't Hof committed
119
      if (useUnifiedGenotyper.get) {
Peter van 't Hof's avatar
Peter van 't Hof committed
120
121
122
123
124
125
126
        val ugVcf = new UnifiedGenotyper(this)
        ugVcf.input_file = scriptOutput.bamFiles
        ugVcf.out = outputDir + outputName + ".ug.discovery.vcf.gz"
        add(ugVcf)
        scriptOutput.ugVcfFile = ugVcf.out
        mergBuffer += ("2.UG-Discovery" -> scriptOutput.ugVcfFile)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
127

Peter van 't Hof's avatar
Peter van 't Hof committed
128
      // Generate raw vcf
Peter van 't Hof's avatar
Peter van 't Hof committed
129
      if (sampleID != null && scriptOutput.bamFiles.size == 1) {
130
        val m2v = new MpileupToVcf(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
131
        m2v.inputBam = scriptOutput.bamFiles.head
132
133
134
135
136
        m2v.sample = sampleID
        m2v.output = outputDir + outputName + ".raw.vcf"
        add(m2v)
        scriptOutput.rawVcfFile = m2v.output

Peter van 't Hof's avatar
Peter van 't Hof committed
137
        val vcfFilter = new VcfFilter(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
138
139
140
141
        vcfFilter.defaults ++= Map("min_sample_depth" -> 8,
          "min_alternate_depth" -> 2,
          "min_samples_pass" -> 1,
          "filter_ref_calls" -> true)
Peter van 't Hof's avatar
Peter van 't Hof committed
142
143
144
        vcfFilter.inputVcf = m2v.output
        vcfFilter.outputVcf = this.swapExt(outputDir, m2v.output, ".vcf", ".filter.vcf.gz")
        add(vcfFilter)
145
        scriptOutput.rawFilterVcfFile = vcfFilter.outputVcf
Peter van 't Hof's avatar
Peter van 't Hof committed
146
147
148
      } else if (rawVcfInput != null) scriptOutput.rawFilterVcfFile = rawVcfInput
      if (scriptOutput.rawFilterVcfFile == null) throw new IllegalStateException("Files can't be empty")
      mergBuffer += ("9.raw" -> scriptOutput.rawFilterVcfFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
149

Peter van 't Hof's avatar
Peter van 't Hof committed
150
      if (useAllelesOption.get) {
Peter van 't Hof's avatar
Peter van 't Hof committed
151
        val mergeAlleles = MergeAlleles(this, mergeList.toList, outputDir + "raw.allele__temp_only.vcf.gz")
Peter van 't Hof's avatar
Peter van 't Hof committed
152
        add(mergeAlleles, isIntermediate = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
153

Peter van 't Hof's avatar
Peter van 't Hof committed
154
        if (useHaplotypecaller.get) {
Peter van 't Hof's avatar
Peter van 't Hof committed
155
156
157
          val hcAlleles = new HaplotypeCaller(this)
          hcAlleles.input_file = scriptOutput.bamFiles
          hcAlleles.out = outputDir + outputName + ".hc.allele.vcf.gz"
Peter van 't Hof's avatar
Peter van 't Hof committed
158
          hcAlleles.alleles = mergeAlleles.output
Peter van 't Hof's avatar
Peter van 't Hof committed
159
160
161
162
163
          hcAlleles.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
          add(hcAlleles)
          scriptOutput.hcAlleleVcf = hcAlleles.out
          mergBuffer += ("3.HC-alleles" -> hcAlleles.out)
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
164

Peter van 't Hof's avatar
Peter van 't Hof committed
165
        if (useUnifiedGenotyper.get) {
Peter van 't Hof's avatar
Peter van 't Hof committed
166
167
168
          val ugAlleles = new UnifiedGenotyper(this)
          ugAlleles.input_file = scriptOutput.bamFiles
          ugAlleles.out = outputDir + outputName + ".ug.allele.vcf.gz"
Peter van 't Hof's avatar
Peter van 't Hof committed
169
          ugAlleles.alleles = mergeAlleles.output
Peter van 't Hof's avatar
Peter van 't Hof committed
170
171
172
173
174
          ugAlleles.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
          add(ugAlleles)
          scriptOutput.ugAlleleVcf = ugAlleles.out
          mergBuffer += ("4.UG-alleles" -> ugAlleles.out)
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
175
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
176

Peter van 't Hof's avatar
Peter van 't Hof committed
177
      def removeNoneVariants(input: File): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
178
179
180
181
182
183
184
185
186
        val output = input.getAbsolutePath.stripSuffix(".vcf.gz") + ".variants_only.vcf.gz"
        val sv = SelectVariants(this, input, output)
        sv.excludeFiltered = true
        sv.excludeNonVariants = true
        add(sv, isIntermediate = true)
        sv.out
      }

      val cvFinal = CombineVariants(this, mergeList.toList, outputDir + outputName + ".final.vcf.gz")
187
      cvFinal.genotypemergeoption = org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils.GenotypeMergeType.UNSORTED
Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
      add(cvFinal)
      scriptOutput.finalVcfFile = cvFinal.out
190
191
    }
  }
bow's avatar
bow committed
192

Peter van 't Hof's avatar
Peter van 't Hof committed
193
  def addIndelRealign(inputBam: File, dir: String, isIntermediate: Boolean = true): File = {
194
195
196
197
    val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir)
    add(realignerTargetCreator, isIntermediate = true)

    val indelRealigner = IndelRealigner.apply(this, inputBam, realignerTargetCreator.out, dir)
Peter van 't Hof's avatar
Peter van 't Hof committed
198
    add(indelRealigner, isIntermediate = isIntermediate)
bow's avatar
bow committed
199

200
201
    return indelRealigner.o
  }
bow's avatar
bow committed
202

Peter van 't Hof's avatar
Peter van 't Hof committed
203
  def addBaseRecalibrator(inputBam: File, dir: String, isIntermediate: Boolean = false): File = {
204
    val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal"))
Peter van 't Hof's avatar
Peter van 't Hof committed
205

Peter van 't Hof's avatar
Peter van 't Hof committed
206
    if (baseRecalibrator.knownSites.isEmpty) {
Peter van 't Hof's avatar
Peter van 't Hof committed
207
      logger.warn("No Known site found, skipping base recalibration, file: " + inputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
208
209
      return inputBam
    }
210
    add(baseRecalibrator)
bow's avatar
bow committed
211

212
    val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after"))
213
    baseRecalibratorAfter.BQSR = baseRecalibrator.o
Peter van 't Hof's avatar
Peter van 't Hof committed
214
    add(baseRecalibratorAfter)
bow's avatar
bow committed
215

216
    add(AnalyzeCovariates(this, baseRecalibrator.o, baseRecalibratorAfter.o, swapExt(dir, inputBam, ".bam", ".baserecal.pdf")))
bow's avatar
bow committed
217

218
219
    val printReads = PrintReads(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam"))
    printReads.BQSR = baseRecalibrator.o
Peter van 't Hof's avatar
Peter van 't Hof committed
220
    add(printReads, isIntermediate = isIntermediate)
bow's avatar
bow committed
221

222
223
224
225
226
    return printReads.o
  }
}

object GatkVariantcalling extends PipelineCommand {
227
228
229
  class ScriptOutput {
    var bamFiles: List[File] = _
    var gvcfFile: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
230
231
    var hcVcfFile: File = _
    var ugVcfFile: File = _
232
    var rawVcfFile: File = _
233
    var rawFilterVcfFile: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
234
235
    var hcAlleleVcf: File = _
    var ugAlleleVcf: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
236
    var finalVcfFile: File = _
237
  }
238
}