GatkGenotyping.scala 3.32 KB
Newer Older
1
2
3
4
5
package nl.lumc.sasc.biopet.pipelines.gatk

import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.core.config._
import nl.lumc.sasc.biopet.function._
Peter van 't Hof's avatar
Peter van 't Hof committed
6
7
8
9
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.extensions.gatk.{CommandLineGATK, GenotypeGVCFs, SelectVariants}
import org.broadinstitute.gatk.queue.function._
import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument}
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35

class GatkGenotyping(val root:Configurable) extends QScript with BiopetQScript {
  def this() = this(null)
  
  @Input(doc="Gvcf files", shortName="I")
  var inputGvcfs: List[File] = Nil
  
  @Argument(doc="Reference", shortName="R", required=false)
  var reference: File = _
  
  @Argument(doc="Dbsnp", shortName="dbsnp", required=false)
  var dbsnp: File = _
  
  @Argument(doc="OutputName", required=false)
  var outputName: String = "genotype"
  
  @Output(doc="OutputFile", shortName="O", required=false)
  var outputFile: File = _
  
  @Argument(doc="Samples", shortName="sample", required=false)
  var samples: List[String] = Nil
  
  def init() {
    if (reference == null) reference = config("reference")
    if (dbsnp == null && configContains("dbsnp")) dbsnp = config("dbsnp")
    if (outputFile == null) outputFile = outputDir + outputName + ".vcf"
Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
  }
  
  def biopetScript() {
    addGenotypeGVCFs(inputGvcfs, outputFile)
    if (!samples.isEmpty) {
      if (samples.size > 1) addSelectVariants(outputFile, samples, outputDir + "samples/", "all")
      for (sample <- samples) addSelectVariants(outputFile, List(sample), outputDir + "samples/", sample)
    }
  }
  
  trait gatkArguments extends CommandLineGATK {
    this.reference_sequence = reference
    this.memoryLimit = 2
    this.jobResourceRequests :+= "h_vmem=4G"
  }
  
  def addGenotypeGVCFs(gvcfFiles: List[File], outputFile:File): File = {
    val genotypeGVCFs = new GenotypeGVCFs() with gatkArguments {
      this.variant = gvcfFiles
      if (configContains("dbsnp")) this.dbsnp = config("dbsnp")
      if (configContains("scattercount", "genotypegvcfs")) this.scatterCount = config("scattercount", 1, "genotypegvcfs")
      this.out = outputFile
60
61
62
63
64
65
66
      if (config("inputtype", "dna") == "rna") {
        this.stand_call_conf = config("stand_call_conf", 20, "haplotypecaller")
        this.stand_emit_conf = config("stand_emit_conf", 20, "haplotypecaller")
      } else {
        this.stand_call_conf = config("stand_call_conf", 30, "haplotypecaller")
        this.stand_emit_conf = config("stand_emit_conf", 30, "haplotypecaller")
      }
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
    }
    add(genotypeGVCFs)
    return genotypeGVCFs.out
  }
  
  def addSelectVariants(inputFile:File, samples:List[String], outputDir:String, name:String) {
    val selectVariants = new SelectVariants with gatkArguments {
      this.variant = inputFile
      for (sample <- samples) this.sample_name :+= sample
      this.excludeNonVariants = true
      if (configContains("scattercount", "selectvariants")) this.scatterCount = config("scattercount", 1, "selectvariants")
      this.out = outputDir + name + ".vcf"
    }
    add(selectVariants)
  }
}

object GatkGenotyping extends PipelineCommand {
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.class"
}