GatkPipeline.scala 10.3 KB
Newer Older
1
2
package nl.lumc.sasc.biopet.pipelines.gatk

Peter van 't Hof's avatar
Peter van 't Hof committed
3
4
5
import nl.lumc.sasc.biopet.core.MultiSampleQScript
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable
6
import htsjdk.samtools.SamReaderFactory
7
import scala.collection.JavaConversions._
8
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
9
import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, CombineGVCFs }
10
import nl.lumc.sasc.biopet.extensions.picard.AddOrReplaceReadGroups
Peter van 't Hof's avatar
Peter van 't Hof committed
11
import nl.lumc.sasc.biopet.extensions.picard.SamToFastq
12
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
13
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
14
import org.broadinstitute.gatk.queue.QScript
bow's avatar
bow committed
15
import org.broadinstitute.gatk.utils.commandline.{ Argument }
16

bow's avatar
bow committed
17
class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScript {
18
  def this() = this(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
19

bow's avatar
bow committed
20
  @Argument(doc = "Only Sample", shortName = "sample", required = false)
21
  val onlySample: String = ""
bow's avatar
bow committed
22
23

  @Argument(doc = "Skip Genotyping step", shortName = "skipgenotyping", required = false)
24
  var skipGenotyping: Boolean = false
bow's avatar
bow committed
25
26

  @Argument(doc = "Merge gvcfs", shortName = "mergegvcfs", required = false)
27
  var mergeGvcfs: Boolean = false
bow's avatar
bow committed
28

Peter van 't Hof's avatar
Peter van 't Hof committed
29
  @Argument(doc = "Joint variantcalling", shortName = "jointVariantCalling", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
30
  var jointVariantcalling: Boolean = config("joint_variantcalling", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
31

Peter van 't Hof's avatar
Peter van 't Hof committed
32
  @Argument(doc = "Joint genotyping", shortName = "jointGenotyping", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
33
  var jointGenotyping: Boolean = config("joint_genotyping", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
34

Peter van 't Hof's avatar
Peter van 't Hof committed
35
36
37
  var singleSampleCalling = config("single_sample_calling", default = true)
  var reference: File = config("reference", required = true)
  var dbsnp: File = config("dbsnp")
38
39
  var gvcfFiles: List[File] = Nil
  var finalBamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
40
  var useAllelesOption: Boolean = config("use_alleles_option", default = false)
bow's avatar
bow committed
41

Peter van 't Hof's avatar
Peter van 't Hof committed
42
43
  class LibraryOutput extends AbstractLibraryOutput {
    var mappedBamFile: File = _
44
    var variantcalling: GatkVariantcalling.ScriptOutput = _
Peter van 't Hof's avatar
Peter van 't Hof committed
45
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
46

47
  class SampleOutput extends AbstractSampleOutput {
48
    var variantcalling: GatkVariantcalling.ScriptOutput = _
Peter van 't Hof's avatar
Peter van 't Hof committed
49
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51
  def init() {
52
    if (config.contains("target_bed")) {
Peter van 't Hof's avatar
Peter van 't Hof committed
53
      defaults ++= Map("gatk" -> Map(("intervals" -> config("target_bed").getStringList)))
54
    }
55
    if (config.contains("gvcfFiles"))
bow's avatar
bow committed
56
      for (file <- config("gvcfFiles").getList)
Peter van 't Hof's avatar
Peter van 't Hof committed
57
        gvcfFiles :+= file.toString
58
59
60
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
  }
bow's avatar
bow committed
61

62
63
64
65
66
  val multisampleVariantcalling = new GatkVariantcalling(this) {
    override protected lazy val configName = "gatkvariantcalling"
    override def configPath: List[String] = "multisample" :: super.configPath
  }

67
68
69
  def biopetScript() {
    if (onlySample.isEmpty) {
      runSamplesJobs
Peter van 't Hof's avatar
Peter van 't Hof committed
70

71
72
      //SampleWide jobs
      if (mergeGvcfs && gvcfFiles.size > 0) {
73
        val newFile = outputDir + "merged.gvcf.vcf.gz"
74
        add(CombineGVCFs(this, gvcfFiles, newFile))
75
76
        gvcfFiles = List(newFile)
      }
bow's avatar
bow committed
77

78
      if (!skipGenotyping && gvcfFiles.size > 0) {
79
80
81
82
83
84
85
86
        if (jointGenotyping) {
          val gatkGenotyping = new GatkGenotyping(this)
          gatkGenotyping.inputGvcfs = gvcfFiles
          gatkGenotyping.outputDir = outputDir + "genotyping/"
          gatkGenotyping.init
          gatkGenotyping.biopetScript
          addAll(gatkGenotyping.functions)
          var vcfFile = gatkGenotyping.outputFile
87
88
        }
      } else logger.warn("No gVCFs to genotype")
Peter van 't Hof's avatar
Peter van 't Hof committed
89

90
      if (jointVariantcalling) {
Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
93
94
95
96
        val allBamfiles = for (
          (sampleID, sampleOutput) <- samplesOutput;
          file <- sampleOutput.variantcalling.bamFiles
        ) yield file
        val allRawVcfFiles = for ((sampleID, sampleOutput) <- samplesOutput) yield sampleOutput.variantcalling.rawFilterVcfFile

Peter van 't Hof's avatar
Peter van 't Hof committed
97
98
        val cvRaw = CombineVariants(this, allRawVcfFiles.toList, outputDir + "variantcalling/multisample.raw.vcf.gz")
        add(cvRaw)
Peter van 't Hof's avatar
Peter van 't Hof committed
99

Peter van 't Hof's avatar
Peter van 't Hof committed
100
101
        multisampleVariantcalling.preProcesBams = false
        multisampleVariantcalling.doublePreProces = false
102
103
104
105
106
107
108
        multisampleVariantcalling.inputBams = allBamfiles.toList
        multisampleVariantcalling.rawVcfInput = cvRaw.out
        multisampleVariantcalling.outputDir = outputDir + "variantcalling"
        multisampleVariantcalling.outputName = "multisample"
        multisampleVariantcalling.init
        multisampleVariantcalling.biopetScript
        addAll(multisampleVariantcalling.functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
109

110
111
        if (config("inputtype", default = "dna").getString != "rna" && config("recalibration", default = false).getBoolean) {
          val recalibration = new GatkVariantRecalibration(this)
112
          recalibration.inputVcf = multisampleVariantcalling.scriptOutput.finalVcfFile
113
114
115
116
117
          recalibration.bamFiles = finalBamFiles
          recalibration.outputDir = outputDir + "recalibration/"
          recalibration.init
          recalibration.biopetScript
        }
118
      }
bow's avatar
bow committed
119
    } else runSingleSampleJobs(onlySample)
120
  }
bow's avatar
bow committed
121

122
  // Called for each sample
123
124
  def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
    val sampleOutput = new SampleOutput
Peter van 't Hof's avatar
Peter van 't Hof committed
125
    var libraryBamfiles: List[File] = List()
126
    val sampleID: String = sampleConfig("ID").toString
127
    sampleOutput.libraries = runLibraryJobs(sampleConfig)
128
    val sampleDir = globalSampleDir + sampleID
129
    for ((libraryID, libraryOutput) <- sampleOutput.libraries) {
130
      libraryBamfiles ++= libraryOutput.variantcalling.bamFiles
131
    }
bow's avatar
bow committed
132

Peter van 't Hof's avatar
Peter van 't Hof committed
133
134
    if (libraryBamfiles.size > 0) {
      finalBamFiles ++= libraryBamfiles
135
      val gatkVariantcalling = new GatkVariantcalling(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
136
      gatkVariantcalling.inputBams = libraryBamfiles
137
      gatkVariantcalling.outputDir = sampleDir + "/variantcalling/"
Peter van 't Hof's avatar
Peter van 't Hof committed
138
      gatkVariantcalling.preProcesBams = false
139
      if (!singleSampleCalling) {
Peter van 't Hof's avatar
Peter van 't Hof committed
140
141
        gatkVariantcalling.useHaplotypecaller = false
        gatkVariantcalling.useUnifiedGenotyper = false
142
      }
143
      gatkVariantcalling.sampleID = sampleID
144
145
146
      gatkVariantcalling.init
      gatkVariantcalling.biopetScript
      addAll(gatkVariantcalling.functions)
147
148
      sampleOutput.variantcalling = gatkVariantcalling.scriptOutput
      gvcfFiles :+= gatkVariantcalling.scriptOutput.gvcfFile
149
    } else logger.warn("No bamfiles for variant calling for sample: " + sampleID)
Peter van 't Hof's avatar
Peter van 't Hof committed
150
    return sampleOutput
151
  }
bow's avatar
bow committed
152

153
  // Called for each run from a sample
Peter van 't Hof's avatar
Peter van 't Hof committed
154
155
  def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
    val libraryOutput = new LibraryOutput
156
157
    val runID: String = runConfig("ID").toString
    val sampleID: String = sampleConfig("ID").toString
158
    val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/"
159
160
    var inputType = ""
    if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
bow's avatar
bow committed
161
    else inputType = config("inputtype", default = "dna").toString
162
    if (runConfig.contains("R1")) {
Peter van 't Hof's avatar
Peter van 't Hof committed
163
      val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir)
164
      addAll(mapping.functions) // Add functions of mapping to curent function pool
Peter van 't Hof's avatar
Peter van 't Hof committed
165
      libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
166
167
168
    } else if (runConfig.contains("bam")) {
      var bamFile = new File(runConfig("bam").toString)
      if (!bamFile.exists) throw new IllegalStateException("Bam in config does not exist, file: " + bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
169

Peter van 't Hof's avatar
Peter van 't Hof committed
170
      if (config("bam_to_fastq", default = false).getBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
        val samToFastq = SamToFastq(this, bamFile, runDir + sampleID + "-" + runID + ".R1.fastq",
          runDir + sampleID + "-" + runID + ".R2.fastq")
sajvanderzeeuw's avatar
sajvanderzeeuw committed
173
        add(samToFastq, isIntermediate = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
174
175
176
177
178
179
180
181
182
        val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir, startJobs = false)
        mapping.input_R1 = samToFastq.fastqR1
        mapping.input_R2 = samToFastq.fastqR2
        mapping.init
        mapping.biopetScript
        addAll(mapping.functions) // Add functions of mapping to curent function pool
        libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
      } else {
        var readGroupOke = true
183
        val inputSam = SamReaderFactory.makeDefault.open(bamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
        val header = inputSam.getFileHeader.getReadGroups
        for (readGroup <- inputSam.getFileHeader.getReadGroups) {
          if (readGroup.getSample != sampleID) logger.warn("Sample ID readgroup in bam file is not the same")
          if (readGroup.getLibrary != runID) logger.warn("Library ID readgroup in bam file is not the same")
          if (readGroup.getSample != sampleID || readGroup.getLibrary != runID) readGroupOke = false
        }
        inputSam.close

        if (!readGroupOke) {
          if (config("correct_readgroups", default = false)) {
            logger.info("Correcting readgroups, file:" + bamFile)
            val aorrg = AddOrReplaceReadGroups(this, bamFile, new File(runDir + sampleID + "-" + runID + ".bam"))
            aorrg.RGID = sampleID + "-" + runID
            aorrg.RGLB = runID
            aorrg.RGSM = sampleID
            if (runConfig.contains("PL")) aorrg.RGPL = runConfig("PL").toString
            else aorrg.RGPL = "illumina"
            if (runConfig.contains("PU")) aorrg.RGPU = runConfig("PU").toString
            else aorrg.RGPU = "na"
            if (runConfig.contains("CN")) aorrg.RGCN = runConfig("CN").toString
            add(aorrg, isIntermediate = true)
            bamFile = aorrg.output
Peter van 't Hof's avatar
Peter van 't Hof committed
206
207
          } else throw new IllegalStateException("Readgroup sample and/or library of input bamfile is not correct, file: " + bamFile +
            "\nPossible to set 'correct_readgroups' to true on config to automatic fix this")
Peter van 't Hof's avatar
Peter van 't Hof committed
208
209
210
211
        }
        addAll(BamMetrics(this, bamFile, runDir + "metrics/").functions)

        libraryOutput.mappedBamFile = bamFile
212
213
      }
    } else logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
Peter van 't Hof's avatar
Peter van 't Hof committed
214

Peter van 't Hof's avatar
Peter van 't Hof committed
215
    val gatkVariantcalling = new GatkVariantcalling(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
216
    gatkVariantcalling.inputBams = List(libraryOutput.mappedBamFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
217
218
219
    gatkVariantcalling.outputDir = runDir
    gatkVariantcalling.variantcalling = config("library_variantcalling", default = false)
    gatkVariantcalling.preProcesBams = true
220
    gatkVariantcalling.sampleID = sampleID
Peter van 't Hof's avatar
Peter van 't Hof committed
221
222
223
    gatkVariantcalling.init
    gatkVariantcalling.biopetScript
    addAll(gatkVariantcalling.functions)
224
    libraryOutput.variantcalling = gatkVariantcalling.scriptOutput
Peter van 't Hof's avatar
Peter van 't Hof committed
225

Peter van 't Hof's avatar
Peter van 't Hof committed
226
    return libraryOutput
227
228
229
  }
}

230
object GatkPipeline extends PipelineCommand