Shiva.scala 13.1 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
bow's avatar
bow committed
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
package nl.lumc.sasc.biopet.pipelines.shiva

17
18
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.extensions.gatk._
22
import nl.lumc.sasc.biopet.extensions.tools.ValidateVcf
23
import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.pipelines.kopisu.Kopisu
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.utils.config.Configurable
28
import org.broadinstitute.gatk.queue.QScript
29
import org.broadinstitute.gatk.queue.function.QFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
30

Peter van 't Hof's avatar
Peter van 't Hof committed
31
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
34
35
 * Created by pjvan_thof on 2/26/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
36
class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTrait with Reference with TargetRegions { qscript =>
37

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  def this() = this(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
39

Peter van 't Hof's avatar
Peter van 't Hof committed
40
  override def reportClass: Option[ReportBuilderExtension] = {
41
42
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
43
    shiva.summaryDbFile = summaryDbFile
44
45
46
    Some(shiva)
  }

47
48
49
50
51
52
  override def defaults = Map(
    "haplotypecaller" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0),
    "genotypegvcfs" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0),
    "unifiedgenotyper" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0)
  )

53
54
  lazy val usePrintReads: Boolean = config("use_printreads", default = true)

Sander Bollen's avatar
Sander Bollen committed
55
  /** Method to make the variantcalling namespace of shiva */
56
57
58
  def makeVariantcalling(multisample: Boolean,
                         sample: Option[String] = None,
                         library: Option[String] = None): ShivaVariantcalling with QScript = {
Peter van 't Hof's avatar
Peter van 't Hof committed
59
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
60
      override def namePrefix = "multisample"
Sander Bollen's avatar
Sander Bollen committed
61
      override def configNamespace: String = "shivavariantcalling"
Peter van 't Hof's avatar
Peter van 't Hof committed
62
63
64
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
Sander Bollen's avatar
Sander Bollen committed
65
      override def configNamespace = "shivavariantcalling"
66
67
      sampleId = sample
      libId = library
Peter van 't Hof's avatar
Peter van 't Hof committed
68
69
70
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
71
  /** Method to make a sample */
72
  override def makeSample(id: String) = new this.Sample(id)
73

Peter van 't Hof's avatar
Peter van 't Hof committed
74
  /** Class that will generate jobs for a sample */
75
  class Sample(sampleId: String) extends super.Sample(sampleId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
76
    /** Method to make a library */
77
    override def makeLibrary(id: String) = new this.Library(id)
Peter van 't Hof's avatar
Peter van 't Hof committed
78

79
    /** Sample specific settings */
80
81
    override def summarySettings = super.summarySettings ++
      Map("single_sample_variantcalling" -> variantcalling.isDefined, "use_indel_realigner" -> useIndelRealigner)
82

Peter van 't Hof's avatar
Peter van 't Hof committed
83
    /** Class to generate jobs for a library */
84
    class Library(libId: String) extends super.Library(libId) {
85

86
87
88
89
      override def summaryFiles = super.summaryFiles ++
        variantcalling.map("final" -> _.finalFile) ++
        bqsrFile.map("baserecal" -> _) ++
        bqsrAfterFile.map("baserecal_after" -> _)
90

91
92
93
94
95
96
97
98
      lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
      lazy val useBaseRecalibration: Boolean = {
        val c: Boolean = config("use_base_recalibration", default = true)
        val br = new BaseRecalibrator(qscript)
        if (c && br.knownSites.isEmpty)
          logger.warn("No Known site found, skipping base recalibration, file: " + inputBam)
        c && br.knownSites.nonEmpty
      }
99
100
      lazy val useAnalyzeCovariates: Boolean = if (useBaseRecalibration) config("use_analyze_covariates", default = true) else false

Peter van 't Hof's avatar
Peter van 't Hof committed
101
102
      lazy val bqsrFile: Option[File] = if (useBaseRecalibration) Some(createFile("baserecal")) else None
      lazy val bqsrAfterFile: Option[File] = if (useAnalyzeCovariates) Some(createFile("baserecal.after")) else None
103

Peter van 't Hof's avatar
Peter van 't Hof committed
104
105
      override def keepFinalBamfile = super.keepFinalBamfile && !useIndelRealigner && !useBaseRecalibration

106
107
108
      override def bamFile = mapping.map(_.mergedBamFile)

      override def preProcessBam = if (useIndelRealigner && usePrintReads)
109
110
        bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
      else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
111
      else if (usePrintReads) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
112
113
      else bamFile

114
      /** Library specific settings */
115
      override def summarySettings = super.summarySettings ++ Map(
116
117
        "library_variantcalling" -> variantcalling.isDefined,
        "use_indel_realigner" -> useIndelRealigner,
118
119
120
        "use_base_recalibration" -> useBaseRecalibration,
        "useAnalyze_covariates" -> useAnalyzeCovariates
      )
121

122
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
123
        (bamFile.isDefined || preProcessBam.isDefined)) {
124
        Some(makeVariantcalling(multisample = false, sample = Some(sampleId), library = Some(libId)))
Peter van 't Hof's avatar
Peter van 't Hof committed
125
126
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
127
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
128
      override def addJobs() = {
129
        super.addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
130

131
132
        if (useIndelRealigner && useBaseRecalibration) {
          val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true)
133
          addBaseRecalibrator(file, libDir, libraries.size > 1, usePrintReads)
134
135
136
        } else if (useIndelRealigner) {
          addIndelRealign(bamFile.get, libDir, libraries.size > 1)
        } else if (useBaseRecalibration) {
137
          addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1, usePrintReads)
138
139
        }

Peter van 't Hof's avatar
Peter van 't Hof committed
140
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
141
142
          vc.sampleId = Some(sampleId)
          vc.libId = Some(libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
143
          vc.outputDir = new File(libDir, "variantcalling")
144
145
          if (preProcessBam.isDefined) vc.inputBams = Map(sampleId -> preProcessBam.get)
          else vc.inputBams = Map(sampleId -> bamFile.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
146
          add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
147
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
148
      }
149
150
151

      /** Adds base recalibration jobs */
      def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean, usePrintreads: Boolean): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
152
        require(bqsrFile.isDefined, "bqsrFile should contain something at this point")
Peter van 't Hof's avatar
Peter van 't Hof committed
153
        val baseRecalibrator = BaseRecalibrator(qscript, inputBam, bqsrFile.get) // at this point bqsrFile should exist
154
155
156
157
158

        if (baseRecalibrator.knownSites.isEmpty) return inputBam
        add(baseRecalibrator)

        if (useAnalyzeCovariates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
159
160
          val baseRecalibratorAfter = BaseRecalibrator(qscript, inputBam, bqsrAfterFile.get)
          baseRecalibratorAfter.BQSR = bqsrFile
161
162
163
164
          add(baseRecalibratorAfter)
          add(AnalyzeCovariates(qscript, baseRecalibrator.out, baseRecalibratorAfter.out, swapExt(dir, inputBam, ".bam", ".baserecal.pdf")))
        }
        if (usePrintreads) {
Peter van 't Hof's avatar
Peter van 't Hof committed
165
166
167
          val printReads = PrintReads(qscript, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam"))
          printReads.BQSR = Some(baseRecalibrator.out)
          printReads.isIntermediate = isIntermediate
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
          add(printReads)
          printReads.out
        } else inputBam
      }

    } // end of library

    lazy val bqsrFile: Option[File] = {
      val files = libraries.flatMap(_._2.bqsrFile).toList
      if (files.isEmpty) None else {
        val gather = new BqsrGather
        gather.inputBqsrFiles = files
        gather.outputBqsrFile = createFile("baserecal")
        add(gather)
        Some(gather.outputBqsrFile)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
184
185
    }

Peter van 't Hof's avatar
fix bug    
Peter van 't Hof committed
186
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
187
      Some(makeVariantcalling(multisample = false, sample = Some(sampleId)))
Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
    } else None

190
191
192
193
194
195
196
197
    override def keepMergedFiles: Boolean = config("keep_merged_files", default = !useIndelRealigner)

    lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)

    override def preProcessBam = if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
      bamFile.map(swapExt(sampleDir, _, ".bam", ".realign.bam"))
    } else bamFile

198
199
    override def summaryFiles = super.summaryFiles ++ variantcalling.map("final" -> _.finalFile)

Peter van 't Hof's avatar
Peter van 't Hof committed
200
    /** This will add sample jobs */
201
202
    override def addJobs(): Unit = {
      super.addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
203

204
205
206
207
      if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
        addIndelRealign(bamFile.get, sampleDir, false)
      }

208
      preProcessBam.foreach { bam =>
Peter van 't Hof's avatar
Peter van 't Hof committed
209
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
210
211
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
212
          vc.inputBams = Map(sampleId -> bam)
Peter van 't Hof's avatar
Peter van 't Hof committed
213
          add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
214
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
215
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
216
    }
217
  } // End of sample
Peter van 't Hof's avatar
Peter van 't Hof committed
218

219
  lazy val multisampleVariantCalling = if (config("multisample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
220
221
222
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
223
  lazy val svCalling = if (config("sv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
224
    Some(new ShivaSvCalling(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
225
226
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
227
  lazy val cnvCalling = if (config("cnv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
228
    Some(new Kopisu(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
229
230
  } else None

231
232
233
234
235
  lazy val annotation = if (multisampleVariantCalling.isDefined &&
    config("annotation", default = false).asBoolean) {
    Some(new Toucan(this))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
236
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
237
  override def addMultiSampleJobs() = {
238
239
    super.addMultiSampleJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
240
    addAll(dbsnpVcfFile.map(Shiva.makeValidateVcfJobs(this, _, referenceFasta(), new File(outputDir, ".validate"))).getOrElse(Nil))
241

242
    multisampleVariantCalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
243
      vc.outputDir = new File(outputDir, "variantcalling")
244
      vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
245
246
      if (!usePrintReads)
        vc.inputBqsrFiles = samples.flatMap { case (sampleId, sample) => sample.bqsrFile.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
247
      add(vc)
248
249
250
      if (!usePrintReads) {
        import variantcallers._
        if (vc.callers.exists(_ match {
Peter van 't Hof's avatar
Peter van 't Hof committed
251
252
          case _: HaplotypeCaller | _: HaplotypeCallerAllele | _: HaplotypeCallerGvcf => false
          case _: UnifiedGenotyper | _: UnifiedGenotyperAllele => false
253
254
255
          case _ => true
        })) logger.warn("Not all variantcallers chosen can read BQSR files, All non-GATK")
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
256

257
      annotation.foreach { toucan =>
Peter van 't Hof's avatar
Peter van 't Hof committed
258
        toucan.outputDir = new File(outputDir, "annotation")
259
        toucan.inputVcf = vc.finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
260
        add(toucan)
Peter van 't Hof's avatar
Peter van 't Hof committed
261
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
262
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
263

Peter van 't Hof's avatar
Peter van 't Hof committed
264
    svCalling.foreach { sv =>
Peter van 't Hof's avatar
Peter van 't Hof committed
265
      sv.outputDir = new File(outputDir, "sv_calling")
266
      sv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
267
      add(sv)
Peter van 't Hof's avatar
Peter van 't Hof committed
268
269
270
271
272
273
274
    }

    cnvCalling.foreach { cnv =>
      cnv.outputDir = new File(outputDir, "cnv_calling")
      cnv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
      add(cnv)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
275
276
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
277
  /** Settings of pipeline for summary */
278
  override def summarySettings = super.summarySettings ++ Map(
279
280
281
    "annotation" -> annotation.isDefined,
    "multisample_variantcalling" -> multisampleVariantCalling.isDefined,
    "sv_calling" -> svCalling.isDefined,
282
    "cnv_calling" -> cnvCalling.isDefined,
283
    "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
284
285
    "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")),
    "use_print_reads" -> usePrintReads
286
  )
287
288
289
290
291
292
293
294
295
296
297
298
299
300

  /** Adds indel realignment jobs */
  def addIndelRealign(inputBam: File, dir: File, isIntermediate: Boolean): File = {
    val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir)
    realignerTargetCreator.isIntermediate = true
    add(realignerTargetCreator)

    val indelRealigner = IndelRealigner(this, inputBam, realignerTargetCreator.out, dir)
    indelRealigner.isIntermediate = isIntermediate
    add(indelRealigner)

    indelRealigner.out
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
301
}
Peter van 't Hof's avatar
Peter van 't Hof committed
302

Peter van 't Hof's avatar
Peter van 't Hof committed
303
/** This object give a default main method to the pipelines */
304
305
306
307
308
object Shiva extends PipelineCommand {

  // This is used to only execute 1 validation per vcf file
  private var validateVcfSeen: Set[(File, File)] = Set()

Peter van 't Hof's avatar
Peter van 't Hof committed
309
  def makeValidateVcfJobs(root: Configurable, vcfFile: File, referenceFile: File, outputDir: File): List[QFunction] = {
310
311
312
313
314
315
    if (validateVcfSeen.contains((vcfFile, referenceFile))) Nil
    else {
      validateVcfSeen ++= Set((vcfFile, referenceFile))
      val validateVcf = new ValidateVcf(root)
      validateVcf.inputVcf = vcfFile
      validateVcf.reference = referenceFile
Peter van 't Hof's avatar
Peter van 't Hof committed
316
      validateVcf.jobOutputFile = new File(outputDir, vcfFile.getAbsolutePath + ".validateVcf.out")
317
318
319

      val checkValidateVcf = new CheckValidateVcf
      checkValidateVcf.inputLogFile = validateVcf.jobOutputFile
Peter van 't Hof's avatar
Peter van 't Hof committed
320
      checkValidateVcf.jobOutputFile = new File(outputDir, vcfFile.getAbsolutePath + ".checkValidateVcf.out")
321
322
323
324
325

      List(validateVcf, checkValidateVcf)
    }
  }
}