Shiva.scala 13.7 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
bow's avatar
bow committed
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
package nl.lumc.sasc.biopet.pipelines.shiva

17
18
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.extensions.gatk._
22
import nl.lumc.sasc.biopet.extensions.tools.ValidateVcf
23
import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.pipelines.kopisu.Kopisu
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.mapping.{ Mapping, MultisampleMappingTrait }
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.utils.config.Configurable
28
import org.broadinstitute.gatk.queue.QScript
29
import org.broadinstitute.gatk.queue.function.QFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
30

Peter van 't Hof's avatar
Peter van 't Hof committed
31
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
34
35
 * Created by pjvan_thof on 2/26/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
36
class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTrait with Reference with TargetRegions { qscript =>
37

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  def this() = this(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
39

Peter van 't Hof's avatar
Peter van 't Hof committed
40
  override def reportClass: Option[ReportBuilderExtension] = {
41
42
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
43
    shiva.summaryDbFile = summaryDbFile
44
45
46
    Some(shiva)
  }

47
48
49
50
51
52
  override def defaults = Map(
    "haplotypecaller" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0),
    "genotypegvcfs" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0),
    "unifiedgenotyper" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0)
  )

53
54
  lazy val usePrintReads: Boolean = config("use_printreads", default = true)

Sander Bollen's avatar
Sander Bollen committed
55
  /** Method to make the variantcalling namespace of shiva */
56
57
58
  def makeVariantcalling(multisample: Boolean,
                         sample: Option[String] = None,
                         library: Option[String] = None): ShivaVariantcalling with QScript = {
Peter van 't Hof's avatar
Peter van 't Hof committed
59
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
60
      override def namePrefix = "multisample"
Sander Bollen's avatar
Sander Bollen committed
61
      override def configNamespace: String = "shivavariantcalling"
Peter van 't Hof's avatar
Peter van 't Hof committed
62
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
63
      genders = samples.map { case (sampleName, sample) => sampleName -> sample.gender }
Peter van 't Hof's avatar
Peter van 't Hof committed
64
65
    }
    else new ShivaVariantcalling(qscript) {
Sander Bollen's avatar
Sander Bollen committed
66
      override def configNamespace = "shivavariantcalling"
67
68
      sampleId = sample
      libId = library
69
      genders = sample.map(x => x -> samples(x).gender).toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
70
71
72
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
73
  /** Method to make a sample */
74
  override def makeSample(id: String) = new this.Sample(id)
75

Peter van 't Hof's avatar
Peter van 't Hof committed
76
  /** Class that will generate jobs for a sample */
77
  class Sample(sampleId: String) extends super.Sample(sampleId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
78
    /** Method to make a library */
79
    override def makeLibrary(id: String) = new this.Library(id)
Peter van 't Hof's avatar
Peter van 't Hof committed
80

81
    /** Sample specific settings */
Peter van 't Hof's avatar
Peter van 't Hof committed
82
    override def summarySettings: Map[String, Any] = super.summarySettings ++
83
      Map("single_sample_variantcalling" -> variantcalling.isDefined, "use_indel_realigner" -> useIndelRealigner)
84

Peter van 't Hof's avatar
Peter van 't Hof committed
85
    /** Class to generate jobs for a library */
86
    class Library(libId: String) extends super.Library(libId) {
87

Peter van 't Hof's avatar
Peter van 't Hof committed
88
      override def summaryFiles: Map[String, File] = super.summaryFiles ++
89
90
91
        variantcalling.map("final" -> _.finalFile) ++
        bqsrFile.map("baserecal" -> _) ++
        bqsrAfterFile.map("baserecal_after" -> _)
92

93
94
95
96
97
98
99
100
      lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
      lazy val useBaseRecalibration: Boolean = {
        val c: Boolean = config("use_base_recalibration", default = true)
        val br = new BaseRecalibrator(qscript)
        if (c && br.knownSites.isEmpty)
          logger.warn("No Known site found, skipping base recalibration, file: " + inputBam)
        c && br.knownSites.nonEmpty
      }
101
102
      lazy val useAnalyzeCovariates: Boolean = if (useBaseRecalibration) config("use_analyze_covariates", default = true) else false

Peter van 't Hof's avatar
Peter van 't Hof committed
103
104
      lazy val bqsrFile: Option[File] = if (useBaseRecalibration) Some(createFile("baserecal")) else None
      lazy val bqsrAfterFile: Option[File] = if (useAnalyzeCovariates) Some(createFile("baserecal.after")) else None
105

Peter van 't Hof's avatar
Peter van 't Hof committed
106
      override def keepFinalBamfile: Boolean = super.keepFinalBamfile && !useIndelRealigner && !useBaseRecalibration
Peter van 't Hof's avatar
Peter van 't Hof committed
107

Peter van 't Hof's avatar
Peter van 't Hof committed
108
      override def bamFile: Option[Mapping#File] = mapping.map(_.mergedBamFile)
109

Peter van 't Hof's avatar
Peter van 't Hof committed
110
      override def preProcessBam: Option[Mapping#File] = if (useIndelRealigner && usePrintReads && useBaseRecalibration)
111
112
        bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
      else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
Peter van 't Hof's avatar
Peter van 't Hof committed
113
      else if (usePrintReads && useBaseRecalibration) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
114
115
      else bamFile

116
      /** Library specific settings */
Peter van 't Hof's avatar
Peter van 't Hof committed
117
      override def summarySettings: Map[String, Any] = super.summarySettings ++ Map(
118
119
        "library_variantcalling" -> variantcalling.isDefined,
        "use_indel_realigner" -> useIndelRealigner,
120
121
122
        "use_base_recalibration" -> useBaseRecalibration,
        "useAnalyze_covariates" -> useAnalyzeCovariates
      )
123

Peter van 't Hof's avatar
Peter van 't Hof committed
124
      lazy val variantcalling: Option[ShivaVariantcalling with QScript] = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
125
        (bamFile.isDefined || preProcessBam.isDefined)) {
126
        Some(makeVariantcalling(multisample = false, sample = Some(sampleId), library = Some(libId)))
Peter van 't Hof's avatar
Peter van 't Hof committed
127
128
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
129
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
130
      override def addJobs(): Unit = {
131
        super.addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
132

133
134
        if (useIndelRealigner && useBaseRecalibration) {
          val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true)
135
          addBaseRecalibrator(file, libDir, libraries.size > 1, usePrintReads)
136
137
138
        } else if (useIndelRealigner) {
          addIndelRealign(bamFile.get, libDir, libraries.size > 1)
        } else if (useBaseRecalibration) {
139
          addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1, usePrintReads)
140
141
        }

Peter van 't Hof's avatar
Peter van 't Hof committed
142
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
143
144
          vc.sampleId = Some(sampleId)
          vc.libId = Some(libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
145
          vc.outputDir = new File(libDir, "variantcalling")
146
147
          if (preProcessBam.isDefined) vc.inputBams = Map(sampleId -> preProcessBam.get)
          else vc.inputBams = Map(sampleId -> bamFile.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
148
          add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
149
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
150
      }
151
152
153

      /** Adds base recalibration jobs */
      def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean, usePrintreads: Boolean): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
154
        require(bqsrFile.isDefined, "bqsrFile should contain something at this point")
Peter van 't Hof's avatar
Peter van 't Hof committed
155
        val baseRecalibrator = BaseRecalibrator(qscript, inputBam, bqsrFile.get) // at this point bqsrFile should exist
156
157
158
159
160

        if (baseRecalibrator.knownSites.isEmpty) return inputBam
        add(baseRecalibrator)

        if (useAnalyzeCovariates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
161
162
          val baseRecalibratorAfter = BaseRecalibrator(qscript, inputBam, bqsrAfterFile.get)
          baseRecalibratorAfter.BQSR = bqsrFile
163
164
165
166
          add(baseRecalibratorAfter)
          add(AnalyzeCovariates(qscript, baseRecalibrator.out, baseRecalibratorAfter.out, swapExt(dir, inputBam, ".bam", ".baserecal.pdf")))
        }
        if (usePrintreads) {
Peter van 't Hof's avatar
Peter van 't Hof committed
167
168
169
          val printReads = PrintReads(qscript, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam"))
          printReads.BQSR = Some(baseRecalibrator.out)
          printReads.isIntermediate = isIntermediate
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
          add(printReads)
          printReads.out
        } else inputBam
      }

    } // end of library

    lazy val bqsrFile: Option[File] = {
      val files = libraries.flatMap(_._2.bqsrFile).toList
      if (files.isEmpty) None else {
        val gather = new BqsrGather
        gather.inputBqsrFiles = files
        gather.outputBqsrFile = createFile("baserecal")
        add(gather)
        Some(gather.outputBqsrFile)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
186
187
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
188
    lazy val variantcalling: Option[ShivaVariantcalling with QScript] = if (config("single_sample_variantcalling", default = false).asBoolean) {
189
      Some(makeVariantcalling(multisample = false, sample = Some(sampleId)))
Peter van 't Hof's avatar
Peter van 't Hof committed
190
191
    } else None

192
193
194
195
    override def keepMergedFiles: Boolean = config("keep_merged_files", default = !useIndelRealigner)

    lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)

Peter van 't Hof's avatar
Peter van 't Hof committed
196
    override def preProcessBam: Option[File] = if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
197
198
199
      bamFile.map(swapExt(sampleDir, _, ".bam", ".realign.bam"))
    } else bamFile

Peter van 't Hof's avatar
Peter van 't Hof committed
200
    override def summaryFiles: Map[String, File] = super.summaryFiles ++ variantcalling.map("final" -> _.finalFile)
201

Peter van 't Hof's avatar
Peter van 't Hof committed
202
    /** This will add sample jobs */
203
204
    override def addJobs(): Unit = {
      super.addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
205

206
      if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
Peter van 't Hof's avatar
Peter van 't Hof committed
207
        addIndelRealign(bamFile.get, sampleDir, isIntermediate = false)
208
209
      }

210
      preProcessBam.foreach { bam =>
Peter van 't Hof's avatar
Peter van 't Hof committed
211
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
212
213
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
214
          vc.inputBams = Map(sampleId -> bam)
Peter van 't Hof's avatar
Peter van 't Hof committed
215
          add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
216
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
217
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
218
    }
219
  } // End of sample
Peter van 't Hof's avatar
Peter van 't Hof committed
220

Peter van 't Hof's avatar
Peter van 't Hof committed
221
  lazy val multisampleVariantCalling: Option[ShivaVariantcalling with QScript] = if (config("multisample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
222
223
224
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
225
  lazy val svCalling: Option[ShivaSvCalling] = if (config("sv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
226
    Some(new ShivaSvCalling(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
227
228
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
229
  lazy val cnvCalling: Option[Kopisu] = if (config("cnv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
230
    Some(new Kopisu(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
231
232
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
233
  lazy val annotation: Option[Toucan] = if (multisampleVariantCalling.isDefined &&
234
235
236
237
    config("annotation", default = false).asBoolean) {
    Some(new Toucan(this))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
238
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
239
  override def addMultiSampleJobs(): Unit = {
240
241
    super.addMultiSampleJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
242
    addAll(dbsnpVcfFile.map(Shiva.makeValidateVcfJobs(this, _, referenceFasta(), new File(outputDir, ".validate"))).getOrElse(Nil))
243

244
    multisampleVariantCalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
245
      vc.outputDir = new File(outputDir, "variantcalling")
246
      vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
247
248
      if (!usePrintReads)
        vc.inputBqsrFiles = samples.flatMap { case (sampleId, sample) => sample.bqsrFile.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
249
      add(vc)
250
251
      if (!usePrintReads) {
        import variantcallers._
Peter van 't Hof's avatar
Peter van 't Hof committed
252
        if (vc.callers.exists {
Peter van 't Hof's avatar
Peter van 't Hof committed
253
254
          case _: HaplotypeCaller | _: HaplotypeCallerAllele | _: HaplotypeCallerGvcf => false
          case _: UnifiedGenotyper | _: UnifiedGenotyperAllele => false
255
          case _ => true
Peter van 't Hof's avatar
Peter van 't Hof committed
256
        }) logger.warn("Not all variantcallers chosen can read BQSR files, All non-GATK")
257
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
258

259
      annotation.foreach { toucan =>
Peter van 't Hof's avatar
Peter van 't Hof committed
260
        toucan.outputDir = new File(outputDir, "annotation")
261
        toucan.inputVcf = vc.finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
262
        add(toucan)
Peter van 't Hof's avatar
Peter van 't Hof committed
263
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
264
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
265

Peter van 't Hof's avatar
Peter van 't Hof committed
266
    svCalling.foreach { sv =>
Peter van 't Hof's avatar
Peter van 't Hof committed
267
      sv.outputDir = new File(outputDir, "sv_calling")
268
      sv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
269
      add(sv)
Peter van 't Hof's avatar
Peter van 't Hof committed
270
271
272
273
274
275
276
    }

    cnvCalling.foreach { cnv =>
      cnv.outputDir = new File(outputDir, "cnv_calling")
      cnv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
      add(cnv)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
277
278
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
279
  /** Settings of pipeline for summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
280
  override def summarySettings: Map[String, Any] = super.summarySettings ++ Map(
281
282
283
    "annotation" -> annotation.isDefined,
    "multisample_variantcalling" -> multisampleVariantCalling.isDefined,
    "sv_calling" -> svCalling.isDefined,
284
    "cnv_calling" -> cnvCalling.isDefined,
285
    "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
286
287
    "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")),
    "use_print_reads" -> usePrintReads
288
  )
289
290
291
292
293
294
295
296
297
298
299
300
301
302

  /** Adds indel realignment jobs */
  def addIndelRealign(inputBam: File, dir: File, isIntermediate: Boolean): File = {
    val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir)
    realignerTargetCreator.isIntermediate = true
    add(realignerTargetCreator)

    val indelRealigner = IndelRealigner(this, inputBam, realignerTargetCreator.out, dir)
    indelRealigner.isIntermediate = isIntermediate
    add(indelRealigner)

    indelRealigner.out
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
303
}
Peter van 't Hof's avatar
Peter van 't Hof committed
304

Peter van 't Hof's avatar
Peter van 't Hof committed
305
/** This object give a default main method to the pipelines */
306
307
308
309
310
object Shiva extends PipelineCommand {

  // This is used to only execute 1 validation per vcf file
  private var validateVcfSeen: Set[(File, File)] = Set()

Peter van 't Hof's avatar
Peter van 't Hof committed
311
  def makeValidateVcfJobs(root: Configurable, vcfFile: File, referenceFile: File, outputDir: File): List[QFunction] = {
312
313
314
315
316
317
    if (validateVcfSeen.contains((vcfFile, referenceFile))) Nil
    else {
      validateVcfSeen ++= Set((vcfFile, referenceFile))
      val validateVcf = new ValidateVcf(root)
      validateVcf.inputVcf = vcfFile
      validateVcf.reference = referenceFile
Peter van 't Hof's avatar
Peter van 't Hof committed
318
      validateVcf.jobOutputFile = new File(outputDir, vcfFile.getAbsolutePath + ".validateVcf.out")
319
320
321

      val checkValidateVcf = new CheckValidateVcf
      checkValidateVcf.inputLogFile = validateVcf.jobOutputFile
Peter van 't Hof's avatar
Peter van 't Hof committed
322
      checkValidateVcf.jobOutputFile = new File(outputDir, vcfFile.getAbsolutePath + ".checkValidateVcf.out")
323
324
325
326
327

      List(validateVcf, checkValidateVcf)
    }
  }
}