Shiva.scala 12.7 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
bow's avatar
bow committed
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
package nl.lumc.sasc.biopet.pipelines.shiva

17
18
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.extensions.gatk._
22
import nl.lumc.sasc.biopet.extensions.tools.ValidateVcf
23
import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.pipelines.kopisu.Kopisu
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.utils.config.Configurable
28
import org.broadinstitute.gatk.queue.QScript
29
import org.broadinstitute.gatk.queue.function.QFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
30

Peter van 't Hof's avatar
Peter van 't Hof committed
31
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
34
35
 * Created by pjvan_thof on 2/26/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
36
class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTrait with Reference with TargetRegions { qscript =>
37

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  def this() = this(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
39

Peter van 't Hof's avatar
Peter van 't Hof committed
40
  override def reportClass: Option[ReportBuilderExtension] = {
41
42
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
43
    shiva.summaryDbFile = summaryDbFile
44
45
46
    Some(shiva)
  }

47
48
49
50
51
52
  override def defaults = Map(
    "haplotypecaller" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0),
    "genotypegvcfs" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0),
    "unifiedgenotyper" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0)
  )

Sander Bollen's avatar
Sander Bollen committed
53
  /** Method to make the variantcalling namespace of shiva */
54
55
56
  def makeVariantcalling(multisample: Boolean,
                         sample: Option[String] = None,
                         library: Option[String] = None): ShivaVariantcalling with QScript = {
Peter van 't Hof's avatar
Peter van 't Hof committed
57
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
58
      override def namePrefix = "multisample"
Sander Bollen's avatar
Sander Bollen committed
59
      override def configNamespace: String = "shivavariantcalling"
Peter van 't Hof's avatar
Peter van 't Hof committed
60
61
62
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
Sander Bollen's avatar
Sander Bollen committed
63
      override def configNamespace = "shivavariantcalling"
64
65
      sampleId = sample
      libId = library
Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
68
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
69
  /** Method to make a sample */
70
  override def makeSample(id: String) = new this.Sample(id)
71

Peter van 't Hof's avatar
Peter van 't Hof committed
72
  /** Class that will generate jobs for a sample */
73
  class Sample(sampleId: String) extends super.Sample(sampleId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
74
    /** Method to make a library */
75
    override def makeLibrary(id: String) = new this.Library(id)
Peter van 't Hof's avatar
Peter van 't Hof committed
76

77
    /** Sample specific settings */
78
79
    override def summarySettings = super.summarySettings ++
      Map("single_sample_variantcalling" -> variantcalling.isDefined, "use_indel_realigner" -> useIndelRealigner)
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
    /** Class to generate jobs for a library */
82
    class Library(libId: String) extends super.Library(libId) {
83

84
85
86
87
      override def summaryFiles = super.summaryFiles ++
        variantcalling.map("final" -> _.finalFile) ++
        bqsrFile.map("baserecal" -> _) ++
        bqsrAfterFile.map("baserecal_after" -> _)
88

89
90
91
92
93
94
95
96
      lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
      lazy val useBaseRecalibration: Boolean = {
        val c: Boolean = config("use_base_recalibration", default = true)
        val br = new BaseRecalibrator(qscript)
        if (c && br.knownSites.isEmpty)
          logger.warn("No Known site found, skipping base recalibration, file: " + inputBam)
        c && br.knownSites.nonEmpty
      }
97
98
99
      lazy val usePrintReads: Boolean = if (useBaseRecalibration) config("use_printreads", default = true) else false
      lazy val useAnalyzeCovariates: Boolean = if (useBaseRecalibration) config("use_analyze_covariates", default = true) else false

Peter van 't Hof's avatar
Peter van 't Hof committed
100
101
      lazy val bqsrFile: Option[File] = if (useBaseRecalibration) Some(createFile("baserecal")) else None
      lazy val bqsrAfterFile: Option[File] = if (useAnalyzeCovariates) Some(createFile("baserecal.after")) else None
102

Peter van 't Hof's avatar
Peter van 't Hof committed
103
104
      override def keepFinalBamfile = super.keepFinalBamfile && !useIndelRealigner && !useBaseRecalibration

105
106
107
      override def bamFile = mapping.map(_.mergedBamFile)

      override def preProcessBam = if (useIndelRealigner && usePrintReads)
108
109
        bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
      else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
110
      else if (usePrintReads) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
111
112
      else bamFile

113
      /** Library specific settings */
114
      override def summarySettings = super.summarySettings ++ Map(
115
116
        "library_variantcalling" -> variantcalling.isDefined,
        "use_indel_realigner" -> useIndelRealigner,
117
118
119
120
        "use_base_recalibration" -> useBaseRecalibration,
        "use_print_reads" -> usePrintReads,
        "useAnalyze_covariates" -> useAnalyzeCovariates
      )
121

122
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
123
        (bamFile.isDefined || preProcessBam.isDefined)) {
124
        Some(makeVariantcalling(multisample = false, sample = Some(sampleId), library = Some(libId)))
Peter van 't Hof's avatar
Peter van 't Hof committed
125
126
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
127
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
128
      override def addJobs() = {
129
        super.addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
130

131
132
        if (useIndelRealigner && useBaseRecalibration) {
          val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true)
133
          addBaseRecalibrator(file, libDir, libraries.size > 1, usePrintReads)
134
135
136
        } else if (useIndelRealigner) {
          addIndelRealign(bamFile.get, libDir, libraries.size > 1)
        } else if (useBaseRecalibration) {
137
          addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1, usePrintReads)
138
139
        }

Peter van 't Hof's avatar
Peter van 't Hof committed
140
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
141
142
          vc.sampleId = Some(sampleId)
          vc.libId = Some(libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
143
          vc.outputDir = new File(libDir, "variantcalling")
144
145
          if (preProcessBam.isDefined) vc.inputBams = Map(sampleId -> preProcessBam.get)
          else vc.inputBams = Map(sampleId -> bamFile.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
146
          add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
147
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
148
      }
149
150
151

      /** Adds base recalibration jobs */
      def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean, usePrintreads: Boolean): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
152
        val baseRecalibrator = BaseRecalibrator(qscript, inputBam, bqsrFile.get) // at this point bqsrFile should exist
153
154
155
156
157

        if (baseRecalibrator.knownSites.isEmpty) return inputBam
        add(baseRecalibrator)

        if (useAnalyzeCovariates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
158
159
          val baseRecalibratorAfter = BaseRecalibrator(qscript, inputBam, bqsrAfterFile.get)
          baseRecalibratorAfter.BQSR = bqsrFile
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
          add(baseRecalibratorAfter)
          add(AnalyzeCovariates(qscript, baseRecalibrator.out, baseRecalibratorAfter.out, swapExt(dir, inputBam, ".bam", ".baserecal.pdf")))
        }

        val printReads = PrintReads(qscript, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam"))
        printReads.BQSR = Some(baseRecalibrator.out)
        printReads.isIntermediate = isIntermediate
        if (usePrintreads) {
          add(printReads)
          printReads.out
        } else inputBam
      }

    } // end of library

    lazy val bqsrFile: Option[File] = {
      val files = libraries.flatMap(_._2.bqsrFile).toList
      if (files.isEmpty) None else {
        val gather = new BqsrGather
        gather.inputBqsrFiles = files
        gather.outputBqsrFile = createFile("baserecal")
        add(gather)
        Some(gather.outputBqsrFile)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
184
185
    }

Peter van 't Hof's avatar
fix bug    
Peter van 't Hof committed
186
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
187
      Some(makeVariantcalling(multisample = false, sample = Some(sampleId)))
Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
    } else None

190
191
192
193
194
195
196
197
    override def keepMergedFiles: Boolean = config("keep_merged_files", default = !useIndelRealigner)

    lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)

    override def preProcessBam = if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
      bamFile.map(swapExt(sampleDir, _, ".bam", ".realign.bam"))
    } else bamFile

198
199
    override def summaryFiles = super.summaryFiles ++ variantcalling.map("final" -> _.finalFile)

Peter van 't Hof's avatar
Peter van 't Hof committed
200
    /** This will add sample jobs */
201
202
    override def addJobs(): Unit = {
      super.addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
203

204
205
206
207
      if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
        addIndelRealign(bamFile.get, sampleDir, false)
      }

208
      preProcessBam.foreach { bam =>
Peter van 't Hof's avatar
Peter van 't Hof committed
209
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
210
211
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
212
          vc.inputBams = Map(sampleId -> bam)
Peter van 't Hof's avatar
Peter van 't Hof committed
213
          add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
214
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
215
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
216
    }
217
  } // End of sample
Peter van 't Hof's avatar
Peter van 't Hof committed
218

219
  lazy val multisampleVariantCalling = if (config("multisample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
220
221
222
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
223
  lazy val svCalling = if (config("sv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
224
    Some(new ShivaSvCalling(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
225
226
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
227
  lazy val cnvCalling = if (config("cnv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
228
    Some(new Kopisu(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
229
230
  } else None

231
232
233
234
235
  lazy val annotation = if (multisampleVariantCalling.isDefined &&
    config("annotation", default = false).asBoolean) {
    Some(new Toucan(this))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
236
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
237
  override def addMultiSampleJobs() = {
238
239
    super.addMultiSampleJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
240
    addAll(dbsnpVcfFile.map(Shiva.makeValidateVcfJobs(this, _, referenceFasta(), new File(outputDir, ".validate"))).getOrElse(Nil))
241

242
    multisampleVariantCalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
243
      vc.outputDir = new File(outputDir, "variantcalling")
244
      vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
245
      vc.inputBqsrFiles = samples.flatMap { case (sampleId, sample) => sample.bqsrFile.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
246
      add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
247

248
      annotation.foreach { toucan =>
Peter van 't Hof's avatar
Peter van 't Hof committed
249
        toucan.outputDir = new File(outputDir, "annotation")
250
        toucan.inputVcf = vc.finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
251
        add(toucan)
Peter van 't Hof's avatar
Peter van 't Hof committed
252
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
253
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
254

Peter van 't Hof's avatar
Peter van 't Hof committed
255
    svCalling.foreach { sv =>
Peter van 't Hof's avatar
Peter van 't Hof committed
256
      sv.outputDir = new File(outputDir, "sv_calling")
257
      sv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
258
      add(sv)
Peter van 't Hof's avatar
Peter van 't Hof committed
259
260
261
262
263
264
265
    }

    cnvCalling.foreach { cnv =>
      cnv.outputDir = new File(outputDir, "cnv_calling")
      cnv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
      add(cnv)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
266
267
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
268
  /** Settings of pipeline for summary */
269
  override def summarySettings = super.summarySettings ++ Map(
270
271
272
    "annotation" -> annotation.isDefined,
    "multisample_variantcalling" -> multisampleVariantCalling.isDefined,
    "sv_calling" -> svCalling.isDefined,
273
    "cnv_calling" -> cnvCalling.isDefined,
274
275
276
    "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
    "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed"))
  )
277
278
279
280
281
282
283
284
285
286
287
288
289
290

  /** Adds indel realignment jobs */
  def addIndelRealign(inputBam: File, dir: File, isIntermediate: Boolean): File = {
    val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir)
    realignerTargetCreator.isIntermediate = true
    add(realignerTargetCreator)

    val indelRealigner = IndelRealigner(this, inputBam, realignerTargetCreator.out, dir)
    indelRealigner.isIntermediate = isIntermediate
    add(indelRealigner)

    indelRealigner.out
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
291
}
Peter van 't Hof's avatar
Peter van 't Hof committed
292

Peter van 't Hof's avatar
Peter van 't Hof committed
293
/** This object give a default main method to the pipelines */
294
295
296
297
298
object Shiva extends PipelineCommand {

  // This is used to only execute 1 validation per vcf file
  private var validateVcfSeen: Set[(File, File)] = Set()

Peter van 't Hof's avatar
Peter van 't Hof committed
299
  def makeValidateVcfJobs(root: Configurable, vcfFile: File, referenceFile: File, outputDir: File): List[QFunction] = {
300
301
302
303
304
305
    if (validateVcfSeen.contains((vcfFile, referenceFile))) Nil
    else {
      validateVcfSeen ++= Set((vcfFile, referenceFile))
      val validateVcf = new ValidateVcf(root)
      validateVcf.inputVcf = vcfFile
      validateVcf.reference = referenceFile
Peter van 't Hof's avatar
Peter van 't Hof committed
306
      validateVcf.jobOutputFile = new File(outputDir, vcfFile.getAbsolutePath + ".validateVcf.out")
307
308
309

      val checkValidateVcf = new CheckValidateVcf
      checkValidateVcf.inputLogFile = validateVcf.jobOutputFile
Peter van 't Hof's avatar
Peter van 't Hof committed
310
      checkValidateVcf.jobOutputFile = new File(outputDir, vcfFile.getAbsolutePath + ".checkValidateVcf.out")
311
312
313
314
315

      List(validateVcf, checkValidateVcf)
    }
  }
}