Shiva.scala 11.3 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
bow's avatar
bow committed
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
package nl.lumc.sasc.biopet.pipelines.shiva

17
18
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.extensions.gatk._
22
import nl.lumc.sasc.biopet.extensions.tools.ValidateVcf
23
import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.pipelines.kopisu.Kopisu
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.utils.config.Configurable
28
import org.broadinstitute.gatk.queue.QScript
29
import org.broadinstitute.gatk.queue.function.QFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
30

Peter van 't Hof's avatar
Peter van 't Hof committed
31
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
34
35
 * Created by pjvan_thof on 2/26/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
36
class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTrait with Reference with TargetRegions { qscript =>
37

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  def this() = this(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
39

Peter van 't Hof's avatar
Peter van 't Hof committed
40
  override def reportClass: Option[ReportBuilderExtension] = {
41
42
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
43
    shiva.summaryDbFile = summaryDbFile
44
45
46
    Some(shiva)
  }

47
48
49
50
51
52
  override def defaults = Map(
    "haplotypecaller" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0),
    "genotypegvcfs" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0),
    "unifiedgenotyper" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0)
  )

Sander Bollen's avatar
Sander Bollen committed
53
  /** Method to make the variantcalling namespace of shiva */
54
  def makeVariantcalling(multisample: Boolean, sample: Option[String] = None, library: Option[String] = None): ShivaVariantcalling with QScript = {
Peter van 't Hof's avatar
Peter van 't Hof committed
55
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
56
      override def namePrefix = "multisample"
Sander Bollen's avatar
Sander Bollen committed
57
      override def configNamespace: String = "shivavariantcalling"
Peter van 't Hof's avatar
Peter van 't Hof committed
58
59
60
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
Sander Bollen's avatar
Sander Bollen committed
61
      override def configNamespace = "shivavariantcalling"
62
63
      sampleId = sample
      libId = library
Peter van 't Hof's avatar
Peter van 't Hof committed
64
65
66
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
67
  /** Method to make a sample */
68
  override def makeSample(id: String) = new this.Sample(id)
69

Peter van 't Hof's avatar
Peter van 't Hof committed
70
  /** Class that will generate jobs for a sample */
71
  class Sample(sampleId: String) extends super.Sample(sampleId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
72
    /** Method to make a library */
73
    override def makeLibrary(id: String) = new this.Library(id)
Peter van 't Hof's avatar
Peter van 't Hof committed
74

75
    /** Sample specific settings */
76
77
    override def summarySettings = super.summarySettings ++
      Map("single_sample_variantcalling" -> variantcalling.isDefined, "use_indel_realigner" -> useIndelRealigner)
78

Peter van 't Hof's avatar
Peter van 't Hof committed
79
    /** Class to generate jobs for a library */
80
    class Library(libId: String) extends super.Library(libId) {
81

82
83
      override def summaryFiles = super.summaryFiles ++ variantcalling.map("final" -> _.finalFile)

84
85
86
87
88
89
90
91
92
      lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
      lazy val useBaseRecalibration: Boolean = {
        val c: Boolean = config("use_base_recalibration", default = true)
        val br = new BaseRecalibrator(qscript)
        if (c && br.knownSites.isEmpty)
          logger.warn("No Known site found, skipping base recalibration, file: " + inputBam)
        c && br.knownSites.nonEmpty
      }

Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
      override def keepFinalBamfile = super.keepFinalBamfile && !useIndelRealigner && !useBaseRecalibration

95
96
97
98
99
100
      override def preProcessBam = if (useIndelRealigner && useBaseRecalibration)
        bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
      else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
      else if (useBaseRecalibration) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
      else bamFile

101
      /** Library specific settings */
102
103
104
105
      override def summarySettings = Map(
        "library_variantcalling" -> variantcalling.isDefined,
        "use_indel_realigner" -> useIndelRealigner,
        "use_base_recalibration" -> useBaseRecalibration)
106

107
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
108
        (bamFile.isDefined || preProcessBam.isDefined)) {
109
        Some(makeVariantcalling(multisample = false, sample = Some(sampleId), library = Some(libId)))
Peter van 't Hof's avatar
Peter van 't Hof committed
110
111
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
112
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
113
      override def addJobs() = {
114
        super.addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
115

116
117
118
119
120
121
122
123
124
        if (useIndelRealigner && useBaseRecalibration) {
          val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true)
          addBaseRecalibrator(file, libDir, libraries.size > 1)
        } else if (useIndelRealigner) {
          addIndelRealign(bamFile.get, libDir, libraries.size > 1)
        } else if (useBaseRecalibration) {
          addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1)
        }

Peter van 't Hof's avatar
Peter van 't Hof committed
125
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
126
127
          vc.sampleId = Some(sampleId)
          vc.libId = Some(libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
128
          vc.outputDir = new File(libDir, "variantcalling")
129
130
          if (preProcessBam.isDefined) vc.inputBams = Map(sampleId -> preProcessBam.get)
          else vc.inputBams = Map(sampleId -> bamFile.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
131
          add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
132
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
133
134
135
      }
    }

Peter van 't Hof's avatar
fix bug    
Peter van 't Hof committed
136
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
137
      Some(makeVariantcalling(multisample = false, sample = Some(sampleId)))
Peter van 't Hof's avatar
Peter van 't Hof committed
138
139
    } else None

140
141
142
143
144
145
146
147
    override def keepMergedFiles: Boolean = config("keep_merged_files", default = !useIndelRealigner)

    lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)

    override def preProcessBam = if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
      bamFile.map(swapExt(sampleDir, _, ".bam", ".realign.bam"))
    } else bamFile

148
149
    override def summaryFiles = super.summaryFiles ++ variantcalling.map("final" -> _.finalFile)

Peter van 't Hof's avatar
Peter van 't Hof committed
150
    /** This will add sample jobs */
151
152
    override def addJobs(): Unit = {
      super.addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
153

154
155
156
157
      if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
        addIndelRealign(bamFile.get, sampleDir, false)
      }

158
      preProcessBam.foreach { bam =>
Peter van 't Hof's avatar
Peter van 't Hof committed
159
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
160
161
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
162
          vc.inputBams = Map(sampleId -> bam)
Peter van 't Hof's avatar
Peter van 't Hof committed
163
          add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
164
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
165
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
166
167
168
    }
  }

169
  lazy val multisampleVariantCalling = if (config("multisample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
170
171
172
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
173
  lazy val svCalling = if (config("sv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
174
    Some(new ShivaSvCalling(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
175
176
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
177
  lazy val cnvCalling = if (config("cnv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
178
    Some(new Kopisu(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
179
180
  } else None

181
182
183
184
185
  lazy val annotation = if (multisampleVariantCalling.isDefined &&
    config("annotation", default = false).asBoolean) {
    Some(new Toucan(this))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
186
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
187
  override def addMultiSampleJobs() = {
188
189
    super.addMultiSampleJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
190
    addAll(dbsnpVcfFile.map(Shiva.makeValidateVcfJobs(this, _, referenceFasta(), new File(outputDir, ".validate"))).getOrElse(Nil))
191

192
    multisampleVariantCalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
193
      vc.outputDir = new File(outputDir, "variantcalling")
194
      vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
195
      add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
196

197
      annotation.foreach { toucan =>
Peter van 't Hof's avatar
Peter van 't Hof committed
198
        toucan.outputDir = new File(outputDir, "annotation")
199
        toucan.inputVcf = vc.finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
200
        add(toucan)
Peter van 't Hof's avatar
Peter van 't Hof committed
201
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
202
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
203

Peter van 't Hof's avatar
Peter van 't Hof committed
204
    svCalling.foreach { sv =>
Peter van 't Hof's avatar
Peter van 't Hof committed
205
      sv.outputDir = new File(outputDir, "sv_calling")
206
      sv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
207
      add(sv)
Peter van 't Hof's avatar
Peter van 't Hof committed
208
209
210
211
212
213
214
    }

    cnvCalling.foreach { cnv =>
      cnv.outputDir = new File(outputDir, "cnv_calling")
      cnv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
      add(cnv)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
215
216
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
217
  /** Settings of pipeline for summary */
218
  override def summarySettings = super.summarySettings ++ Map(
219
220
221
    "annotation" -> annotation.isDefined,
    "multisample_variantcalling" -> multisampleVariantCalling.isDefined,
    "sv_calling" -> svCalling.isDefined,
222
    "cnv_calling" -> cnvCalling.isDefined,
223
224
225
    "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
    "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed"))
  )
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261

  /** Adds indel realignment jobs */
  def addIndelRealign(inputBam: File, dir: File, isIntermediate: Boolean): File = {
    val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir)
    realignerTargetCreator.isIntermediate = true
    add(realignerTargetCreator)

    val indelRealigner = IndelRealigner(this, inputBam, realignerTargetCreator.out, dir)
    indelRealigner.isIntermediate = isIntermediate
    add(indelRealigner)

    indelRealigner.out
  }

  /** Adds base recalibration jobs */
  def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean): File = {
    val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal"))

    if (baseRecalibrator.knownSites.isEmpty) return inputBam
    add(baseRecalibrator)

    if (config("use_analyze_covariates", default = true).asBoolean) {
      val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after"))
      baseRecalibratorAfter.BQSR = Some(baseRecalibrator.out)
      add(baseRecalibratorAfter)

      add(AnalyzeCovariates(this, baseRecalibrator.out, baseRecalibratorAfter.out, swapExt(dir, inputBam, ".bam", ".baserecal.pdf")))
    }

    val printReads = PrintReads(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam"))
    printReads.BQSR = Some(baseRecalibrator.out)
    printReads.isIntermediate = isIntermediate
    add(printReads)

    printReads.out
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
262
}
Peter van 't Hof's avatar
Peter van 't Hof committed
263

Peter van 't Hof's avatar
Peter van 't Hof committed
264
/** This object give a default main method to the pipelines */
265
266
267
268
269
object Shiva extends PipelineCommand {

  // This is used to only execute 1 validation per vcf file
  private var validateVcfSeen: Set[(File, File)] = Set()

Peter van 't Hof's avatar
Peter van 't Hof committed
270
  def makeValidateVcfJobs(root: Configurable, vcfFile: File, referenceFile: File, outputDir: File): List[QFunction] = {
271
272
273
274
275
276
    if (validateVcfSeen.contains((vcfFile, referenceFile))) Nil
    else {
      validateVcfSeen ++= Set((vcfFile, referenceFile))
      val validateVcf = new ValidateVcf(root)
      validateVcf.inputVcf = vcfFile
      validateVcf.reference = referenceFile
Peter van 't Hof's avatar
Peter van 't Hof committed
277
      validateVcf.jobOutputFile = new File(outputDir, vcfFile.getAbsolutePath + ".validateVcf.out")
278
279
280

      val checkValidateVcf = new CheckValidateVcf
      checkValidateVcf.inputLogFile = validateVcf.jobOutputFile
Peter van 't Hof's avatar
Peter van 't Hof committed
281
      checkValidateVcf.jobOutputFile = new File(outputDir, vcfFile.getAbsolutePath + ".checkValidateVcf.out")
282
283
284
285
286

      List(validateVcf, checkValidateVcf)
    }
  }
}