Shiva.scala 10.7 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
bow's avatar
bow committed
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
package nl.lumc.sasc.biopet.pipelines.shiva

17
18
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.extensions.gatk._
22
import nl.lumc.sasc.biopet.extensions.tools.ValidateVcf
23
import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.pipelines.kopisu.Kopisu
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.utils.config.Configurable
28
import org.broadinstitute.gatk.queue.QScript
29
import org.broadinstitute.gatk.queue.function.QFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
30

Peter van 't Hof's avatar
Peter van 't Hof committed
31
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
34
35
 * Created by pjvan_thof on 2/26/15.
 */
36
37
class Shiva(val root: Configurable) extends QScript with MultisampleMappingTrait with Reference with TargetRegions { qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  def this() = this(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
39

Peter van 't Hof's avatar
Peter van 't Hof committed
40
  override def reportClass: Option[ReportBuilderExtension] = {
41
42
43
44
45
46
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
    shiva.summaryFile = summaryFile
    Some(shiva)
  }

47
48
49
50
51
52
  override def defaults = Map(
    "haplotypecaller" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0),
    "genotypegvcfs" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0),
    "unifiedgenotyper" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0)
  )

Sander Bollen's avatar
Sander Bollen committed
53
  /** Method to make the variantcalling namespace of shiva */
54
  def makeVariantcalling(multisample: Boolean = false): ShivaVariantcalling with QScript = {
Peter van 't Hof's avatar
Peter van 't Hof committed
55
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
56
      override def namePrefix = "multisample"
Sander Bollen's avatar
Sander Bollen committed
57
      override def configNamespace: String = "shivavariantcalling"
Peter van 't Hof's avatar
Peter van 't Hof committed
58
59
60
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
Sander Bollen's avatar
Sander Bollen committed
61
      override def configNamespace = "shivavariantcalling"
Peter van 't Hof's avatar
Peter van 't Hof committed
62
63
64
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
65
  /** Method to make a sample */
66
  override def makeSample(id: String) = new this.Sample(id)
67

Peter van 't Hof's avatar
Peter van 't Hof committed
68
  /** Class that will generate jobs for a sample */
69
  class Sample(sampleId: String) extends super.Sample(sampleId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
70
    /** Method to make a library */
71
    override def makeLibrary(id: String) = new this.Library(id)
Peter van 't Hof's avatar
Peter van 't Hof committed
72

73
    /** Sample specific settings */
74
75
    override def summarySettings = super.summarySettings ++
      Map("single_sample_variantcalling" -> variantcalling.isDefined, "use_indel_realigner" -> useIndelRealigner)
76

Peter van 't Hof's avatar
Peter van 't Hof committed
77
    /** Class to generate jobs for a library */
78
    class Library(libId: String) extends super.Library(libId) {
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94

      lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
      lazy val useBaseRecalibration: Boolean = {
        val c: Boolean = config("use_base_recalibration", default = true)
        val br = new BaseRecalibrator(qscript)
        if (c && br.knownSites.isEmpty)
          logger.warn("No Known site found, skipping base recalibration, file: " + inputBam)
        c && br.knownSites.nonEmpty
      }

      override def preProcessBam = if (useIndelRealigner && useBaseRecalibration)
        bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
      else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
      else if (useBaseRecalibration) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
      else bamFile

95
      /** Library specific settings */
96
97
98
99
      override def summarySettings = Map(
        "library_variantcalling" -> variantcalling.isDefined,
        "use_indel_realigner" -> useIndelRealigner,
        "use_base_recalibration" -> useBaseRecalibration)
100

101
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
102
103
104
105
        (bamFile.isDefined || preProcessBam.isDefined)) {
        Some(makeVariantcalling(multisample = false))
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
106
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
107
      override def addJobs() = {
108
        super.addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
109

110
111
112
113
114
115
116
117
118
        if (useIndelRealigner && useBaseRecalibration) {
          val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true)
          addBaseRecalibrator(file, libDir, libraries.size > 1)
        } else if (useIndelRealigner) {
          addIndelRealign(bamFile.get, libDir, libraries.size > 1)
        } else if (useBaseRecalibration) {
          addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1)
        }

Peter van 't Hof's avatar
Peter van 't Hof committed
119
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
120
121
          vc.sampleId = Some(sampleId)
          vc.libId = Some(libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
122
          vc.outputDir = new File(libDir, "variantcalling")
123
124
          if (preProcessBam.isDefined) vc.inputBams = Map(sampleId -> preProcessBam.get)
          else vc.inputBams = Map(sampleId -> bamFile.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
125
          add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
126
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
127
128
129
      }
    }

Peter van 't Hof's avatar
fix bug    
Peter van 't Hof committed
130
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
131
      Some(makeVariantcalling(multisample = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
132
133
    } else None

134
135
136
137
138
139
140
141
    override def keepMergedFiles: Boolean = config("keep_merged_files", default = !useIndelRealigner)

    lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)

    override def preProcessBam = if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
      bamFile.map(swapExt(sampleDir, _, ".bam", ".realign.bam"))
    } else bamFile

Peter van 't Hof's avatar
Peter van 't Hof committed
142
    /** This will add sample jobs */
143
144
    override def addJobs(): Unit = {
      super.addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
145

146
147
148
149
      if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
        addIndelRealign(bamFile.get, sampleDir, false)
      }

150
      preProcessBam.foreach { bam =>
Peter van 't Hof's avatar
Peter van 't Hof committed
151
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
152
153
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
154
          vc.inputBams = Map(sampleId -> bam)
Peter van 't Hof's avatar
Peter van 't Hof committed
155
          add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
156
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
157
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
158
159
160
    }
  }

161
  lazy val multisampleVariantCalling = if (config("multisample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
162
163
164
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
165
  lazy val svCalling = if (config("sv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
166
    Some(new ShivaSvCalling(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
167
168
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
169
  lazy val cnvCalling = if (config("cnv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
170
    Some(new Kopisu(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
  } else None

173
174
175
176
177
  lazy val annotation = if (multisampleVariantCalling.isDefined &&
    config("annotation", default = false).asBoolean) {
    Some(new Toucan(this))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
178
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
179
  override def addMultiSampleJobs() = {
180
181
    super.addMultiSampleJobs()

182
183
    addAll(dbsnpVcfFile.map(Shiva.makeValidateVcfJobs(this, _, referenceFasta())).getOrElse(Nil))

184
    multisampleVariantCalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
185
      vc.outputDir = new File(outputDir, "variantcalling")
186
      vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
187
      add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
188

189
      annotation.foreach { toucan =>
Peter van 't Hof's avatar
Peter van 't Hof committed
190
        toucan.outputDir = new File(outputDir, "annotation")
191
        toucan.inputVcf = vc.finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
192
        add(toucan)
Peter van 't Hof's avatar
Peter van 't Hof committed
193
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
194
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
195

Peter van 't Hof's avatar
Peter van 't Hof committed
196
    svCalling.foreach { sv =>
Peter van 't Hof's avatar
Peter van 't Hof committed
197
      sv.outputDir = new File(outputDir, "sv_calling")
198
      sv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
199
      add(sv)
Peter van 't Hof's avatar
Peter van 't Hof committed
200
201
202
203
204
205
206
    }

    cnvCalling.foreach { cnv =>
      cnv.outputDir = new File(outputDir, "cnv_calling")
      cnv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
      add(cnv)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
207
208
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
209
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
210
  def summaryFile = new File(outputDir, "Shiva.summary.json")
Peter van 't Hof's avatar
Peter van 't Hof committed
211

Peter van 't Hof's avatar
Peter van 't Hof committed
212
  /** Settings of pipeline for summary */
213
  override def summarySettings = super.summarySettings ++ Map(
214
215
216
    "annotation" -> annotation.isDefined,
    "multisample_variantcalling" -> multisampleVariantCalling.isDefined,
    "sv_calling" -> svCalling.isDefined,
217
    "cnv_calling" -> cnvCalling.isDefined,
218
219
220
    "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
    "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed"))
  )
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256

  /** Adds indel realignment jobs */
  def addIndelRealign(inputBam: File, dir: File, isIntermediate: Boolean): File = {
    val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir)
    realignerTargetCreator.isIntermediate = true
    add(realignerTargetCreator)

    val indelRealigner = IndelRealigner(this, inputBam, realignerTargetCreator.out, dir)
    indelRealigner.isIntermediate = isIntermediate
    add(indelRealigner)

    indelRealigner.out
  }

  /** Adds base recalibration jobs */
  def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean): File = {
    val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal"))

    if (baseRecalibrator.knownSites.isEmpty) return inputBam
    add(baseRecalibrator)

    if (config("use_analyze_covariates", default = true).asBoolean) {
      val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after"))
      baseRecalibratorAfter.BQSR = Some(baseRecalibrator.out)
      add(baseRecalibratorAfter)

      add(AnalyzeCovariates(this, baseRecalibrator.out, baseRecalibratorAfter.out, swapExt(dir, inputBam, ".bam", ".baserecal.pdf")))
    }

    val printReads = PrintReads(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam"))
    printReads.BQSR = Some(baseRecalibrator.out)
    printReads.isIntermediate = isIntermediate
    add(printReads)

    printReads.out
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
257
}
Peter van 't Hof's avatar
Peter van 't Hof committed
258

Peter van 't Hof's avatar
Peter van 't Hof committed
259
/** This object give a default main method to the pipelines */
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
object Shiva extends PipelineCommand {

  // This is used to only execute 1 validation per vcf file
  private var validateVcfSeen: Set[(File, File)] = Set()

  def makeValidateVcfJobs(root: Configurable, vcfFile: File, referenceFile: File): List[QFunction] = {
    if (validateVcfSeen.contains((vcfFile, referenceFile))) Nil
    else {
      validateVcfSeen ++= Set((vcfFile, referenceFile))
      val validateVcf = new ValidateVcf(root)
      validateVcf.inputVcf = vcfFile
      validateVcf.reference = referenceFile

      val checkValidateVcf = new CheckValidateVcf
      checkValidateVcf.inputLogFile = validateVcf.jobOutputFile

      List(validateVcf, checkValidateVcf)
    }
  }
}