Shiva.scala 13.7 KB
Newer Older
bow's avatar
bow committed
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
bow's avatar
bow committed
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15 16
package nl.lumc.sasc.biopet.pipelines.shiva

17 18
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.extensions.gatk._
22
import nl.lumc.sasc.biopet.extensions.tools.ValidateVcf
23
import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
24
import nl.lumc.sasc.biopet.pipelines.kopisu.Kopisu
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.mapping.{ Mapping, MultisampleMappingTrait }
26
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.utils.config.Configurable
28
import org.broadinstitute.gatk.queue.QScript
29
import org.broadinstitute.gatk.queue.function.QFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
30

Peter van 't Hof's avatar
Peter van 't Hof committed
31
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
32 33
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
34 35
 * Created by pjvan_thof on 2/26/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
36
class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTrait with Reference with TargetRegions { qscript =>
37

38
  def this() = this(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
39

Peter van 't Hof's avatar
Peter van 't Hof committed
40
  override def reportClass: Option[ReportBuilderExtension] = {
41 42
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
43
    shiva.summaryDbFile = summaryDbFile
44 45 46
    Some(shiva)
  }

47 48 49 50 51 52
  override def defaults = Map(
    "haplotypecaller" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0),
    "genotypegvcfs" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0),
    "unifiedgenotyper" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0)
  )

53 54
  lazy val usePrintReads: Boolean = config("use_printreads", default = true)

55
  /** Method to make the variantcalling namespace of shiva */
56 57 58
  def makeVariantcalling(multisample: Boolean,
                         sample: Option[String] = None,
                         library: Option[String] = None): ShivaVariantcalling with QScript = {
Peter van 't Hof's avatar
Peter van 't Hof committed
59
    if (multisample) new ShivaVariantcalling(qscript) {
60
      override def namePrefix = "multisample"
61
      override def configNamespace: String = "shivavariantcalling"
Peter van 't Hof's avatar
Peter van 't Hof committed
62
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
63
      genders = samples.map { case (sampleName, sample) => sampleName -> sample.gender }
Peter van 't Hof's avatar
Peter van 't Hof committed
64 65
    }
    else new ShivaVariantcalling(qscript) {
66
      override def configNamespace = "shivavariantcalling"
67 68
      sampleId = sample
      libId = library
69
      genders = sample.map(x => x -> samples(x).gender).toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
70 71 72
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
73
  /** Method to make a sample */
74
  override def makeSample(id: String) = new this.Sample(id)
75

Peter van 't Hof's avatar
Peter van 't Hof committed
76
  /** Class that will generate jobs for a sample */
77
  class Sample(sampleId: String) extends super.Sample(sampleId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
78
    /** Method to make a library */
79
    override def makeLibrary(id: String) = new this.Library(id)
Peter van 't Hof's avatar
Peter van 't Hof committed
80

81
    /** Sample specific settings */
Peter van 't Hof's avatar
Peter van 't Hof committed
82
    override def summarySettings: Map[String, Any] = super.summarySettings ++
83
      Map("single_sample_variantcalling" -> variantcalling.isDefined, "use_indel_realigner" -> useIndelRealigner)
84

Peter van 't Hof's avatar
Peter van 't Hof committed
85
    /** Class to generate jobs for a library */
86
    class Library(libId: String) extends super.Library(libId) {
87

Peter van 't Hof's avatar
Peter van 't Hof committed
88
      override def summaryFiles: Map[String, File] = super.summaryFiles ++
89 90 91
        variantcalling.map("final" -> _.finalFile) ++
        bqsrFile.map("baserecal" -> _) ++
        bqsrAfterFile.map("baserecal_after" -> _)
92

93 94 95 96 97 98 99 100
      lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
      lazy val useBaseRecalibration: Boolean = {
        val c: Boolean = config("use_base_recalibration", default = true)
        val br = new BaseRecalibrator(qscript)
        if (c && br.knownSites.isEmpty)
          logger.warn("No Known site found, skipping base recalibration, file: " + inputBam)
        c && br.knownSites.nonEmpty
      }
101 102
      lazy val useAnalyzeCovariates: Boolean = if (useBaseRecalibration) config("use_analyze_covariates", default = true) else false

Peter van 't Hof's avatar
Peter van 't Hof committed
103 104
      lazy val bqsrFile: Option[File] = if (useBaseRecalibration) Some(createFile("baserecal")) else None
      lazy val bqsrAfterFile: Option[File] = if (useAnalyzeCovariates) Some(createFile("baserecal.after")) else None
105

Peter van 't Hof's avatar
Peter van 't Hof committed
106
      override def keepFinalBamfile: Boolean = super.keepFinalBamfile && !useIndelRealigner && !useBaseRecalibration
Peter van 't Hof's avatar
Peter van 't Hof committed
107

Peter van 't Hof's avatar
Peter van 't Hof committed
108
      override def preProcessBam: Option[Mapping#File] = if (useIndelRealigner && usePrintReads && useBaseRecalibration)
109 110
        bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
      else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
Peter van 't Hof's avatar
Peter van 't Hof committed
111
      else if (usePrintReads && useBaseRecalibration) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
112 113
      else bamFile

114
      /** Library specific settings */
Peter van 't Hof's avatar
Peter van 't Hof committed
115
      override def summarySettings: Map[String, Any] = super.summarySettings ++ Map(
116 117
        "library_variantcalling" -> variantcalling.isDefined,
        "use_indel_realigner" -> useIndelRealigner,
118 119 120
        "use_base_recalibration" -> useBaseRecalibration,
        "useAnalyze_covariates" -> useAnalyzeCovariates
      )
121

Peter van 't Hof's avatar
Peter van 't Hof committed
122
      lazy val variantcalling: Option[ShivaVariantcalling with QScript] = if (config("library_variantcalling", default = false).asBoolean &&
123
        (bamFile.isDefined || preProcessBam.isDefined)) {
124
        Some(makeVariantcalling(multisample = false, sample = Some(sampleId), library = Some(libId)))
125 126
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
127
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
128
      override def addJobs(): Unit = {
129
        super.addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
130

131 132
        if (useIndelRealigner && useBaseRecalibration) {
          val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true)
133
          addBaseRecalibrator(file, libDir, libraries.size > 1, usePrintReads)
134 135 136
        } else if (useIndelRealigner) {
          addIndelRealign(bamFile.get, libDir, libraries.size > 1)
        } else if (useBaseRecalibration) {
137
          addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1, usePrintReads)
138 139
        }

140
        variantcalling.foreach(vc => {
141 142
          vc.sampleId = Some(sampleId)
          vc.libId = Some(libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
143
          vc.outputDir = new File(libDir, "variantcalling")
144 145
          if (preProcessBam.isDefined) vc.inputBams = Map(sampleId -> preProcessBam.get)
          else vc.inputBams = Map(sampleId -> bamFile.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
146
          add(vc)
147
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
148
      }
149 150 151

      /** Adds base recalibration jobs */
      def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean, usePrintreads: Boolean): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
152
        require(bqsrFile.isDefined, "bqsrFile should contain something at this point")
Peter van 't Hof's avatar
Peter van 't Hof committed
153
        val baseRecalibrator = BaseRecalibrator(qscript, inputBam, bqsrFile.get) // at this point bqsrFile should exist
154 155 156 157 158

        if (baseRecalibrator.knownSites.isEmpty) return inputBam
        add(baseRecalibrator)

        if (useAnalyzeCovariates) {
Peter van 't Hof's avatar
Peter van 't Hof committed
159 160
          val baseRecalibratorAfter = BaseRecalibrator(qscript, inputBam, bqsrAfterFile.get)
          baseRecalibratorAfter.BQSR = bqsrFile
161 162 163 164
          add(baseRecalibratorAfter)
          add(AnalyzeCovariates(qscript, baseRecalibrator.out, baseRecalibratorAfter.out, swapExt(dir, inputBam, ".bam", ".baserecal.pdf")))
        }
        if (usePrintreads) {
Peter van 't Hof's avatar
Peter van 't Hof committed
165 166 167
          val printReads = PrintReads(qscript, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam"))
          printReads.BQSR = Some(baseRecalibrator.out)
          printReads.isIntermediate = isIntermediate
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
          add(printReads)
          printReads.out
        } else inputBam
      }

    } // end of library

    lazy val bqsrFile: Option[File] = {
      val files = libraries.flatMap(_._2.bqsrFile).toList
      if (files.isEmpty) None else {
        val gather = new BqsrGather
        gather.inputBqsrFiles = files
        gather.outputBqsrFile = createFile("baserecal")
        add(gather)
        Some(gather.outputBqsrFile)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
184 185
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
186
    lazy val variantcalling: Option[ShivaVariantcalling with QScript] = if (config("single_sample_variantcalling", default = false).asBoolean) {
187
      Some(makeVariantcalling(multisample = false, sample = Some(sampleId)))
188 189
    } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
190
    override def keepMergedFiles: Boolean = config("keep_merged_files", default = !useIndelRealigner || (libraries.size == 1))
191 192 193

    lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)

Peter van 't Hof's avatar
Peter van 't Hof committed
194
    override def preProcessBam: Option[File] = if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
195 196 197
      bamFile.map(swapExt(sampleDir, _, ".bam", ".realign.bam"))
    } else bamFile

Peter van 't Hof's avatar
Peter van 't Hof committed
198
    override def summaryFiles: Map[String, File] = super.summaryFiles ++ variantcalling.map("final" -> _.finalFile)
199

Peter van 't Hof's avatar
Peter van 't Hof committed
200
    /** This will add sample jobs */
201 202
    override def addJobs(): Unit = {
      super.addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
203

204
      if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
Peter van 't Hof's avatar
Peter van 't Hof committed
205
        addIndelRealign(bamFile.get, sampleDir, isIntermediate = false)
206 207
      }

208
      preProcessBam.foreach { bam =>
209
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
210 211
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
212
          vc.inputBams = Map(sampleId -> bam)
Peter van 't Hof's avatar
Peter van 't Hof committed
213
          add(vc)
214
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
215
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
216
    }
217
  } // End of sample
Peter van 't Hof's avatar
Peter van 't Hof committed
218

Peter van 't Hof's avatar
Peter van 't Hof committed
219
  lazy val multisampleVariantCalling: Option[ShivaVariantcalling with QScript] = if (config("multisample_variantcalling", default = true).asBoolean) {
220 221 222
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
223
  lazy val svCalling: Option[ShivaSvCalling] = if (config("sv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
224
    Some(new ShivaSvCalling(this))
225 226
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
227
  lazy val cnvCalling: Option[Kopisu] = if (config("cnv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
228
    Some(new Kopisu(this))
229 230
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
231
  lazy val annotation: Option[Toucan] = if (multisampleVariantCalling.isDefined &&
232 233 234 235
    config("annotation", default = false).asBoolean) {
    Some(new Toucan(this))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
236
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
237
  override def addMultiSampleJobs(): Unit = {
238 239
    super.addMultiSampleJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
240
    addAll(dbsnpVcfFile.map(Shiva.makeValidateVcfJobs(this, _, referenceFasta(), new File(outputDir, ".validate"))).getOrElse(Nil))
241

242
    multisampleVariantCalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
243
      vc.outputDir = new File(outputDir, "variantcalling")
244
      vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
245 246
      if (!usePrintReads)
        vc.inputBqsrFiles = samples.flatMap { case (sampleId, sample) => sample.bqsrFile.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
247
      add(vc)
248 249
      if (!usePrintReads) {
        import variantcallers._
Peter van 't Hof's avatar
Peter van 't Hof committed
250
        if (vc.callers.exists {
Peter van 't Hof's avatar
Peter van 't Hof committed
251 252
          case _: HaplotypeCaller | _: HaplotypeCallerAllele | _: HaplotypeCallerGvcf => false
          case _: UnifiedGenotyper | _: UnifiedGenotyperAllele => false
253
          case _ => true
Peter van 't Hof's avatar
Peter van 't Hof committed
254
        }) logger.warn("Not all variantcallers chosen can read BQSR files, All non-GATK")
255
      }
256

257
      annotation.foreach { toucan =>
258
        toucan.outputDir = new File(outputDir, "annotation")
259
        toucan.inputVcf = vc.finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
260
        add(toucan)
261
      }
262
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
263

264
    svCalling.foreach { sv =>
Peter van 't Hof's avatar
Peter van 't Hof committed
265
      sv.outputDir = new File(outputDir, "sv_calling")
266
      sv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
267
      add(sv)
268 269 270 271 272 273 274
    }

    cnvCalling.foreach { cnv =>
      cnv.outputDir = new File(outputDir, "cnv_calling")
      cnv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
      add(cnv)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
275 276
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
277
  /** Settings of pipeline for summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
278
  override def summarySettings: Map[String, Any] = super.summarySettings ++ Map(
279 280 281
    "annotation" -> annotation.isDefined,
    "multisample_variantcalling" -> multisampleVariantCalling.isDefined,
    "sv_calling" -> svCalling.isDefined,
282
    "cnv_calling" -> cnvCalling.isDefined,
283
    "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
284 285
    "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")),
    "use_print_reads" -> usePrintReads
286
  )
287 288 289 290 291 292 293 294 295 296 297 298 299 300

  /** Adds indel realignment jobs */
  def addIndelRealign(inputBam: File, dir: File, isIntermediate: Boolean): File = {
    val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir)
    realignerTargetCreator.isIntermediate = true
    add(realignerTargetCreator)

    val indelRealigner = IndelRealigner(this, inputBam, realignerTargetCreator.out, dir)
    indelRealigner.isIntermediate = isIntermediate
    add(indelRealigner)

    indelRealigner.out
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
301
}
302

Peter van 't Hof's avatar
Peter van 't Hof committed
303
/** This object give a default main method to the pipelines */
304 305 306 307 308
object Shiva extends PipelineCommand {

  // This is used to only execute 1 validation per vcf file
  private var validateVcfSeen: Set[(File, File)] = Set()

Peter van 't Hof's avatar
Peter van 't Hof committed
309
  def makeValidateVcfJobs(root: Configurable, vcfFile: File, referenceFile: File, outputDir: File): List[QFunction] = {
310 311 312 313 314 315
    if (validateVcfSeen.contains((vcfFile, referenceFile))) Nil
    else {
      validateVcfSeen ++= Set((vcfFile, referenceFile))
      val validateVcf = new ValidateVcf(root)
      validateVcf.inputVcf = vcfFile
      validateVcf.reference = referenceFile
Peter van 't Hof's avatar
Peter van 't Hof committed
316
      validateVcf.jobOutputFile = new File(outputDir, vcfFile.getAbsolutePath + ".validateVcf.out")
317 318 319

      val checkValidateVcf = new CheckValidateVcf
      checkValidateVcf.inputLogFile = validateVcf.jobOutputFile
Peter van 't Hof's avatar
Peter van 't Hof committed
320
      checkValidateVcf.jobOutputFile = new File(outputDir, vcfFile.getAbsolutePath + ".checkValidateVcf.out")
321 322 323 324 325

      List(validateVcf, checkValidateVcf)
    }
  }
}