Shiva.scala 11.1 KB
Newer Older
bow's avatar
bow committed
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
bow's avatar
bow committed
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15 16
package nl.lumc.sasc.biopet.pipelines.shiva

17 18
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.extensions.gatk._
22
import nl.lumc.sasc.biopet.extensions.tools.ValidateVcf
23
import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.pipelines.kopisu.Kopisu
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.utils.config.Configurable
28
import org.broadinstitute.gatk.queue.QScript
29
import org.broadinstitute.gatk.queue.function.QFunction
Peter van 't Hof's avatar
Peter van 't Hof committed
30

Peter van 't Hof's avatar
Peter van 't Hof committed
31
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
32 33
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
34 35
 * Created by pjvan_thof on 2/26/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
36
class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTrait with Reference with TargetRegions { qscript =>
37

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  def this() = this(null)
Peter van 't Hof's avatar
Peter van 't Hof committed
39

Peter van 't Hof's avatar
Peter van 't Hof committed
40
  override def reportClass: Option[ReportBuilderExtension] = {
41 42
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
43
    shiva.summaryDbFile = summaryDbFile
44 45 46
    Some(shiva)
  }

47 48 49 50 51 52
  override def defaults = Map(
    "haplotypecaller" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0),
    "genotypegvcfs" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0),
    "unifiedgenotyper" -> Map("stand_call_conf" -> 30, "stand_emit_conf" -> 0)
  )

Sander Bollen's avatar
Sander Bollen committed
53
  /** Method to make the variantcalling namespace of shiva */
54
  def makeVariantcalling(multisample: Boolean = false): ShivaVariantcalling with QScript = {
Peter van 't Hof's avatar
Peter van 't Hof committed
55
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
56
      override def namePrefix = "multisample"
Sander Bollen's avatar
Sander Bollen committed
57
      override def configNamespace: String = "shivavariantcalling"
Peter van 't Hof's avatar
Peter van 't Hof committed
58 59 60
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
Sander Bollen's avatar
Sander Bollen committed
61
      override def configNamespace = "shivavariantcalling"
Peter van 't Hof's avatar
Peter van 't Hof committed
62 63 64
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
65
  /** Method to make a sample */
66
  override def makeSample(id: String) = new this.Sample(id)
67

Peter van 't Hof's avatar
Peter van 't Hof committed
68
  /** Class that will generate jobs for a sample */
69
  class Sample(sampleId: String) extends super.Sample(sampleId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
70
    /** Method to make a library */
71
    override def makeLibrary(id: String) = new this.Library(id)
Peter van 't Hof's avatar
Peter van 't Hof committed
72

73
    /** Sample specific settings */
74 75
    override def summarySettings = super.summarySettings ++
      Map("single_sample_variantcalling" -> variantcalling.isDefined, "use_indel_realigner" -> useIndelRealigner)
76

Peter van 't Hof's avatar
Peter van 't Hof committed
77
    /** Class to generate jobs for a library */
78
    class Library(libId: String) extends super.Library(libId) {
79 80 81 82 83 84 85 86 87 88

      lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
      lazy val useBaseRecalibration: Boolean = {
        val c: Boolean = config("use_base_recalibration", default = true)
        val br = new BaseRecalibrator(qscript)
        if (c && br.knownSites.isEmpty)
          logger.warn("No Known site found, skipping base recalibration, file: " + inputBam)
        c && br.knownSites.nonEmpty
      }

Peter van 't Hof's avatar
Peter van 't Hof committed
89 90
      override def keepFinalBamfile = super.keepFinalBamfile && !useIndelRealigner && !useBaseRecalibration

91 92 93 94 95 96
      override def preProcessBam = if (useIndelRealigner && useBaseRecalibration)
        bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
      else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
      else if (useBaseRecalibration) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam"))
      else bamFile

97
      /** Library specific settings */
98 99 100 101
      override def summarySettings = Map(
        "library_variantcalling" -> variantcalling.isDefined,
        "use_indel_realigner" -> useIndelRealigner,
        "use_base_recalibration" -> useBaseRecalibration)
102

103
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
104 105 106 107
        (bamFile.isDefined || preProcessBam.isDefined)) {
        Some(makeVariantcalling(multisample = false))
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
108
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
109
      override def addJobs() = {
110
        super.addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
111

112 113 114 115 116 117 118 119 120
        if (useIndelRealigner && useBaseRecalibration) {
          val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true)
          addBaseRecalibrator(file, libDir, libraries.size > 1)
        } else if (useIndelRealigner) {
          addIndelRealign(bamFile.get, libDir, libraries.size > 1)
        } else if (useBaseRecalibration) {
          addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1)
        }

Peter van 't Hof's avatar
Peter van 't Hof committed
121
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
122 123
          vc.sampleId = Some(sampleId)
          vc.libId = Some(libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
124
          vc.outputDir = new File(libDir, "variantcalling")
125 126
          if (preProcessBam.isDefined) vc.inputBams = Map(sampleId -> preProcessBam.get)
          else vc.inputBams = Map(sampleId -> bamFile.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
127
          add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
128
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
129 130 131
      }
    }

Peter van 't Hof's avatar
fix bug  
Peter van 't Hof committed
132
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
133
      Some(makeVariantcalling(multisample = false))
Peter van 't Hof's avatar
Peter van 't Hof committed
134 135
    } else None

136 137 138 139 140 141 142 143
    override def keepMergedFiles: Boolean = config("keep_merged_files", default = !useIndelRealigner)

    lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)

    override def preProcessBam = if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
      bamFile.map(swapExt(sampleDir, _, ".bam", ".realign.bam"))
    } else bamFile

Peter van 't Hof's avatar
Peter van 't Hof committed
144
    /** This will add sample jobs */
145 146
    override def addJobs(): Unit = {
      super.addJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
147

148 149 150 151
      if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) {
        addIndelRealign(bamFile.get, sampleDir, false)
      }

152
      preProcessBam.foreach { bam =>
Peter van 't Hof's avatar
Peter van 't Hof committed
153
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
154 155
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
156
          vc.inputBams = Map(sampleId -> bam)
Peter van 't Hof's avatar
Peter van 't Hof committed
157
          add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
158
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
159
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
160 161 162
    }
  }

163
  lazy val multisampleVariantCalling = if (config("multisample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
164 165 166
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
167
  lazy val svCalling = if (config("sv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
168
    Some(new ShivaSvCalling(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
169 170
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
171
  lazy val cnvCalling = if (config("cnv_calling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
172
    Some(new Kopisu(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
173 174
  } else None

175 176 177 178 179
  lazy val annotation = if (multisampleVariantCalling.isDefined &&
    config("annotation", default = false).asBoolean) {
    Some(new Toucan(this))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
180
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
181
  override def addMultiSampleJobs() = {
182 183
    super.addMultiSampleJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
184
    addAll(dbsnpVcfFile.map(Shiva.makeValidateVcfJobs(this, _, referenceFasta(), new File(outputDir, ".validate"))).getOrElse(Nil))
185

186
    multisampleVariantCalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
187
      vc.outputDir = new File(outputDir, "variantcalling")
188
      vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
189
      add(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
190

191
      annotation.foreach { toucan =>
Peter van 't Hof's avatar
Peter van 't Hof committed
192
        toucan.outputDir = new File(outputDir, "annotation")
193
        toucan.inputVcf = vc.finalFile
Peter van 't Hof's avatar
Peter van 't Hof committed
194
        add(toucan)
Peter van 't Hof's avatar
Peter van 't Hof committed
195
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
196
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
197

Peter van 't Hof's avatar
Peter van 't Hof committed
198
    svCalling.foreach { sv =>
Peter van 't Hof's avatar
Peter van 't Hof committed
199
      sv.outputDir = new File(outputDir, "sv_calling")
200
      sv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
Peter van 't Hof's avatar
Peter van 't Hof committed
201
      add(sv)
Peter van 't Hof's avatar
Peter van 't Hof committed
202 203 204 205 206 207 208
    }

    cnvCalling.foreach { cnv =>
      cnv.outputDir = new File(outputDir, "cnv_calling")
      cnv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
      add(cnv)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
209 210
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
211
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
212
  def summaryFile = new File(outputDir, "Shiva.summary.json")
Peter van 't Hof's avatar
Peter van 't Hof committed
213

Peter van 't Hof's avatar
Peter van 't Hof committed
214
  /** Settings of pipeline for summary */
215
  override def summarySettings = super.summarySettings ++ Map(
216 217 218
    "annotation" -> annotation.isDefined,
    "multisample_variantcalling" -> multisampleVariantCalling.isDefined,
    "sv_calling" -> svCalling.isDefined,
219
    "cnv_calling" -> cnvCalling.isDefined,
220 221 222
    "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
    "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed"))
  )
223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258

  /** Adds indel realignment jobs */
  def addIndelRealign(inputBam: File, dir: File, isIntermediate: Boolean): File = {
    val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir)
    realignerTargetCreator.isIntermediate = true
    add(realignerTargetCreator)

    val indelRealigner = IndelRealigner(this, inputBam, realignerTargetCreator.out, dir)
    indelRealigner.isIntermediate = isIntermediate
    add(indelRealigner)

    indelRealigner.out
  }

  /** Adds base recalibration jobs */
  def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean): File = {
    val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal"))

    if (baseRecalibrator.knownSites.isEmpty) return inputBam
    add(baseRecalibrator)

    if (config("use_analyze_covariates", default = true).asBoolean) {
      val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after"))
      baseRecalibratorAfter.BQSR = Some(baseRecalibrator.out)
      add(baseRecalibratorAfter)

      add(AnalyzeCovariates(this, baseRecalibrator.out, baseRecalibratorAfter.out, swapExt(dir, inputBam, ".bam", ".baserecal.pdf")))
    }

    val printReads = PrintReads(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam"))
    printReads.BQSR = Some(baseRecalibrator.out)
    printReads.isIntermediate = isIntermediate
    add(printReads)

    printReads.out
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
259
}
Peter van 't Hof's avatar
Peter van 't Hof committed
260

Peter van 't Hof's avatar
Peter van 't Hof committed
261
/** This object give a default main method to the pipelines */
262 263 264 265 266
object Shiva extends PipelineCommand {

  // This is used to only execute 1 validation per vcf file
  private var validateVcfSeen: Set[(File, File)] = Set()

Peter van 't Hof's avatar
Peter van 't Hof committed
267
  def makeValidateVcfJobs(root: Configurable, vcfFile: File, referenceFile: File, outputDir: File): List[QFunction] = {
268 269 270 271 272 273
    if (validateVcfSeen.contains((vcfFile, referenceFile))) Nil
    else {
      validateVcfSeen ++= Set((vcfFile, referenceFile))
      val validateVcf = new ValidateVcf(root)
      validateVcf.inputVcf = vcfFile
      validateVcf.reference = referenceFile
Peter van 't Hof's avatar
Peter van 't Hof committed
274
      validateVcf.jobOutputFile = new File(outputDir, vcfFile.getAbsolutePath + ".validateVcf.out")
275 276 277

      val checkValidateVcf = new CheckValidateVcf
      checkValidateVcf.inputLogFile = validateVcf.jobOutputFile
Peter van 't Hof's avatar
Peter van 't Hof committed
278
      checkValidateVcf.jobOutputFile = new File(outputDir, vcfFile.getAbsolutePath + ".checkValidateVcf.out")
279 280 281 282 283

      List(validateVcf, checkValidateVcf)
    }
  }
}