BamMetrics.scala 8.28 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16 17
package nl.lumc.sasc.biopet.pipelines.bammetrics

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.annotations.{ AnnotationRefFlat, RibosomalRefFlat }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag }
Sander Bollen's avatar
Sander Bollen committed
22
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect, BedtoolsSort }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.picard._
24
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
25
import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat
Sander Bollen's avatar
Sander Bollen committed
26 27
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import nl.lumc.sasc.biopet.utils.config.Configurable
28
import nl.lumc.sasc.biopet.utils.intervals.BedCheck
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import org.broadinstitute.gatk.queue.QScript
30

31 32 33
class BamMetrics(val root: Configurable) extends QScript
  with SummaryQScript
  with SampleLibraryTag
34
  with Reference
35 36 37
  with TargetRegions
  with AnnotationRefFlat
  with RibosomalRefFlat {
38

39 40 41 42 43
  def this() = this(null)

  @Input(doc = "Bam File", shortName = "BAM", required = true)
  var inputBam: File = _

44 45
  override def defaults = Map("bedtoolscoverage" -> Map("sorted" -> true))

Peter van 't Hof's avatar
Peter van 't Hof committed
46
  /** return location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
47
  def summaryFile = (sampleId, libId) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
48 49 50
    case (Some(s), Some(l)) => new File(outputDir, s + "-" + l + ".BamMetrics.summary.json")
    case (Some(s), _)       => new File(outputDir, s + ".BamMetrics.summary.json")
    case _                  => new File(outputDir, "BamMetrics.summary.json")
Peter van 't Hof's avatar
Peter van 't Hof committed
51 52
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
53
  /** returns files to store in summary */
54 55
  def summaryFiles = Map("reference" -> referenceFasta(),
    "input_bam" -> inputBam) ++
Peter van 't Hof's avatar
Peter van 't Hof committed
56 57
    ampliconBedFile.map("amplicon" -> _).toMap ++
    ampliconBedFile.map(x => "roi_" + x.getName.stripSuffix(".bed") -> x).toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
58

Peter van 't Hof's avatar
Peter van 't Hof committed
59
  /** return settings */
Peter van 't Hof's avatar
Peter van 't Hof committed
60 61
  def summarySettings = Map("amplicon_name" -> ampliconBedFile.collect { case x => x.getName.stripSuffix(".bed") },
    "roi_name" -> roiBedFiles.map(_.getName.stripSuffix(".bed")))
Peter van 't Hof's avatar
Peter van 't Hof committed
62

63 64 65 66 67 68 69
  override def reportClass = {
    val bammetricsReport = new BammetricsReport(this)
    bammetricsReport.outputDir = new File(outputDir, "report")
    bammetricsReport.summaryFile = summaryFile
    bammetricsReport.args = if (libId.isDefined) Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
Peter van 't Hof's avatar
Peter van 't Hof committed
70
    else Map("sampleId" -> sampleId.getOrElse("."))
71 72 73
    Some(bammetricsReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
74
  /** executed before script */
75
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
76
    inputFiles :+= new InputFile(inputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
77 78
    ampliconBedFile.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
    roiBedFiles.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
79 80
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
81
  /** Script to add jobs */
82
  def biopetScript() {
83
    add(SamtoolsFlagstat(this, inputBam, outputDir))
84

85
    val biopetFlagstat = BiopetFlagstat(this, inputBam, outputDir)
86 87 88
    add(biopetFlagstat)
    addSummarizable(biopetFlagstat, "biopet_flagstat")

Peter van 't Hof's avatar
Peter van 't Hof committed
89 90 91 92
    val multiMetrics = new CollectMultipleMetrics(this)
    multiMetrics.input = inputBam
    multiMetrics.outputName = new File(outputDir, inputBam.getName.stripSuffix(".bam"))
    add(multiMetrics)
93
    addSummarizable(multiMetrics, "multi_metrics")
94

95 96 97
    val gcBiasMetrics = CollectGcBiasMetrics(this, inputBam, outputDir)
    add(gcBiasMetrics)
    addSummarizable(gcBiasMetrics, "gc_bias")
98

99
    if (config("wgs_metrics", default = true)) {
bow's avatar
bow committed
100 101 102 103 104 105
      val wgsMetrics = new CollectWgsMetrics(this)
      wgsMetrics.input = inputBam
      wgsMetrics.output = swapExt(outputDir, inputBam, ".bam", ".wgs.metrics")
      add(wgsMetrics)
      addSummarizable(wgsMetrics, "wgs")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
106

107
    if (config("rna_metrics", default = false)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
108 109 110 111
      val rnaMetrics = new CollectRnaSeqMetrics(this)
      rnaMetrics.input = inputBam
      rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics")
      rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf"))
112 113
      rnaMetrics.refFlat = annotationRefFlat()
      rnaMetrics.ribosomalIntervals = ribosomalRefFlat()
Peter van 't Hof's avatar
Peter van 't Hof committed
114
      add(rnaMetrics)
115
      addSummarizable(rnaMetrics, "rna")
Peter van 't Hof's avatar
Peter van 't Hof committed
116 117
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
118 119 120 121 122 123 124 125 126 127 128 129 130
    case class Intervals(bed: File, intervals: File)

    // Create temp jobs to convert bed files to intervals lists
    val roiIntervals = roiBedFiles.map(roiBedFile => {
      val roiIntervals = swapExt(outputDir, roiBedFile, ".bed", ".intervals")
      val ampBedToInterval = BedToIntervalList(this, roiBedFile, roiIntervals)
      ampBedToInterval.isIntermediate = true
      add(ampBedToInterval)
      Intervals(roiBedFile, roiIntervals)
    })

    // Metrics that require a amplicon bed file
    val ampIntervals = ampliconBedFile.collect {
Peter van 't Hof's avatar
Peter van 't Hof committed
131 132 133
      case bedFile =>
        val ampIntervals = swapExt(outputDir, bedFile, ".bed", ".intervals")
        val ampBedToInterval = BedToIntervalList(this, bedFile, ampIntervals)
Peter van 't Hof's avatar
Peter van 't Hof committed
134 135 136 137 138 139
        ampBedToInterval.isIntermediate = true
        add(ampBedToInterval)

        val chsMetrics = CalculateHsMetrics(this, inputBam,
          List(ampIntervals), ampIntervals :: roiIntervals.map(_.intervals), outputDir)
        add(chsMetrics)
140
        addSummarizable(chsMetrics, "hs_metrics")
141

Peter van 't Hof's avatar
Peter van 't Hof committed
142 143 144
        val pcrMetrics = CollectTargetedPcrMetrics(this, inputBam,
          ampIntervals, ampIntervals :: roiIntervals.map(_.intervals), outputDir)
        add(pcrMetrics)
bow's avatar
bow committed
145
        addSummarizable(pcrMetrics, "targeted_pcr_metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
146

Peter van 't Hof's avatar
Peter van 't Hof committed
147
        Intervals(bedFile, ampIntervals)
Peter van 't Hof's avatar
Peter van 't Hof committed
148 149 150 151
    }

    // Create stats and coverage plot for each bed/interval file
    for (intervals <- roiIntervals ++ ampIntervals) {
152 153
      val targetName = intervals.bed.getName.stripSuffix(".bed")
      val targetDir = new File(outputDir, targetName)
Peter van 't Hof's avatar
Peter van 't Hof committed
154 155

      val biStrict = BedtoolsIntersect(this, inputBam, intervals.bed,
156
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.sam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
157
        minOverlap = config("strict_intersect_overlap", default = 1.0))
158 159
      val biopetFlagstatStrict = BiopetFlagstat(this, biStrict.output, targetDir)
      addSummarizable(biopetFlagstatStrict, targetName + "_biopet_flagstat_strict")
160
      add(new BiopetFifoPipe(this, List(biStrict, biopetFlagstatStrict)))
Peter van 't Hof's avatar
Peter van 't Hof committed
161 162

      val biLoose = BedtoolsIntersect(this, inputBam, intervals.bed,
163
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.sam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
164
        minOverlap = config("loose_intersect_overlap", default = 0.01))
165 166
      val biopetFlagstatLoose = BiopetFlagstat(this, biLoose.output, targetDir)
      addSummarizable(biopetFlagstatLoose, targetName + "_biopet_flagstat_loose")
167
      add(new BiopetFifoPipe(this, List(biLoose, biopetFlagstatLoose)))
168

Sander Bollen's avatar
Sander Bollen committed
169 170 171 172 173
      val sorter = new BedtoolsSort(this)
      sorter.input = intervals.bed
      sorter.output = swapExt(targetDir, intervals.bed, ".bed", ".sorted.bed")
      add(sorter)
      val bedCov = BedtoolsCoverage(this, sorter.output, inputBam, depth = true)
174
      val covStats = CoverageStats(this, targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
175 176
      covStats.title = Some("Coverage Plot")
      covStats.subTitle = Some(s"for file '$targetName.bed'")
177
      add(bedCov | covStats)
Peter van 't Hof's avatar
Peter van 't Hof committed
178
      addSummarizable(covStats, targetName + "_cov_stats")
179
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
180

Peter van 't Hof's avatar
Peter van 't Hof committed
181
    addSummaryJobs()
182 183 184 185
  }
}

object BamMetrics extends PipelineCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
186
  /** Make default implementation of BamMetrics and runs script already */
Peter van 't Hof's avatar
Peter van 't Hof committed
187 188 189 190
  def apply(root: Configurable,
            bamFile: File, outputDir: File,
            sampleId: Option[String] = None,
            libId: Option[String] = None): BamMetrics = {
191
    val bamMetrics = new BamMetrics(root)
Peter van 't Hof's avatar
Peter van 't Hof committed
192 193
    bamMetrics.sampleId = sampleId
    bamMetrics.libId = libId
194 195 196
    bamMetrics.inputBam = bamFile
    bamMetrics.outputDir = outputDir

Peter van 't Hof's avatar
Peter van 't Hof committed
197 198 199
    bamMetrics.init()
    bamMetrics.biopetScript()
    bamMetrics
200 201
  }
}