BamMetrics.scala 8.46 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16 17
package nl.lumc.sasc.biopet.pipelines.bammetrics

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.annotations.{ AnnotationRefFlat, RibosomalRefFlat }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
21 22
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect, BedtoolsSort }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.picard._
24
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.extensions.tools.{ BamStats, BiopetFlagstat }
Sander Bollen's avatar
Sander Bollen committed
26 27
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import nl.lumc.sasc.biopet.utils.config.Configurable
28
import nl.lumc.sasc.biopet.utils.intervals.BedCheck
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import org.broadinstitute.gatk.queue.QScript
30

Peter van 't Hof's avatar
Peter van 't Hof committed
31
class BamMetrics(val parent: Configurable) extends QScript
32 33
  with SummaryQScript
  with SampleLibraryTag
34
  with Reference
35 36 37
  with TargetRegions
  with AnnotationRefFlat
  with RibosomalRefFlat {
38

39 40 41 42 43
  def this() = this(null)

  @Input(doc = "Bam File", shortName = "BAM", required = true)
  var inputBam: File = _

44 45
  override def defaults = Map("bedtoolscoverage" -> Map("sorted" -> true))

Peter van 't Hof's avatar
Peter van 't Hof committed
46
  /** returns files to store in summary */
47 48
  def summaryFiles = Map("reference" -> referenceFasta(),
    "input_bam" -> inputBam) ++
Peter van 't Hof's avatar
Peter van 't Hof committed
49 50
    ampliconBedFile.map("amplicon" -> _).toMap ++
    ampliconBedFile.map(x => "roi_" + x.getName.stripSuffix(".bed") -> x).toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
51

Peter van 't Hof's avatar
Peter van 't Hof committed
52
  /** return settings */
Peter van 't Hof's avatar
Peter van 't Hof committed
53 54
  def summarySettings = Map("amplicon_name" -> ampliconBedFile.collect { case x => x.getName.stripSuffix(".bed") },
    "roi_name" -> roiBedFiles.map(_.getName.stripSuffix(".bed")))
Peter van 't Hof's avatar
Peter van 't Hof committed
55

56 57 58
  override def reportClass = {
    val bammetricsReport = new BammetricsReport(this)
    bammetricsReport.outputDir = new File(outputDir, "report")
59
    bammetricsReport.summaryDbFile = summaryDbFile
60 61 62
    bammetricsReport.args = if (libId.isDefined) Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
Peter van 't Hof's avatar
Peter van 't Hof committed
63
    else Map("sampleId" -> sampleId.getOrElse("."))
64 65 66
    Some(bammetricsReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
67
  /** executed before script */
68
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
69
    inputFiles :+= new InputFile(inputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
70 71
    ampliconBedFile.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
    roiBedFiles.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
72 73
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
74
  /** Script to add jobs */
75
  def biopetScript() {
76
    add(SamtoolsFlagstat(this, inputBam, outputDir))
77

78 79 80 81
    val bamStats = new BamStats(this)
    bamStats.bamFile = inputBam
    bamStats.outputDir = new File(outputDir, "bamstats")
    add(bamStats)
Peter van 't Hof's avatar
Peter van 't Hof committed
82
    addSummarizable(bamStats, "bamstats")
83

Peter van 't Hof's avatar
Peter van 't Hof committed
84 85 86 87
    val multiMetrics = new CollectMultipleMetrics(this)
    multiMetrics.input = inputBam
    multiMetrics.outputName = new File(outputDir, inputBam.getName.stripSuffix(".bam"))
    add(multiMetrics)
88
    addSummarizable(multiMetrics, "multi_metrics")
89

90 91 92
    val gcBiasMetrics = CollectGcBiasMetrics(this, inputBam, outputDir)
    add(gcBiasMetrics)
    addSummarizable(gcBiasMetrics, "gc_bias")
93

94
    if (config("wgs_metrics", default = true)) {
bow's avatar
bow committed
95 96 97 98 99 100
      val wgsMetrics = new CollectWgsMetrics(this)
      wgsMetrics.input = inputBam
      wgsMetrics.output = swapExt(outputDir, inputBam, ".bam", ".wgs.metrics")
      add(wgsMetrics)
      addSummarizable(wgsMetrics, "wgs")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
101

102
    if (config("rna_metrics", default = false)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
103 104 105 106
      val rnaMetrics = new CollectRnaSeqMetrics(this)
      rnaMetrics.input = inputBam
      rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics")
      rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf"))
107 108
      rnaMetrics.refFlat = annotationRefFlat()
      rnaMetrics.ribosomalIntervals = ribosomalRefFlat()
Peter van 't Hof's avatar
Peter van 't Hof committed
109
      add(rnaMetrics)
110
      addSummarizable(rnaMetrics, "rna")
Peter van 't Hof's avatar
Peter van 't Hof committed
111 112
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
113 114 115 116 117 118 119 120 121 122 123 124 125
    case class Intervals(bed: File, intervals: File)

    // Create temp jobs to convert bed files to intervals lists
    val roiIntervals = roiBedFiles.map(roiBedFile => {
      val roiIntervals = swapExt(outputDir, roiBedFile, ".bed", ".intervals")
      val ampBedToInterval = BedToIntervalList(this, roiBedFile, roiIntervals)
      ampBedToInterval.isIntermediate = true
      add(ampBedToInterval)
      Intervals(roiBedFile, roiIntervals)
    })

    // Metrics that require a amplicon bed file
    val ampIntervals = ampliconBedFile.collect {
Peter van 't Hof's avatar
Peter van 't Hof committed
126 127 128
      case bedFile =>
        val ampIntervals = swapExt(outputDir, bedFile, ".bed", ".intervals")
        val ampBedToInterval = BedToIntervalList(this, bedFile, ampIntervals)
Peter van 't Hof's avatar
Peter van 't Hof committed
129 130 131
        ampBedToInterval.isIntermediate = true
        add(ampBedToInterval)

132
        val chsMetrics = CollectHsMetrics(this, inputBam,
Peter van 't Hof's avatar
Peter van 't Hof committed
133 134
          List(ampIntervals), ampIntervals :: roiIntervals.map(_.intervals), outputDir)
        add(chsMetrics)
135
        addSummarizable(chsMetrics, "hs_metrics")
136

Peter van 't Hof's avatar
Peter van 't Hof committed
137 138 139
        val pcrMetrics = CollectTargetedPcrMetrics(this, inputBam,
          ampIntervals, ampIntervals :: roiIntervals.map(_.intervals), outputDir)
        add(pcrMetrics)
bow's avatar
bow committed
140
        addSummarizable(pcrMetrics, "targeted_pcr_metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
141

Peter van 't Hof's avatar
Peter van 't Hof committed
142
        Intervals(bedFile, ampIntervals)
Peter van 't Hof's avatar
Peter van 't Hof committed
143 144 145
    }

    // Create stats and coverage plot for each bed/interval file
146 147 148 149
    val allIntervalNames = (roiIntervals ++ ampIntervals).map(_.bed.getName)
    if (allIntervalNames.size != allIntervalNames.toSet.size) {
      logger.warn("There are multiple region files with the same name. Metric values might get overwritten")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
150
    for (intervals <- roiIntervals ++ ampIntervals) {
151 152
      val targetName = intervals.bed.getName.stripSuffix(".bed")
      val targetDir = new File(outputDir, targetName)
Peter van 't Hof's avatar
Peter van 't Hof committed
153 154

      val biStrict = BedtoolsIntersect(this, inputBam, intervals.bed,
155
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.sam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
156
        minOverlap = config("strict_intersect_overlap", default = 1.0))
157
      val biopetFlagstatStrict = BiopetFlagstat(this, biStrict.output, targetDir)
158
      addSummarizable(biopetFlagstatStrict, targetName + "_flagstats_strict")
159
      add(new BiopetFifoPipe(this, List(biStrict, biopetFlagstatStrict)))
Peter van 't Hof's avatar
Peter van 't Hof committed
160 161

      val biLoose = BedtoolsIntersect(this, inputBam, intervals.bed,
162
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.sam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
163
        minOverlap = config("loose_intersect_overlap", default = 0.01))
164
      val biopetFlagstatLoose = BiopetFlagstat(this, biLoose.output, targetDir)
165
      addSummarizable(biopetFlagstatLoose, targetName + "_flagstats_loose")
166
      add(new BiopetFifoPipe(this, List(biLoose, biopetFlagstatLoose)))
167

Peter van 't Hof's avatar
Peter van 't Hof committed
168 169 170 171 172 173 174 175 176
      val sortedBed = BamMetrics.sortedbedCache.getOrElse(intervals.bed, {
        val sorter = new BedtoolsSort(this)
        sorter.input = intervals.bed
        sorter.output = swapExt(targetDir, intervals.bed, ".bed", ".sorted.bed")
        add(sorter)
        BamMetrics.sortedbedCache += intervals.bed -> sorter.output
        sorter.output
      })
      val bedCov = BedtoolsCoverage(this, sortedBed, inputBam, depth = true)
177
      val covStats = CoverageStats(this, targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
178 179
      covStats.title = Some("Coverage Plot")
      covStats.subTitle = Some(s"for file '$targetName.bed'")
180
      add(bedCov | covStats)
Peter van 't Hof's avatar
Peter van 't Hof committed
181
      addSummarizable(covStats, targetName + "_cov_stats")
182
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
183

Peter van 't Hof's avatar
Peter van 't Hof committed
184
    addSummaryJobs()
185 186 187 188
  }
}

object BamMetrics extends PipelineCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
189
  /** Make default implementation of BamMetrics and runs script already */
Peter van 't Hof's avatar
Peter van 't Hof committed
190 191 192 193
  def apply(root: Configurable,
            bamFile: File, outputDir: File,
            sampleId: Option[String] = None,
            libId: Option[String] = None): BamMetrics = {
194
    val bamMetrics = new BamMetrics(root)
Peter van 't Hof's avatar
Peter van 't Hof committed
195 196
    bamMetrics.sampleId = sampleId
    bamMetrics.libId = libId
197 198 199
    bamMetrics.inputBam = bamFile
    bamMetrics.outputDir = outputDir

Peter van 't Hof's avatar
Peter van 't Hof committed
200 201 202
    bamMetrics.init()
    bamMetrics.biopetScript()
    bamMetrics
203
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
204 205

  private var sortedbedCache: Map[File, File] = Map()
206
}