BamMetrics.scala 8.97 KB
Newer Older
1
/**
2 3 4 5 6 7 8 9 10 11 12 13 14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
15 16 17
package nl.lumc.sasc.biopet.pipelines.bammetrics

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
18

19
import nl.lumc.sasc.biopet.core.annotations.{AnnotationRefFlat, RibosomalRefFlat}
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
21 22
import nl.lumc.sasc.biopet.core.{BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag}
import nl.lumc.sasc.biopet.extensions.bedtools.{BedtoolsCoverage, BedtoolsIntersect, BedtoolsSort}
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.picard._
24
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
25
import nl.lumc.sasc.biopet.extensions.tools.{BamStats, BiopetFlagstat}
Sander Bollen's avatar
Sander Bollen committed
26 27
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import nl.lumc.sasc.biopet.utils.config.Configurable
28
import nl.lumc.sasc.biopet.utils.intervals.BedCheck
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import org.broadinstitute.gatk.queue.QScript
30

31 32 33 34 35 36 37 38
class BamMetrics(val parent: Configurable)
    extends QScript
    with SummaryQScript
    with SampleLibraryTag
    with Reference
    with TargetRegions
    with AnnotationRefFlat
    with RibosomalRefFlat {
39

40 41 42 43 44
  def this() = this(null)

  @Input(doc = "Bam File", shortName = "BAM", required = true)
  var inputBam: File = _

45 46
  override def defaults = Map("bedtoolscoverage" -> Map("sorted" -> true))

Peter van 't Hof's avatar
Peter van 't Hof committed
47
  /** returns files to store in summary */
48 49 50 51
  def summaryFiles =
    Map("reference" -> referenceFasta(), "input_bam" -> inputBam) ++
      ampliconBedFile.map("amplicon" -> _).toMap ++
      ampliconBedFile.map(x => "roi_" + x.getName.stripSuffix(".bed") -> x).toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
52

Peter van 't Hof's avatar
Peter van 't Hof committed
53
  /** return settings */
54 55 56
  def summarySettings =
    Map("amplicon_name" -> ampliconBedFile.collect { case x => x.getName.stripSuffix(".bed") },
        "roi_name" -> roiBedFiles.map(_.getName.stripSuffix(".bed")))
Peter van 't Hof's avatar
Peter van 't Hof committed
57

58 59 60
  override def reportClass = {
    val bammetricsReport = new BammetricsReport(this)
    bammetricsReport.outputDir = new File(outputDir, "report")
61
    bammetricsReport.summaryDbFile = summaryDbFile
62 63 64 65
    bammetricsReport.args =
      if (libId.isDefined)
        Map("sampleId" -> sampleId.getOrElse("."), "libId" -> libId.getOrElse("."))
      else Map("sampleId" -> sampleId.getOrElse("."))
66 67 68
    Some(bammetricsReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
69
  /** executed before script */
70
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
71
    inputFiles :+= new InputFile(inputBam)
72 73
    ampliconBedFile.foreach(
      BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
Peter van 't Hof's avatar
Peter van 't Hof committed
74
    roiBedFiles.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
75 76
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
77
  /** Script to add jobs */
78
  def biopetScript() {
79
    add(SamtoolsFlagstat(this, inputBam, outputDir))
80

81 82 83 84
    val bamStats = new BamStats(this)
    bamStats.bamFile = inputBam
    bamStats.outputDir = new File(outputDir, "bamstats")
    add(bamStats)
Peter van 't Hof's avatar
Peter van 't Hof committed
85
    addSummarizable(bamStats, "bamstats")
86

Peter van 't Hof's avatar
Peter van 't Hof committed
87 88 89 90
    val multiMetrics = new CollectMultipleMetrics(this)
    multiMetrics.input = inputBam
    multiMetrics.outputName = new File(outputDir, inputBam.getName.stripSuffix(".bam"))
    add(multiMetrics)
91
    addSummarizable(multiMetrics, "multi_metrics")
92

93 94 95
    val gcBiasMetrics = CollectGcBiasMetrics(this, inputBam, outputDir)
    add(gcBiasMetrics)
    addSummarizable(gcBiasMetrics, "gc_bias")
96

97
    if (config("wgs_metrics", default = true)) {
bow's avatar
bow committed
98 99 100 101 102 103
      val wgsMetrics = new CollectWgsMetrics(this)
      wgsMetrics.input = inputBam
      wgsMetrics.output = swapExt(outputDir, inputBam, ".bam", ".wgs.metrics")
      add(wgsMetrics)
      addSummarizable(wgsMetrics, "wgs")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
104

105
    if (config("rna_metrics", default = false)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
106 107 108 109
      val rnaMetrics = new CollectRnaSeqMetrics(this)
      rnaMetrics.input = inputBam
      rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics")
      rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf"))
110 111
      rnaMetrics.refFlat = annotationRefFlat()
      rnaMetrics.ribosomalIntervals = ribosomalRefFlat()
Peter van 't Hof's avatar
Peter van 't Hof committed
112
      add(rnaMetrics)
113
      addSummarizable(rnaMetrics, "rna")
Peter van 't Hof's avatar
Peter van 't Hof committed
114 115
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
116 117 118 119 120 121 122 123 124 125 126 127 128
    case class Intervals(bed: File, intervals: File)

    // Create temp jobs to convert bed files to intervals lists
    val roiIntervals = roiBedFiles.map(roiBedFile => {
      val roiIntervals = swapExt(outputDir, roiBedFile, ".bed", ".intervals")
      val ampBedToInterval = BedToIntervalList(this, roiBedFile, roiIntervals)
      ampBedToInterval.isIntermediate = true
      add(ampBedToInterval)
      Intervals(roiBedFile, roiIntervals)
    })

    // Metrics that require a amplicon bed file
    val ampIntervals = ampliconBedFile.collect {
Peter van 't Hof's avatar
Peter van 't Hof committed
129 130 131
      case bedFile =>
        val ampIntervals = swapExt(outputDir, bedFile, ".bed", ".intervals")
        val ampBedToInterval = BedToIntervalList(this, bedFile, ampIntervals)
Peter van 't Hof's avatar
Peter van 't Hof committed
132 133 134
        ampBedToInterval.isIntermediate = true
        add(ampBedToInterval)

135 136 137 138 139
        val chsMetrics = CollectHsMetrics(this,
                                          inputBam,
                                          List(ampIntervals),
                                          ampIntervals :: roiIntervals.map(_.intervals),
                                          outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
140
        add(chsMetrics)
141
        addSummarizable(chsMetrics, "hs_metrics")
142

143 144 145 146 147
        val pcrMetrics = CollectTargetedPcrMetrics(this,
                                                   inputBam,
                                                   ampIntervals,
                                                   ampIntervals :: roiIntervals.map(_.intervals),
                                                   outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
148
        add(pcrMetrics)
bow's avatar
bow committed
149
        addSummarizable(pcrMetrics, "targeted_pcr_metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
150

Peter van 't Hof's avatar
Peter van 't Hof committed
151
        Intervals(bedFile, ampIntervals)
Peter van 't Hof's avatar
Peter van 't Hof committed
152 153 154
    }

    // Create stats and coverage plot for each bed/interval file
155 156
    val allIntervalNames = (roiIntervals ++ ampIntervals).map(_.bed.getName)
    if (allIntervalNames.size != allIntervalNames.toSet.size) {
157 158
      logger.warn(
        "There are multiple region files with the same name. Metric values might get overwritten")
159
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
160
    for (intervals <- roiIntervals ++ ampIntervals) {
161 162
      val targetName = intervals.bed.getName.stripSuffix(".bed")
      val targetDir = new File(outputDir, targetName)
Peter van 't Hof's avatar
Peter van 't Hof committed
163

164 165 166 167
      val biStrict = BedtoolsIntersect(
        this,
        inputBam,
        intervals.bed,
168
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.sam"),
169 170
        minOverlap = config("strict_intersect_overlap", default = 1.0)
      )
171
      val biopetFlagstatStrict = BiopetFlagstat(this, biStrict.output, targetDir)
172
      addSummarizable(biopetFlagstatStrict, targetName + "_flagstats_strict")
173
      add(new BiopetFifoPipe(this, List(biStrict, biopetFlagstatStrict)))
Peter van 't Hof's avatar
Peter van 't Hof committed
174

175 176 177 178
      val biLoose = BedtoolsIntersect(
        this,
        inputBam,
        intervals.bed,
179
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.sam"),
180 181
        minOverlap = config("loose_intersect_overlap", default = 0.01)
      )
182
      val biopetFlagstatLoose = BiopetFlagstat(this, biLoose.output, targetDir)
183
      addSummarizable(biopetFlagstatLoose, targetName + "_flagstats_loose")
184
      add(new BiopetFifoPipe(this, List(biLoose, biopetFlagstatLoose)))
185

186 187 188 189 190 191 192 193 194 195
      val sortedBed = BamMetrics.sortedbedCache.getOrElse(
        intervals.bed, {
          val sorter = new BedtoolsSort(this)
          sorter.input = intervals.bed
          sorter.output = swapExt(targetDir, intervals.bed, ".bed", ".sorted.bed")
          add(sorter)
          BamMetrics.sortedbedCache += intervals.bed -> sorter.output
          sorter.output
        }
      )
Peter van 't Hof's avatar
Peter van 't Hof committed
196
      val bedCov = BedtoolsCoverage(this, sortedBed, inputBam, depth = true)
197 198
      val covStats =
        CoverageStats(this, targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
199 200
      covStats.title = Some("Coverage Plot")
      covStats.subTitle = Some(s"for file '$targetName.bed'")
201
      add(bedCov | covStats)
Peter van 't Hof's avatar
Peter van 't Hof committed
202
      addSummarizable(covStats, targetName + "_cov_stats")
203
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
204

Peter van 't Hof's avatar
Peter van 't Hof committed
205
    addSummaryJobs()
206 207 208 209
  }
}

object BamMetrics extends PipelineCommand {
210

Peter van 't Hof's avatar
Peter van 't Hof committed
211
  /** Make default implementation of BamMetrics and runs script already */
Peter van 't Hof's avatar
Peter van 't Hof committed
212
  def apply(root: Configurable,
213 214
            bamFile: File,
            outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
215 216
            sampleId: Option[String] = None,
            libId: Option[String] = None): BamMetrics = {
217
    val bamMetrics = new BamMetrics(root)
Peter van 't Hof's avatar
Peter van 't Hof committed
218 219
    bamMetrics.sampleId = sampleId
    bamMetrics.libId = libId
220 221 222
    bamMetrics.inputBam = bamFile
    bamMetrics.outputDir = outputDir

Peter van 't Hof's avatar
Peter van 't Hof committed
223 224 225
    bamMetrics.init()
    bamMetrics.biopetScript()
    bamMetrics
226
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
227 228

  private var sortedbedCache: Map[File, File] = Map()
229
}