BamMetrics.scala 8.65 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15
16
17
package nl.lumc.sasc.biopet.pipelines.bammetrics

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
18

19
import nl.lumc.sasc.biopet.core.annotations.{AnnotationRefFlat, RibosomalRefFlat}
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
21
22
import nl.lumc.sasc.biopet.core.{BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag}
import nl.lumc.sasc.biopet.extensions.bedtools.{BedtoolsCoverage, BedtoolsIntersect, BedtoolsSort}
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.picard._
24
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
25
import nl.lumc.sasc.biopet.extensions.tools.{BamStats, BiopetFlagstat}
Sander Bollen's avatar
Sander Bollen committed
26
27
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import nl.lumc.sasc.biopet.utils.config.Configurable
28
import nl.lumc.sasc.biopet.utils.intervals.BedCheck
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import org.broadinstitute.gatk.queue.QScript
30

31
32
33
class BamMetrics(val root: Configurable) extends QScript
  with SummaryQScript
  with SampleLibraryTag
34
  with Reference
35
36
37
  with TargetRegions
  with AnnotationRefFlat
  with RibosomalRefFlat {
38

39
40
41
42
43
  def this() = this(null)

  @Input(doc = "Bam File", shortName = "BAM", required = true)
  var inputBam: File = _

44
45
  override def defaults = Map("bedtoolscoverage" -> Map("sorted" -> true))

Peter van 't Hof's avatar
Peter van 't Hof committed
46
  /** return location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
47
  def summaryFile = (sampleId, libId) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
48
49
50
    case (Some(s), Some(l)) => new File(outputDir, s + "-" + l + ".BamMetrics.summary.json")
    case (Some(s), _)       => new File(outputDir, s + ".BamMetrics.summary.json")
    case _                  => new File(outputDir, "BamMetrics.summary.json")
Peter van 't Hof's avatar
Peter van 't Hof committed
51
52
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
53
  /** returns files to store in summary */
54
55
  def summaryFiles = Map("reference" -> referenceFasta(),
    "input_bam" -> inputBam) ++
Peter van 't Hof's avatar
Peter van 't Hof committed
56
57
    ampliconBedFile.map("amplicon" -> _).toMap ++
    ampliconBedFile.map(x => "roi_" + x.getName.stripSuffix(".bed") -> x).toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
58

Peter van 't Hof's avatar
Peter van 't Hof committed
59
  /** return settings */
Peter van 't Hof's avatar
Peter van 't Hof committed
60
61
  def summarySettings = Map("amplicon_name" -> ampliconBedFile.collect { case x => x.getName.stripSuffix(".bed") },
    "roi_name" -> roiBedFiles.map(_.getName.stripSuffix(".bed")))
Peter van 't Hof's avatar
Peter van 't Hof committed
62

63
64
65
66
67
68
69
  override def reportClass = {
    val bammetricsReport = new BammetricsReport(this)
    bammetricsReport.outputDir = new File(outputDir, "report")
    bammetricsReport.summaryFile = summaryFile
    bammetricsReport.args = if (libId.isDefined) Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
Peter van 't Hof's avatar
Peter van 't Hof committed
70
    else Map("sampleId" -> sampleId.getOrElse("."))
71
72
73
    Some(bammetricsReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
74
  /** executed before script */
75
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
76
    inputFiles :+= new InputFile(inputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
    ampliconBedFile.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
    roiBedFiles.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
79
80
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
81
  /** Script to add jobs */
82
  def biopetScript() {
83
    add(SamtoolsFlagstat(this, inputBam, outputDir))
84

85
    val biopetFlagstat = BiopetFlagstat(this, inputBam, outputDir)
86
87
88
    add(biopetFlagstat)
    addSummarizable(biopetFlagstat, "biopet_flagstat")

89
90
91
92
93
    val bamStats = new BamStats(this)
    bamStats.bamFile = inputBam
    bamStats.outputDir = new File(outputDir, "bamstats")
    add(bamStats)

Peter van 't Hof's avatar
Peter van 't Hof committed
94
95
96
97
    val multiMetrics = new CollectMultipleMetrics(this)
    multiMetrics.input = inputBam
    multiMetrics.outputName = new File(outputDir, inputBam.getName.stripSuffix(".bam"))
    add(multiMetrics)
98
    addSummarizable(multiMetrics, "multi_metrics")
99

100
101
102
    val gcBiasMetrics = CollectGcBiasMetrics(this, inputBam, outputDir)
    add(gcBiasMetrics)
    addSummarizable(gcBiasMetrics, "gc_bias")
103

104
    if (config("wgs_metrics", default = true)) {
bow's avatar
bow committed
105
106
107
108
109
110
      val wgsMetrics = new CollectWgsMetrics(this)
      wgsMetrics.input = inputBam
      wgsMetrics.output = swapExt(outputDir, inputBam, ".bam", ".wgs.metrics")
      add(wgsMetrics)
      addSummarizable(wgsMetrics, "wgs")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
111

112
    if (config("rna_metrics", default = false)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
113
114
115
116
      val rnaMetrics = new CollectRnaSeqMetrics(this)
      rnaMetrics.input = inputBam
      rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics")
      rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf"))
117
118
      rnaMetrics.refFlat = annotationRefFlat()
      rnaMetrics.ribosomalIntervals = ribosomalRefFlat()
Peter van 't Hof's avatar
Peter van 't Hof committed
119
      add(rnaMetrics)
120
      addSummarizable(rnaMetrics, "rna")
Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
123
124
125
126
127
128
129
130
131
132
133
134
135
    case class Intervals(bed: File, intervals: File)

    // Create temp jobs to convert bed files to intervals lists
    val roiIntervals = roiBedFiles.map(roiBedFile => {
      val roiIntervals = swapExt(outputDir, roiBedFile, ".bed", ".intervals")
      val ampBedToInterval = BedToIntervalList(this, roiBedFile, roiIntervals)
      ampBedToInterval.isIntermediate = true
      add(ampBedToInterval)
      Intervals(roiBedFile, roiIntervals)
    })

    // Metrics that require a amplicon bed file
    val ampIntervals = ampliconBedFile.collect {
Peter van 't Hof's avatar
Peter van 't Hof committed
136
137
138
      case bedFile =>
        val ampIntervals = swapExt(outputDir, bedFile, ".bed", ".intervals")
        val ampBedToInterval = BedToIntervalList(this, bedFile, ampIntervals)
Peter van 't Hof's avatar
Peter van 't Hof committed
139
140
141
142
143
144
        ampBedToInterval.isIntermediate = true
        add(ampBedToInterval)

        val chsMetrics = CalculateHsMetrics(this, inputBam,
          List(ampIntervals), ampIntervals :: roiIntervals.map(_.intervals), outputDir)
        add(chsMetrics)
145
        addSummarizable(chsMetrics, "hs_metrics")
146

Peter van 't Hof's avatar
Peter van 't Hof committed
147
148
149
        val pcrMetrics = CollectTargetedPcrMetrics(this, inputBam,
          ampIntervals, ampIntervals :: roiIntervals.map(_.intervals), outputDir)
        add(pcrMetrics)
bow's avatar
bow committed
150
        addSummarizable(pcrMetrics, "targeted_pcr_metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
151

Peter van 't Hof's avatar
Peter van 't Hof committed
152
        Intervals(bedFile, ampIntervals)
Peter van 't Hof's avatar
Peter van 't Hof committed
153
154
155
156
    }

    // Create stats and coverage plot for each bed/interval file
    for (intervals <- roiIntervals ++ ampIntervals) {
157
158
      val targetName = intervals.bed.getName.stripSuffix(".bed")
      val targetDir = new File(outputDir, targetName)
Peter van 't Hof's avatar
Peter van 't Hof committed
159
160

      val biStrict = BedtoolsIntersect(this, inputBam, intervals.bed,
161
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.sam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
162
        minOverlap = config("strict_intersect_overlap", default = 1.0))
163
164
      val biopetFlagstatStrict = BiopetFlagstat(this, biStrict.output, targetDir)
      addSummarizable(biopetFlagstatStrict, targetName + "_biopet_flagstat_strict")
165
      add(new BiopetFifoPipe(this, List(biStrict, biopetFlagstatStrict)))
Peter van 't Hof's avatar
Peter van 't Hof committed
166
167

      val biLoose = BedtoolsIntersect(this, inputBam, intervals.bed,
168
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.sam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
169
        minOverlap = config("loose_intersect_overlap", default = 0.01))
170
171
      val biopetFlagstatLoose = BiopetFlagstat(this, biLoose.output, targetDir)
      addSummarizable(biopetFlagstatLoose, targetName + "_biopet_flagstat_loose")
172
      add(new BiopetFifoPipe(this, List(biLoose, biopetFlagstatLoose)))
173

Peter van 't Hof's avatar
Peter van 't Hof committed
174
175
176
177
178
179
180
181
182
      val sortedBed = BamMetrics.sortedbedCache.getOrElse(intervals.bed, {
        val sorter = new BedtoolsSort(this)
        sorter.input = intervals.bed
        sorter.output = swapExt(targetDir, intervals.bed, ".bed", ".sorted.bed")
        add(sorter)
        BamMetrics.sortedbedCache += intervals.bed -> sorter.output
        sorter.output
      })
      val bedCov = BedtoolsCoverage(this, sortedBed, inputBam, depth = true)
183
      val covStats = CoverageStats(this, targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
184
185
      covStats.title = Some("Coverage Plot")
      covStats.subTitle = Some(s"for file '$targetName.bed'")
186
      add(bedCov | covStats)
Peter van 't Hof's avatar
Peter van 't Hof committed
187
      addSummarizable(covStats, targetName + "_cov_stats")
188
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
189

Peter van 't Hof's avatar
Peter van 't Hof committed
190
    addSummaryJobs()
191
192
193
194
  }
}

object BamMetrics extends PipelineCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
195
  /** Make default implementation of BamMetrics and runs script already */
Peter van 't Hof's avatar
Peter van 't Hof committed
196
197
198
199
  def apply(root: Configurable,
            bamFile: File, outputDir: File,
            sampleId: Option[String] = None,
            libId: Option[String] = None): BamMetrics = {
200
    val bamMetrics = new BamMetrics(root)
Peter van 't Hof's avatar
Peter van 't Hof committed
201
202
    bamMetrics.sampleId = sampleId
    bamMetrics.libId = libId
203
204
205
    bamMetrics.inputBam = bamFile
    bamMetrics.outputDir = outputDir

Peter van 't Hof's avatar
Peter van 't Hof committed
206
207
208
    bamMetrics.init()
    bamMetrics.biopetScript()
    bamMetrics
209
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
210
211

  private var sortedbedCache: Map[File, File] = Map()
212
}