BamMetrics.scala 8.05 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
18
package nl.lumc.sasc.biopet.pipelines.bammetrics

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
19

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.annotations.{ AnnotationRefFlat, RibosomalRefFlat }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.extensions.picard._
26
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
28
import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat
29
import nl.lumc.sasc.biopet.utils.intervals.BedCheck
Peter van 't Hof's avatar
Peter van 't Hof committed
30
import org.broadinstitute.gatk.queue.QScript
31

32
33
34
class BamMetrics(val root: Configurable) extends QScript
  with SummaryQScript
  with SampleLibraryTag
35
  with Reference
36
37
38
  with TargetRegions
  with AnnotationRefFlat
  with RibosomalRefFlat {
39

40
41
42
43
44
  def this() = this(null)

  @Input(doc = "Bam File", shortName = "BAM", required = true)
  var inputBam: File = _

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  /** return location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
46
  def summaryFile = (sampleId, libId) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
47
48
49
    case (Some(s), Some(l)) => new File(outputDir, s + "-" + l + ".BamMetrics.summary.json")
    case (Some(s), _)       => new File(outputDir, s + ".BamMetrics.summary.json")
    case _                  => new File(outputDir, "BamMetrics.summary.json")
Peter van 't Hof's avatar
Peter van 't Hof committed
50
51
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
52
  /** returns files to store in summary */
53
54
  def summaryFiles = Map("reference" -> referenceFasta(),
    "input_bam" -> inputBam) ++
Peter van 't Hof's avatar
Peter van 't Hof committed
55
56
    ampliconBedFile.map("amplicon" -> _).toMap ++
    ampliconBedFile.map(x => "roi_" + x.getName.stripSuffix(".bed") -> x).toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
57

Peter van 't Hof's avatar
Peter van 't Hof committed
58
  /** return settings */
Peter van 't Hof's avatar
Peter van 't Hof committed
59
60
  def summarySettings = Map("amplicon_name" -> ampliconBedFile.collect { case x => x.getName.stripSuffix(".bed") },
    "roi_name" -> roiBedFiles.map(_.getName.stripSuffix(".bed")))
Peter van 't Hof's avatar
Peter van 't Hof committed
61

62
63
64
65
66
67
68
  override def reportClass = {
    val bammetricsReport = new BammetricsReport(this)
    bammetricsReport.outputDir = new File(outputDir, "report")
    bammetricsReport.summaryFile = summaryFile
    bammetricsReport.args = if (libId.isDefined) Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
Peter van 't Hof's avatar
Peter van 't Hof committed
69
    else Map("sampleId" -> sampleId.getOrElse("."))
70
71
72
    Some(bammetricsReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
73
  /** executed before script */
74
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
75
    inputFiles :+= new InputFile(inputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
76
77
    ampliconBedFile.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
    roiBedFiles.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
78
79
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
80
  /** Script to add jobs */
81
  def biopetScript() {
82
    add(SamtoolsFlagstat(this, inputBam, outputDir))
83

84
    val biopetFlagstat = BiopetFlagstat(this, inputBam, outputDir)
85
86
87
    add(biopetFlagstat)
    addSummarizable(biopetFlagstat, "biopet_flagstat")

Peter van 't Hof's avatar
Peter van 't Hof committed
88
89
90
91
    val multiMetrics = new CollectMultipleMetrics(this)
    multiMetrics.input = inputBam
    multiMetrics.outputName = new File(outputDir, inputBam.getName.stripSuffix(".bam"))
    add(multiMetrics)
92
    addSummarizable(multiMetrics, "multi_metrics")
93

94
95
96
    val gcBiasMetrics = CollectGcBiasMetrics(this, inputBam, outputDir)
    add(gcBiasMetrics)
    addSummarizable(gcBiasMetrics, "gc_bias")
97

98
    if (config("wgs_metrics", default = true)) {
bow's avatar
bow committed
99
100
101
102
103
104
      val wgsMetrics = new CollectWgsMetrics(this)
      wgsMetrics.input = inputBam
      wgsMetrics.output = swapExt(outputDir, inputBam, ".bam", ".wgs.metrics")
      add(wgsMetrics)
      addSummarizable(wgsMetrics, "wgs")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
105

106
    if (config("rna_metrics", default = false)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
107
108
109
110
      val rnaMetrics = new CollectRnaSeqMetrics(this)
      rnaMetrics.input = inputBam
      rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics")
      rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf"))
111
112
      rnaMetrics.refFlat = annotationRefFlat()
      rnaMetrics.ribosomalIntervals = ribosomalRefFlat()
Peter van 't Hof's avatar
Peter van 't Hof committed
113
      add(rnaMetrics)
114
      addSummarizable(rnaMetrics, "rna")
Peter van 't Hof's avatar
Peter van 't Hof committed
115
116
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
117
118
119
120
121
122
123
124
125
126
127
128
129
    case class Intervals(bed: File, intervals: File)

    // Create temp jobs to convert bed files to intervals lists
    val roiIntervals = roiBedFiles.map(roiBedFile => {
      val roiIntervals = swapExt(outputDir, roiBedFile, ".bed", ".intervals")
      val ampBedToInterval = BedToIntervalList(this, roiBedFile, roiIntervals)
      ampBedToInterval.isIntermediate = true
      add(ampBedToInterval)
      Intervals(roiBedFile, roiIntervals)
    })

    // Metrics that require a amplicon bed file
    val ampIntervals = ampliconBedFile.collect {
Peter van 't Hof's avatar
Peter van 't Hof committed
130
131
132
      case bedFile =>
        val ampIntervals = swapExt(outputDir, bedFile, ".bed", ".intervals")
        val ampBedToInterval = BedToIntervalList(this, bedFile, ampIntervals)
Peter van 't Hof's avatar
Peter van 't Hof committed
133
134
135
136
137
138
        ampBedToInterval.isIntermediate = true
        add(ampBedToInterval)

        val chsMetrics = CalculateHsMetrics(this, inputBam,
          List(ampIntervals), ampIntervals :: roiIntervals.map(_.intervals), outputDir)
        add(chsMetrics)
139
        addSummarizable(chsMetrics, "hs_metrics")
140

Peter van 't Hof's avatar
Peter van 't Hof committed
141
142
143
        val pcrMetrics = CollectTargetedPcrMetrics(this, inputBam,
          ampIntervals, ampIntervals :: roiIntervals.map(_.intervals), outputDir)
        add(pcrMetrics)
bow's avatar
bow committed
144
        addSummarizable(pcrMetrics, "targeted_pcr_metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
145

Peter van 't Hof's avatar
Peter van 't Hof committed
146
        Intervals(bedFile, ampIntervals)
Peter van 't Hof's avatar
Peter van 't Hof committed
147
148
149
150
    }

    // Create stats and coverage plot for each bed/interval file
    for (intervals <- roiIntervals ++ ampIntervals) {
151
152
      val targetName = intervals.bed.getName.stripSuffix(".bed")
      val targetDir = new File(outputDir, targetName)
Peter van 't Hof's avatar
Peter van 't Hof committed
153
154

      val biStrict = BedtoolsIntersect(this, inputBam, intervals.bed,
155
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.sam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
156
        minOverlap = config("strict_intersect_overlap", default = 1.0))
157
158
      val biopetFlagstatStrict = BiopetFlagstat(this, biStrict.output, targetDir)
      addSummarizable(biopetFlagstatStrict, targetName + "_biopet_flagstat_strict")
159
      add(new BiopetFifoPipe(this, List(biStrict, biopetFlagstatStrict)))
Peter van 't Hof's avatar
Peter van 't Hof committed
160
161

      val biLoose = BedtoolsIntersect(this, inputBam, intervals.bed,
162
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.sam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
163
        minOverlap = config("loose_intersect_overlap", default = 0.01))
164
165
      val biopetFlagstatLoose = BiopetFlagstat(this, biLoose.output, targetDir)
      addSummarizable(biopetFlagstatLoose, targetName + "_biopet_flagstat_loose")
166
      add(new BiopetFifoPipe(this, List(biLoose, biopetFlagstatLoose)))
167

168
      val bedCov = BedtoolsCoverage(this, intervals.bed, inputBam, depth = true)
169
      val covStats = CoverageStats(this, targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
170
171
      covStats.title = Some("Coverage Plot")
      covStats.subTitle = Some(s"for file '$targetName.bed'")
172
      add(bedCov | covStats)
Peter van 't Hof's avatar
Peter van 't Hof committed
173
      addSummarizable(covStats, targetName + "_cov_stats")
174
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
175

Peter van 't Hof's avatar
Peter van 't Hof committed
176
    addSummaryJobs()
177
178
179
180
  }
}

object BamMetrics extends PipelineCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
181
  /** Make default implementation of BamMetrics and runs script already */
Peter van 't Hof's avatar
Peter van 't Hof committed
182
183
184
185
  def apply(root: Configurable,
            bamFile: File, outputDir: File,
            sampleId: Option[String] = None,
            libId: Option[String] = None): BamMetrics = {
186
    val bamMetrics = new BamMetrics(root)
Peter van 't Hof's avatar
Peter van 't Hof committed
187
188
    bamMetrics.sampleId = sampleId
    bamMetrics.libId = libId
189
190
191
    bamMetrics.inputBam = bamFile
    bamMetrics.outputDir = outputDir

Peter van 't Hof's avatar
Peter van 't Hof committed
192
193
194
    bamMetrics.init()
    bamMetrics.biopetScript()
    bamMetrics
195
196
  }
}