BamMetrics.scala 8.02 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15
16
17
package nl.lumc.sasc.biopet.pipelines.bammetrics

import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.annotations.{ AnnotationRefFlat, RibosomalRefFlat }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
22
23
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.extensions.picard._
25
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
27
import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat
28
import nl.lumc.sasc.biopet.utils.intervals.BedCheck
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import org.broadinstitute.gatk.queue.QScript
30

31
32
33
class BamMetrics(val root: Configurable) extends QScript
  with SummaryQScript
  with SampleLibraryTag
34
  with Reference
35
36
37
  with TargetRegions
  with AnnotationRefFlat
  with RibosomalRefFlat {
38

39
40
41
42
43
  def this() = this(null)

  @Input(doc = "Bam File", shortName = "BAM", required = true)
  var inputBam: File = _

Peter van 't Hof's avatar
Peter van 't Hof committed
44
  /** return location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
45
  def summaryFile = (sampleId, libId) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
46
47
48
    case (Some(s), Some(l)) => new File(outputDir, s + "-" + l + ".BamMetrics.summary.json")
    case (Some(s), _)       => new File(outputDir, s + ".BamMetrics.summary.json")
    case _                  => new File(outputDir, "BamMetrics.summary.json")
Peter van 't Hof's avatar
Peter van 't Hof committed
49
50
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
51
  /** returns files to store in summary */
52
53
  def summaryFiles = Map("reference" -> referenceFasta(),
    "input_bam" -> inputBam) ++
Peter van 't Hof's avatar
Peter van 't Hof committed
54
55
    ampliconBedFile.map("amplicon" -> _).toMap ++
    ampliconBedFile.map(x => "roi_" + x.getName.stripSuffix(".bed") -> x).toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
56

Peter van 't Hof's avatar
Peter van 't Hof committed
57
  /** return settings */
Peter van 't Hof's avatar
Peter van 't Hof committed
58
59
  def summarySettings = Map("amplicon_name" -> ampliconBedFile.collect { case x => x.getName.stripSuffix(".bed") },
    "roi_name" -> roiBedFiles.map(_.getName.stripSuffix(".bed")))
Peter van 't Hof's avatar
Peter van 't Hof committed
60

61
62
63
64
65
66
67
  override def reportClass = {
    val bammetricsReport = new BammetricsReport(this)
    bammetricsReport.outputDir = new File(outputDir, "report")
    bammetricsReport.summaryFile = summaryFile
    bammetricsReport.args = if (libId.isDefined) Map(
      "sampleId" -> sampleId.getOrElse("."),
      "libId" -> libId.getOrElse("."))
Peter van 't Hof's avatar
Peter van 't Hof committed
68
    else Map("sampleId" -> sampleId.getOrElse("."))
69
70
71
    Some(bammetricsReport)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
72
  /** executed before script */
73
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
74
    inputFiles :+= new InputFile(inputBam)
Peter van 't Hof's avatar
Peter van 't Hof committed
75
76
    ampliconBedFile.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
    roiBedFiles.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
77
78
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
79
  /** Script to add jobs */
80
  def biopetScript() {
81
    add(SamtoolsFlagstat(this, inputBam, outputDir))
82

83
    val biopetFlagstat = BiopetFlagstat(this, inputBam, outputDir)
84
85
86
    add(biopetFlagstat)
    addSummarizable(biopetFlagstat, "biopet_flagstat")

Peter van 't Hof's avatar
Peter van 't Hof committed
87
88
89
90
    val multiMetrics = new CollectMultipleMetrics(this)
    multiMetrics.input = inputBam
    multiMetrics.outputName = new File(outputDir, inputBam.getName.stripSuffix(".bam"))
    add(multiMetrics)
91
    addSummarizable(multiMetrics, "multi_metrics")
92

93
94
95
    val gcBiasMetrics = CollectGcBiasMetrics(this, inputBam, outputDir)
    add(gcBiasMetrics)
    addSummarizable(gcBiasMetrics, "gc_bias")
96

97
    if (config("wgs_metrics", default = true)) {
bow's avatar
bow committed
98
99
100
101
102
103
      val wgsMetrics = new CollectWgsMetrics(this)
      wgsMetrics.input = inputBam
      wgsMetrics.output = swapExt(outputDir, inputBam, ".bam", ".wgs.metrics")
      add(wgsMetrics)
      addSummarizable(wgsMetrics, "wgs")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
104

105
    if (config("rna_metrics", default = false)) {
Peter van 't Hof's avatar
Peter van 't Hof committed
106
107
108
109
      val rnaMetrics = new CollectRnaSeqMetrics(this)
      rnaMetrics.input = inputBam
      rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics")
      rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf"))
110
111
      rnaMetrics.refFlat = annotationRefFlat()
      rnaMetrics.ribosomalIntervals = ribosomalRefFlat()
Peter van 't Hof's avatar
Peter van 't Hof committed
112
      add(rnaMetrics)
113
      addSummarizable(rnaMetrics, "rna")
Peter van 't Hof's avatar
Peter van 't Hof committed
114
115
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
116
117
118
119
120
121
122
123
124
125
126
127
128
    case class Intervals(bed: File, intervals: File)

    // Create temp jobs to convert bed files to intervals lists
    val roiIntervals = roiBedFiles.map(roiBedFile => {
      val roiIntervals = swapExt(outputDir, roiBedFile, ".bed", ".intervals")
      val ampBedToInterval = BedToIntervalList(this, roiBedFile, roiIntervals)
      ampBedToInterval.isIntermediate = true
      add(ampBedToInterval)
      Intervals(roiBedFile, roiIntervals)
    })

    // Metrics that require a amplicon bed file
    val ampIntervals = ampliconBedFile.collect {
Peter van 't Hof's avatar
Peter van 't Hof committed
129
130
131
      case bedFile =>
        val ampIntervals = swapExt(outputDir, bedFile, ".bed", ".intervals")
        val ampBedToInterval = BedToIntervalList(this, bedFile, ampIntervals)
Peter van 't Hof's avatar
Peter van 't Hof committed
132
133
134
135
136
137
        ampBedToInterval.isIntermediate = true
        add(ampBedToInterval)

        val chsMetrics = CalculateHsMetrics(this, inputBam,
          List(ampIntervals), ampIntervals :: roiIntervals.map(_.intervals), outputDir)
        add(chsMetrics)
138
        addSummarizable(chsMetrics, "hs_metrics")
139

Peter van 't Hof's avatar
Peter van 't Hof committed
140
141
142
        val pcrMetrics = CollectTargetedPcrMetrics(this, inputBam,
          ampIntervals, ampIntervals :: roiIntervals.map(_.intervals), outputDir)
        add(pcrMetrics)
bow's avatar
bow committed
143
        addSummarizable(pcrMetrics, "targeted_pcr_metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
144

Peter van 't Hof's avatar
Peter van 't Hof committed
145
        Intervals(bedFile, ampIntervals)
Peter van 't Hof's avatar
Peter van 't Hof committed
146
147
148
149
    }

    // Create stats and coverage plot for each bed/interval file
    for (intervals <- roiIntervals ++ ampIntervals) {
150
151
      val targetName = intervals.bed.getName.stripSuffix(".bed")
      val targetDir = new File(outputDir, targetName)
Peter van 't Hof's avatar
Peter van 't Hof committed
152
153

      val biStrict = BedtoolsIntersect(this, inputBam, intervals.bed,
154
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.sam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
155
        minOverlap = config("strict_intersect_overlap", default = 1.0))
156
157
      val biopetFlagstatStrict = BiopetFlagstat(this, biStrict.output, targetDir)
      addSummarizable(biopetFlagstatStrict, targetName + "_biopet_flagstat_strict")
158
      add(new BiopetFifoPipe(this, List(biStrict, biopetFlagstatStrict)))
Peter van 't Hof's avatar
Peter van 't Hof committed
159
160

      val biLoose = BedtoolsIntersect(this, inputBam, intervals.bed,
161
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.sam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
162
        minOverlap = config("loose_intersect_overlap", default = 0.01))
163
164
      val biopetFlagstatLoose = BiopetFlagstat(this, biLoose.output, targetDir)
      addSummarizable(biopetFlagstatLoose, targetName + "_biopet_flagstat_loose")
165
      add(new BiopetFifoPipe(this, List(biLoose, biopetFlagstatLoose)))
166

167
      val bedCov = BedtoolsCoverage(this, intervals.bed, inputBam, depth = true)
168
      val covStats = CoverageStats(this, targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
169
170
      covStats.title = Some("Coverage Plot")
      covStats.subTitle = Some(s"for file '$targetName.bed'")
171
      add(bedCov | covStats)
Peter van 't Hof's avatar
Peter van 't Hof committed
172
      addSummarizable(covStats, targetName + "_cov_stats")
173
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
174

Peter van 't Hof's avatar
Peter van 't Hof committed
175
    addSummaryJobs()
176
177
178
179
  }
}

object BamMetrics extends PipelineCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
180
  /** Make default implementation of BamMetrics and runs script already */
Peter van 't Hof's avatar
Peter van 't Hof committed
181
182
183
184
  def apply(root: Configurable,
            bamFile: File, outputDir: File,
            sampleId: Option[String] = None,
            libId: Option[String] = None): BamMetrics = {
185
    val bamMetrics = new BamMetrics(root)
Peter van 't Hof's avatar
Peter van 't Hof committed
186
187
    bamMetrics.sampleId = sampleId
    bamMetrics.libId = libId
188
189
190
    bamMetrics.inputBam = bamFile
    bamMetrics.outputDir = outputDir

Peter van 't Hof's avatar
Peter van 't Hof committed
191
192
193
    bamMetrics.init()
    bamMetrics.biopetScript()
    bamMetrics
194
195
  }
}