BamMetrics.scala 7.78 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
19
20
import nl.lumc.sasc.biopet.scripts.CoverageStats
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.{ SampleLibraryTag, PipelineCommand }
22
import java.io.File
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.tools.BiopetFlagstat
24
25
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect }
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.extensions.picard._
27
28
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat

Peter van 't Hof's avatar
Peter van 't Hof committed
29
class BamMetrics(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
30
31
32
33
34
  def this() = this(null)

  @Input(doc = "Bam File", shortName = "BAM", required = true)
  var inputBam: File = _

Peter van 't Hof's avatar
Peter van 't Hof committed
35
36
  /** Bed files for region of interests */
  var roiBedFiles: List[File] = config("regions_of_interest", Nil)
37

Peter van 't Hof's avatar
Peter van 't Hof committed
38
39
  /** Bed of amplicon that is used */
  var ampliconBedFile: Option[File] = config("amplicon_bed")
40

41
42
43
  /** Settings for CollectRnaSeqMetrics */
  var rnaMetricsSettings: Map[String, String] = Map()
  var transcriptRefFlatFile: Option[File] = config("transcript_refflat")
Peter van 't Hof's avatar
Peter van 't Hof committed
44

Peter van 't Hof's avatar
Peter van 't Hof committed
45
  /** return location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
46
47
48
49
50
51
  def summaryFile = (sampleId, libId) match {
    case (Some(sampleId), Some(libId)) => new File(outputDir, sampleId + "-" + libId + ".BamMetrics.summary.json")
    case (Some(sampleId), _)           => new File(outputDir, sampleId + ".BamMetrics.summary.json")
    case _                             => new File(outputDir, "BamMetrics.summary.json")
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
52
  /** returns files to store in summary */
Peter van 't Hof's avatar
Peter van 't Hof committed
53
54
55
  def summaryFiles = Map("input_bam" -> inputBam) ++
    ampliconBedFile.map("amplicon" -> _).toMap ++
    ampliconBedFile.map(x => "roi_" + x.getName.stripSuffix(".bed") -> x).toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
56

Peter van 't Hof's avatar
Peter van 't Hof committed
57
  /** return settings */
Peter van 't Hof's avatar
Peter van 't Hof committed
58
59
  def summarySettings = Map("amplicon_name" -> ampliconBedFile.collect { case x => x.getName.stripSuffix(".bed") },
    "roi_name" -> roiBedFiles.map(_.getName.stripSuffix(".bed")))
Peter van 't Hof's avatar
Peter van 't Hof committed
60

Peter van 't Hof's avatar
Peter van 't Hof committed
61
  /** executed before script */
62
63
64
  def init() {
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
65
  /** Script to add jobs */
66
  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
67
    add(SamtoolsFlagstat(this, inputBam, swapExt(outputDir, inputBam, ".bam", ".flagstat")))
68

69
    val biopetFlagstat = BiopetFlagstat(this, inputBam, outputDir)
70
71
72
    add(biopetFlagstat)
    addSummarizable(biopetFlagstat, "biopet_flagstat")

Peter van 't Hof's avatar
Peter van 't Hof committed
73
74
75
76
    val multiMetrics = new CollectMultipleMetrics(this)
    multiMetrics.input = inputBam
    multiMetrics.outputName = new File(outputDir, inputBam.getName.stripSuffix(".bam"))
    add(multiMetrics)
77
    addSummarizable(multiMetrics, "multi_metrics")
78

79
80
81
    val gcBiasMetrics = CollectGcBiasMetrics(this, inputBam, outputDir)
    add(gcBiasMetrics)
    addSummarizable(gcBiasMetrics, "gc_bias")
82

bow's avatar
bow committed
83
84
85
86
87
88
89
    if (transcriptRefFlatFile.isEmpty) {
      val wgsMetrics = new CollectWgsMetrics(this)
      wgsMetrics.input = inputBam
      wgsMetrics.output = swapExt(outputDir, inputBam, ".bam", ".wgs.metrics")
      add(wgsMetrics)
      addSummarizable(wgsMetrics, "wgs")
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
90

91
    if (transcriptRefFlatFile.isDefined) {
Peter van 't Hof's avatar
Peter van 't Hof committed
92
93
94
95
      val rnaMetrics = new CollectRnaSeqMetrics(this)
      rnaMetrics.input = inputBam
      rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics")
      rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf"))
96
97
98
      rnaMetrics.refFlat = transcriptRefFlatFile.get
      rnaMetrics.ribosomalIntervals = rnaMetricsSettings.get("ribosomal_intervals").collect { case n => new File(n) }
      rnaMetrics.strandSpecificity = rnaMetricsSettings.get("strand_specificity")
Peter van 't Hof's avatar
Peter van 't Hof committed
99
      add(rnaMetrics)
100
      addSummarizable(rnaMetrics, "rna")
Peter van 't Hof's avatar
Peter van 't Hof committed
101
102
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
    case class Intervals(bed: File, intervals: File)

    // Create temp jobs to convert bed files to intervals lists
    val roiIntervals = roiBedFiles.map(roiBedFile => {
      val roiIntervals = swapExt(outputDir, roiBedFile, ".bed", ".intervals")
      val ampBedToInterval = BedToIntervalList(this, roiBedFile, roiIntervals)
      ampBedToInterval.isIntermediate = true
      add(ampBedToInterval)
      Intervals(roiBedFile, roiIntervals)
    })

    // Metrics that require a amplicon bed file
    val ampIntervals = ampliconBedFile.collect {
      case ampliconBedFile => {
        val ampIntervals = swapExt(outputDir, ampliconBedFile, ".bed", ".intervals")
        val ampBedToInterval = BedToIntervalList(this, ampliconBedFile, ampIntervals)
        ampBedToInterval.isIntermediate = true
        add(ampBedToInterval)

        val chsMetrics = CalculateHsMetrics(this, inputBam,
          List(ampIntervals), ampIntervals :: roiIntervals.map(_.intervals), outputDir)
        add(chsMetrics)
125
        addSummarizable(chsMetrics, "hs_metrics")
126

Peter van 't Hof's avatar
Peter van 't Hof committed
127
128
129
        val pcrMetrics = CollectTargetedPcrMetrics(this, inputBam,
          ampIntervals, ampIntervals :: roiIntervals.map(_.intervals), outputDir)
        add(pcrMetrics)
130
        addSummarizable(chsMetrics, "targeted_pcr_metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
131
132
133
134
135
136
137

        Intervals(ampliconBedFile, ampIntervals)
      }
    }

    // Create stats and coverage plot for each bed/interval file
    for (intervals <- roiIntervals ++ ampIntervals) {
138
139
      val targetName = intervals.bed.getName.stripSuffix(".bed")
      val targetDir = new File(outputDir, targetName)
Peter van 't Hof's avatar
Peter van 't Hof committed
140
141
142

      val biStrict = BedtoolsIntersect(this, inputBam, intervals.bed,
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.bam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
143
        minOverlap = config("strict_intersect_overlap", default = 1.0))
Peter van 't Hof's avatar
Peter van 't Hof committed
144
145
146
      biStrict.isIntermediate = true
      add(biStrict)
      add(SamtoolsFlagstat(this, biStrict.output, targetDir))
147
148
149
      val biopetFlagstatStrict = BiopetFlagstat(this, biStrict.output, targetDir)
      add(biopetFlagstatStrict)
      addSummarizable(biopetFlagstatStrict, targetName + "_biopet_flagstat_strict")
Peter van 't Hof's avatar
Peter van 't Hof committed
150
151
152

      val biLoose = BedtoolsIntersect(this, inputBam, intervals.bed,
        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.bam"),
Peter van 't Hof's avatar
Peter van 't Hof committed
153
        minOverlap = config("loose_intersect_overlap", default = 0.01))
Peter van 't Hof's avatar
Peter van 't Hof committed
154
155
156
      biLoose.isIntermediate = true
      add(biLoose)
      add(SamtoolsFlagstat(this, biLoose.output, targetDir))
157
158
159
      val biopetFlagstatLoose = BiopetFlagstat(this, biLoose.output, targetDir)
      add(biopetFlagstatLoose)
      addSummarizable(biopetFlagstatLoose, targetName + "_biopet_flagstat_loose")
160
161

      val coverageFile = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
162
163

      //FIXME:should use piping
Peter van 't Hof's avatar
Peter van 't Hof committed
164
      add(BedtoolsCoverage(this, inputBam, intervals.bed, coverageFile, depth = true), true)
165
      val covStats = CoverageStats(this, coverageFile, targetDir)
166
      covStats.title = Some("Coverage for " + targetName)
Peter van 't Hof's avatar
Peter van 't Hof committed
167
      covStats.subTitle = Some(".")
168
      add(covStats)
Peter van 't Hof's avatar
Peter van 't Hof committed
169
      addSummarizable(covStats, targetName + "_cov_stats")
170
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
171
172

    addSummaryJobs
173
174
175
176
  }
}

object BamMetrics extends PipelineCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
177
  /** Make default implementation of BamMetrics and runs script already */
Peter van 't Hof's avatar
Peter van 't Hof committed
178
  def apply(root: Configurable, bamFile: File, outputDir: File): BamMetrics = {
179
180
181
182
    val bamMetrics = new BamMetrics(root)
    bamMetrics.inputBam = bamFile
    bamMetrics.outputDir = outputDir

Peter van 't Hof's avatar
Peter van 't Hof committed
183
184
185
    bamMetrics.init()
    bamMetrics.biopetScript()
    bamMetrics
186
187
  }
}